diff --git a/src/nvmath/Basis.cpp b/src/nvmath/Basis.cpp index 085e25b..8664338 100644 --- a/src/nvmath/Basis.cpp +++ b/src/nvmath/Basis.cpp @@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/) return; } } - normal = ::normalize(normal, epsilon); + normal = nv::normalize(normal, epsilon); tangent -= normal * dot(normal, tangent); bitangent -= normal * dot(normal, bitangent); @@ -68,7 +68,8 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/) } else { - tangent = ::normalize(tangent, epsilon); +#if 0 + tangent = nv::normalize(tangent, epsilon); bitangent -= tangent * dot(tangent, bitangent); if (length(bitangent) < epsilon) @@ -78,11 +79,47 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/) } else { - tangent = ::normalize(tangent, epsilon); + bitangent = nv::normalize(bitangent, epsilon); } +#else + if (length(bitangent) < epsilon) + { + bitangent = cross(tangent, normal); + nvCheck(isNormalized(bitangent)); + } + else + { + tangent = nv::normalize(tangent); + bitangent = nv::normalize(bitangent); + + Vector3 bisector = nv::normalize(tangent + bitangent); + Vector3 axis = cross(bisector, normal); + + nvDebugCheck(isNormalized(axis, epsilon)); + nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon)); + + if (dot(axis, tangent) > 0) + { + tangent = nv::normalize(bisector + axis); + bitangent = nv::normalize(bisector - axis); + } + else + { + tangent = nv::normalize(bisector - axis); + bitangent = nv::normalize(bisector + axis); + } + } +#endif } - // Check vector lengths. + /*// Check vector lengths. + if (!isNormalized(normal, epsilon)) + { + nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z()); + nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z()); + nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z()); + }*/ + nvCheck(isNormalized(normal, epsilon)); nvCheck(isNormalized(tangent, epsilon)); nvCheck(isNormalized(bitangent, epsilon)); @@ -125,9 +162,18 @@ void Basis::buildFrameForDirection(Vector3::Arg d) bitangent = cross(normal, tangent); } +bool Basis::isValid() const +{ + if (equal(normal, Vector3(zero))) return false; + if (equal(tangent, Vector3(zero))) return false; + if (equal(bitangent, Vector3(zero))) return false; + + if (equal(determinant(), 0.0f)) return false; + + return true; +} -/* /// Transform by this basis. (From this basis to object space). Vector3 Basis::transform(Vector3::Arg v) const { @@ -144,30 +190,31 @@ Vector3 Basis::transformT(Vector3::Arg v) } /// Transform by the inverse. (From object space to this basis). -/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned. +/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned. Vector3 Basis::transformI(Vector3::Arg v) const { const float det = determinant(); - nvCheck(!equalf(det, 0.0f)); + nvDebugCheck(!equal(det, 0.0f, 0.0f)); const float idet = 1.0f / det; // Rows of the inverse matrix. - Vector3 r0, r1, r2; - r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet; - r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet; - r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet; + Vector3 r0( + (bitangent.y() * normal.z() - bitangent.z() * normal.y()), + -(bitangent.x() * normal.z() - bitangent.z() * normal.x()), + (bitangent.x() * normal.y() - bitangent.y() * normal.x())); - r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet; - r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet; - r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet; + Vector3 r1( + -(tangent.y() * normal.z() - tangent.z() * normal.y()), + (tangent.x() * normal.z() - tangent.z() * normal.x()), + -(tangent.x() * normal.y() - tangent.y() * normal.x())); - r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet; - r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet; - r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet; + Vector3 r2( + (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()), + -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()), + (tangent.x() * bitangent.y() - tangent.y() * bitangent.x())); - return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)); + return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet; } -*/ diff --git a/src/nvmath/Basis.h b/src/nvmath/Basis.h index 7adde57..dca0442 100644 --- a/src/nvmath/Basis.h +++ b/src/nvmath/Basis.h @@ -54,7 +54,8 @@ namespace nv tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y(); } - /* + bool isValid() const; + // Get transform matrix for this basis. NVMATH_API Matrix matrix() const; @@ -66,7 +67,7 @@ namespace nv // Transform by the inverse. (From object space to this basis). NVMATH_API Vector3 transformI(Vector3::Arg v) const; - */ + Vector3 tangent; Vector3 bitangent; diff --git a/src/nvmath/Box.h b/src/nvmath/Box.h index a8426a3..ed88a1b 100644 --- a/src/nvmath/Box.h +++ b/src/nvmath/Box.h @@ -9,6 +9,7 @@ namespace nv { +class Stream; /// Axis Aligned Bounding Box. class Box @@ -27,11 +28,13 @@ public: // Cast operators. operator const float * () const { return reinterpret_cast(this); } - /// Min corner of the box. - Vector3 mins() const { return m_mins; } + // Min corner of the box. + Vector3 minCorner() const { return m_mins; } + Vector3 & minCorner() { return m_mins; } - /// Max corner of the box. - Vector3 maxs() const { return m_maxs; } + // Max corner of the box. + Vector3 maxCorner() const { return m_maxs; } + Vector3 & maxCorner() { return m_maxs; } /// Clear the bounds. void clearBounds() @@ -108,7 +111,7 @@ public: float area() const { const Vector3 d = extents(); - return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z()); + return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z()); } /// Get the volume of the box. @@ -118,6 +121,16 @@ public: return 8.0f * (d.x() * d.y() * d.z()); } + /// Return true if the box contains the given point. + bool contains(Vector3::Arg p) const + { + return + m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() && + m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z(); + } + + friend Stream & operator<< (Stream & s, Box & box); + private: Vector3 m_mins; @@ -125,15 +138,6 @@ private: }; -/* -/// Point inside box test. -inline bool pointInsideBox(const Box & b, Vector3::Arg p) const -{ - return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() && - m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z()); -} -*/ - } // nv namespace diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt index 7ea4a80..c7d7e94 100644 --- a/src/nvmath/CMakeLists.txt +++ b/src/nvmath/CMakeLists.txt @@ -5,13 +5,19 @@ SET(MATH_SRCS Vector.h Matrix.h Quaternion.h + Plane.h Plane.cpp Box.h Color.h Montecarlo.h Montecarlo.cpp Random.h Random.cpp SphericalHarmonic.h SphericalHarmonic.cpp Basis.h Basis.cpp - Triangle.h Triangle.cpp TriBox.cpp) + Triangle.h Triangle.cpp TriBox.cpp + Polygon.h Polygon.cpp + TypeSerialization.h TypeSerialization.cpp + Sparse.h Sparse.cpp + Solver.h Solver.cpp + KahanSum.h) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/nvmath/KahanSum.h b/src/nvmath/KahanSum.h new file mode 100644 index 0000000..dac706f --- /dev/null +++ b/src/nvmath/KahanSum.h @@ -0,0 +1,38 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_KAHANSUM_H +#define NV_MATH_KAHANSUM_H + +#include + +namespace nv +{ + + class KahanSum + { + public: + KahanSum() : accum(0.0f), err(0) {}; + + void add(float f) + { + float compensated = f + err; + float tmp = accum + compensated; + err = accum - tmp; + err += compensated; + accum = tmp; + } + + float sum() const + { + return accum; + } + + private: + float accum; + float err; + }; + +} // nv namespace + + +#endif // NV_MATH_KAHANSUM_H diff --git a/src/nvmath/Plane.cpp b/src/nvmath/Plane.cpp new file mode 100644 index 0000000..979c099 --- /dev/null +++ b/src/nvmath/Plane.cpp @@ -0,0 +1,17 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include "Plane.h" +#include "Matrix.h" + +namespace nv +{ + Plane transformPlane(const Matrix& m, Plane::Arg p) + { + Vector3 newVec = transformVector(m, p.vector()); + + Vector3 ptInPlane = p.offset() * p.vector(); + ptInPlane = transformPoint(m, ptInPlane); + + return Plane(newVec, ptInPlane); + } +} diff --git a/src/nvmath/Plane.h b/src/nvmath/Plane.h new file mode 100644 index 0000000..bc7128c --- /dev/null +++ b/src/nvmath/Plane.h @@ -0,0 +1,77 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_PLANE_H +#define NV_MATH_PLANE_H + +#include +#include + +namespace nv +{ + class Matrix; + + + class NVMATH_CLASS Plane + { + public: + typedef Plane const & Arg; + + Plane(); + Plane(float x, float y, float z, float w); + Plane(Vector4::Arg v); + Plane(Vector3::Arg v, float d); + Plane(Vector3::Arg normal, Vector3::Arg point); + + const Plane & operator=(Plane::Arg v); + + Vector3 vector() const; + scalar offset() const; + + const Vector4 & asVector() const; + Vector4 & asVector(); + + void operator*=(scalar s); + + private: + Vector4 p; + }; + + inline Plane::Plane() {} + inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w) {} + inline Plane::Plane(Vector4::Arg v) : p(v) {} + inline Plane::Plane(Vector3::Arg v, float d) : p(v, d) {} + inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point)) {} + + inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; } + + inline Vector3 Plane::vector() const { return p.xyz(); } + inline scalar Plane::offset() const { return p.w(); } + + inline const Vector4 & Plane::asVector() const { return p; } + inline Vector4 & Plane::asVector() { return p; } + + // Normalize plane. + inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON) + { + const float len = length(plane.vector()); + nvDebugCheck(!isZero(len, epsilon)); + const float inv = 1.0f / len; + return Plane(plane.asVector() * inv); + } + + // Get the distance from the given point to this plane. + inline float distance(Plane::Arg plane, Vector3::Arg point) + { + return dot(plane.vector(), point) - plane.offset(); + } + + inline void Plane::operator*=(scalar s) + { + scale(p, s); + } + + Plane transformPlane(const Matrix&, Plane::Arg); + +} // nv namespace + +#endif // NV_MATH_PLANE_H diff --git a/src/nvmath/Polygon.cpp b/src/nvmath/Polygon.cpp new file mode 100644 index 0000000..c5f6282 --- /dev/null +++ b/src/nvmath/Polygon.cpp @@ -0,0 +1,168 @@ +// This code is in the public domain -- Ignacio Castaño + +#include + +#include +#include + +using namespace nv; + + +Polygon::Polygon() +{ +} + +Polygon::Polygon(const Triangle & t) +{ + pointArray.resize(3); + pointArray[0] = t.v[0]; + pointArray[1] = t.v[1]; + pointArray[2] = t.v[2]; +} + +Polygon::Polygon(const Vector3 * points, uint vertexCount) +{ + pointArray.resize(vertexCount); + + for (uint i = 0; i < vertexCount; i++) + { + pointArray[i] = points[i]; + } +} + + +/// Compute polygon area. +float Polygon::area() const +{ + float total = 0; + + const uint pointCount = pointArray.count(); + for (uint i = 2; i < pointCount; i++) + { + Vector3 v1 = pointArray[i-1] - pointArray[0]; + Vector3 v2 = pointArray[i] - pointArray[0]; + + total += 0.5f * length(cross(v1, v2)); + } + + return total; +} + +/// Get the bounds of the polygon. +Box Polygon::bounds() const +{ + Box bounds; + bounds.clearBounds(); + foreach(p, pointArray) + { + bounds.addPointToBounds(pointArray[p]); + } + return bounds; +} + + +/// Get the plane of the polygon. +Plane Polygon::plane() const +{ + // @@ Do something better than this? + Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]); + return Vector4(n, dot(n, pointArray[0])); +} + + +/// Clip polygon to box. +uint Polygon::clipTo(const Box & box) +{ + const Plane posX( 1, 0, 0, box.maxCorner().x()); + const Plane negX(-1, 0, 0,-box.minCorner().x()); + const Plane posY( 0, 1, 0, box.maxCorner().y()); + const Plane negY( 0,-1, 0,-box.minCorner().y()); + const Plane posZ( 0, 0, 1, box.maxCorner().z()); + const Plane negZ( 0, 0,-1,-box.minCorner().z()); + + if (clipTo(posX) == 0) return 0; + if (clipTo(negX) == 0) return 0; + if (clipTo(posY) == 0) return 0; + if (clipTo(negY) == 0) return 0; + if (clipTo(posZ) == 0) return 0; + if (clipTo(negZ) == 0) return 0; + + return pointArray.count(); +} + + +/// Clip polygon to plane. +uint Polygon::clipTo(const Plane & plane) +{ + int count = 0; + + const uint pointCount = pointArray.count(); + + Array newPointArray(pointCount + 1); // @@ Do not create copy every time. + + Vector3 prevPoint = pointArray[pointCount - 1]; + float prevDist = dot(plane.vector(), prevPoint) - plane.offset(); + + for (uint i = 0; i < pointCount; i++) + { + const Vector3 point = pointArray[i]; + float dist = dot(plane.vector(), point) - plane.offset(); + + // @@ Handle points on plane better. + + if (dist <= 0) // interior. + { + if (prevDist > 0) // exterior + { + // Add segment intersection point. + Vector3 dp = point - prevPoint; + + float t = dist / prevDist; + newPointArray.append(point - dp * t); + } + + // Add interior point. + newPointArray.append(point); + } + else if (dist > 0 && prevDist < 0) + { + // Add segment intersection point. + Vector3 dp = point - prevPoint; + + float t = dist / prevDist; + newPointArray.append(point - dp * t); + } + + prevPoint = point; + prevDist = dist; + } + + swap(pointArray, newPointArray); + + return count; +} + + +void Polygon::removeColinearPoints() +{ + const uint pointCount = pointArray.count(); + + Array newPointArray(pointCount); + + for (uint i = 0 ; i < pointCount; i++) + { + int j = (i + 1) % pointCount; + int k = (i + pointCount - 1) % pointCount; + + Vector3 v1 = normalize(pointArray[j] - pointArray[i]); + Vector3 v2 = normalize(pointArray[i] - pointArray[k]); + + if (dot(v1, v2) < 0.999) + { + newPointArray.append(pointArray[i]); + } + } + + swap(pointArray, newPointArray); +} + diff --git a/src/nvmath/Polygon.h b/src/nvmath/Polygon.h new file mode 100644 index 0000000..e70463a --- /dev/null +++ b/src/nvmath/Polygon.h @@ -0,0 +1,45 @@ +// This code is in the public domain -- Ignacio Castaño + +#ifndef NV_MATH_POLYGON_H +#define NV_MATH_POLYGON_H + +#include + +#include +#include +#include + +namespace nv +{ + class Box; + class Plane; + class Triangle; + + + class Polygon + { + NV_FORBID_COPY(Polygon); + public: + + Polygon(); + Polygon(const Triangle & t); + Polygon(const Vector3 * points, uint vertexCount); + + float area() const; + Box bounds() const; + Plane plane() const; + + uint clipTo(const Box & box); + uint clipTo(const Plane & plane); + + void removeColinearPoints(); + + private: + + Array pointArray; + }; + + +} // nv namespace + +#endif // NV_MATH_POLYGON_H diff --git a/src/nvmath/Quaternion.h b/src/nvmath/Quaternion.h index b5007cc..1002ff9 100644 --- a/src/nvmath/Quaternion.h +++ b/src/nvmath/Quaternion.h @@ -51,7 +51,6 @@ namespace nv inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b) { - // @@ Efficient SIMD implementation? return Quaternion( + a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(), - a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(), @@ -59,6 +58,40 @@ namespace nv - a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w()); } + inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b) + { + return Quaternion( + + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(), + - a.x() * b.z() + a.z()*b.x() + a.w()*b.y(), + + a.x() * b.y() - a.y()*b.x() + a.w()*b.z(), + - a.x() * b.x() - a.y()*b.y() - a.z()*b.z() ); + } + + inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b) + { + return Quaternion( + + a.x() * b.w() + a.y()*b.z() - a.z()*b.y(), + - a.x() * b.z() + a.y()*b.w() + a.z()*b.x(), + + a.x() * b.y() - a.y()*b.x() + a.z()*b.w(), + - a.x() * b.x() - a.y()*b.y() - a.z()*b.z()); + } + + inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b) + { + return mul(a, b); + } + + inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b) + { + return mul(a, b); + } + + inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b) + { + return mul(a, b); + } + + inline Quaternion scale(Quaternion::Arg q, float s) { return scale(q.asVector(), s); @@ -122,6 +155,24 @@ namespace nv return Quaternion(Vector4(v * s, c)); } + inline Vector3 imag(Quaternion::Arg q) + { + return q.asVector().xyz(); + } + + inline float real(Quaternion::Arg q) + { + return q.w(); + } + + + /// Transform vector. + inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v) + { + Quaternion t = q * v * conjugate(q); + return imag(t); + } + } // nv namespace diff --git a/src/nvmath/Solver.cpp b/src/nvmath/Solver.cpp new file mode 100644 index 0000000..785376b --- /dev/null +++ b/src/nvmath/Solver.cpp @@ -0,0 +1,726 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include + +using namespace nv; + +namespace +{ + class Preconditioner + { + public: + // Virtual dtor. + virtual ~Preconditioner() { } + + // Apply preconditioning step. + virtual void apply(const FullVector & x, FullVector & y) const = 0; + }; + + + // Jacobi preconditioner. + class JacobiPreconditioner : public Preconditioner + { + public: + + JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width()) + { + nvCheck(M.isSquare()); + + for(uint x = 0; x < M.width(); x++) + { + float elem = M.getCoefficient(x, x); + nvDebugCheck( elem != 0.0f ); + + if (symmetric) + { + m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem)); + } + else + { + m_inverseDiagonal[x] = 1.0f / elem; + } + } + } + + void apply(const FullVector & x, FullVector & y) const + { + y *= x; + } + + private: + + FullVector m_inverseDiagonal; + + }; + +} // namespace + + +static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon); +static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon); + + +// Solve the symmetric system: At·A·x = At·b +void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/) +{ + nvDebugCheck(A.width() == x.dimension()); + nvDebugCheck(A.height() == b.dimension()); + nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly... + + const uint D = A.width(); + + FullVector Atb(D); + mult(Transposed, A, b, Atb); + + SparseMatrix AtA(D); + mult(Transposed, A, NoTransposed, A, AtA); + + SymmetricSolver(AtA, Atb, x, epsilon); +} + + +// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007 +void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/) +{ + nvDebugCheck(A.width() == x.dimension()); + nvDebugCheck(A.height() == b.dimension()); + nvDebugCheck(A.height() >= A.width() - lockedCount); + + // @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly. + + const uint D = A.width() - lockedCount; + nvDebugCheck(D > 0); + + // Compute: b - Al * xl + FullVector b_Alxl(b); + + for (uint y = 0; y < A.height(); y++) + { + const uint count = A.getRow(y).count(); + for (uint e = 0; e < count; e++) + { + uint column = A.getRow(y)[e].x; + + bool isFree = true; + for (uint i = 0; i < lockedCount; i++) + { + isFree &= (lockedParameters[i] != column); + } + + if (!isFree) + { + b_Alxl[y] -= x[column] * A.getRow(y)[e].v; + } + } + } + + // Remove locked columns from A. + SparseMatrix Af(D, A.height()); + + for (uint y = 0; y < A.height(); y++) + { + const uint count = A.getRow(y).count(); + for (uint e = 0; e < count; e++) + { + uint column = A.getRow(y)[e].x; + uint ix = column; + + bool isFree = true; + for (uint i = 0; i < lockedCount; i++) + { + isFree &= (lockedParameters[i] != column); + if (column > lockedParameters[i]) ix--; // shift columns + } + + if (isFree) + { + Af.setCoefficient(ix, y, A.getRow(y)[e].v); + } + } + } + + // Remove elements from x + FullVector xf(D); + + for (uint i = 0, j = 0; i < A.width(); i++) + { + bool isFree = true; + for (uint l = 0; l < lockedCount; l++) + { + isFree &= (lockedParameters[l] != i); + } + + if (isFree) + { + xf[j++] = x[i]; + } + } + + // Solve reduced system. + LeastSquaresSolver(Af, b_Alxl, xf, epsilon); + + // Copy results back to x. + for (uint i = 0, j = 0; i < A.width(); i++) + { + bool isFree = true; + for (uint l = 0; l < lockedCount; l++) + { + isFree &= (lockedParameters[l] != i); + } + + if (isFree) + { + x[i] = xf[j++]; + } + } +} + + +void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/) +{ + nvDebugCheck(A.height() == A.width()); + nvDebugCheck(A.height() == b.dimension()); + nvDebugCheck(b.dimension() == x.dimension()); + +// JacobiPreconditioner jacobi(A, true); + +// ConjugateGradientSolver(jacobi, A, b, x, epsilon); + ConjugateGradientSolver(A, b, x, epsilon); +} + + +/** + * Compute the solution of the sparse linear system Ab=x using the Conjugate + * Gradient method. + * + * Solving sparse linear systems: + * (1) A·x = b + * + * The conjugate gradient algorithm solves (1) only in the case that A is + * symmetric and positive definite. It is based on the idea of minimizing the + * function + * + * (2) f(x) = 1/2·x·A·x - b·x + * + * This function is minimized when its gradient + * + * (3) df = A·x - b + * + * is zero, which is equivalent to (1). The minimization is carried out by + * generating a succession of search directions p.k and improved minimizers x.k. + * At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k), + * and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are + * built up in such a way that x.k+1 is also the minimizer of f over the whole + * vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N + * iterations you arrive at the minimizer over the entire vector space, i.e., the + * solution to (1). + * + * For a really good explanation of the method see: + * + * "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain", + * Jonhathan Richard Shewchuk. + * +**/ +/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon) +{ + nvDebugCheck( A.isSquare() ); + nvDebugCheck( A.width() == b.dimension() ); + nvDebugCheck( A.width() == x.dimension() ); + + int i = 0; + const int D = A.width(); + const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not. + + FullVector r(D); // residual + FullVector p(D); // search direction + FullVector q(D); // + float delta_0; + float delta_old; + float delta_new; + float alpha; + float beta; + + // r = b - A·x; + copy(b, r); + sgemv(-1, A, x, 1, r); + + // p = r; + copy(r, p); + + delta_new = dot( r, r ); + delta_0 = delta_new; + + while (i < i_max && delta_new > epsilon*epsilon*delta_0) + { + i++; + + // q = A·p + mult(A, p, q); + + // alpha = delta_new / p·q + alpha = delta_new / dot( p, q ); + + // x = alfa·p + x + saxpy(alpha, p, x); + + if ((i & 31) == 0) // recompute r after 32 steps + { + // r = b - A·x + copy(b, r); + sgemv(-1, A, x, 1, r); + } + else + { + // r = r - alpha·q + saxpy(-alpha, q, r); + } + + delta_old = delta_new; + delta_new = dot( r, r ); + + beta = delta_new / delta_old; + + // p = r + beta·p + copy(r, p); + saxpy(beta, p, r); + } + + return i; +} + + +// Conjugate gradient with preconditioner. +/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon) +{ + nvDebugCheck( A.isSquare() ); + nvDebugCheck( A.width() == b.dimension() ); + nvDebugCheck( A.width() == x.dimension() ); + + int i = 0; + const int D = A.width(); + const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not. + + FullVector r(D); // residual + FullVector p(D); // search direction + FullVector q(D); // + FullVector s(D); // preconditioned + float delta_0; + float delta_old; + float delta_new; + float alpha; + float beta; + + // r = b - A·x + copy(b, r); + sgemv(-1, A, x, 1, r); + + + // p = M^-1 · r + preconditioner.apply(r, p); + //copy(r, p); + + + delta_new = dot(r, p); + delta_0 = delta_new; + + while (i < i_max && delta_new > epsilon*epsilon*delta_0) + { + i++; + + // q = A·p + mult(A, p, q); + + // alpha = delta_new / p·q + alpha = delta_new / dot(p, q); + + // x = alfa·p + x + saxpy(alpha, p, x); + + if ((i & 31) == 0) // recompute r after 32 steps + { + // r = b - A·x + copy(b, r); + sgemv(-1, A, x, 1, r); + } + else + { + // r = r - alfa·q + saxpy(-alpha, q, r); + } + + // s = M^-1 · r + preconditioner.apply(r, s); + //copy(r, s); + + delta_old = delta_new; + delta_new = dot( r, s ); + + beta = delta_new / delta_old; + + // p = s + beta·p + copy(s, p); + saxpy(beta, p, s); + } + + return i; +} + + +#if 0 // Nonsymmetric solvers + +/** Bi-conjugate gradient method. */ +MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) { + piDebugCheck( A.IsSquare() ); + piDebugCheck( A.Width() == b.Dim() ); + piDebugCheck( A.Width() == x.Dim() ); + + int i = 0; + const int D = A.Width(); + const int i_max = 4 * D; + + float resid; + float rho_1 = 0; + float rho_2 = 0; + float alpha; + float beta; + + DenseVector r(D); + DenseVector rtilde(D); + DenseVector p(D); + DenseVector ptilde(D); + DenseVector q(D); + DenseVector qtilde(D); + DenseVector tmp(D); // temporal vector. + + // r = b - A·x; + A.Product( x, tmp ); + r.Sub( b, tmp ); + + // rtilde = r + rtilde.Set( r ); + + // p = r; + p.Set( r ); + + // ptilde = rtilde + ptilde.Set( rtilde ); + + + + float normb = b.Norm(); + if( normb == 0.0 ) normb = 1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges? + return 0; + } + + + while( i < i_max ) { + + i++; + + rho_1 = DenseVectorDotProduct( r, rtilde ); + + if( rho_1 == 0 ) { + // method fails. + return -i; + } + + if (i == 1) { + p.Set( r ); + ptilde.Set( rtilde ); + } + else { + beta = rho_1 / rho_2; + + // p = r + beta * p; + p.Mad( r, p, beta ); + + // ptilde = ztilde + beta * ptilde; + ptilde.Mad( rtilde, ptilde, beta ); + } + + // q = A * p; + A.Product( p, q ); + + // qtilde = A^t * ptilde; + A.TransProduct( ptilde, qtilde ); + + alpha = rho_1 / DenseVectorDotProduct( ptilde, q ); + + // x += alpha * p; + x.Mad( x, p, alpha ); + + // r -= alpha * q; + r.Mad( r, q, -alpha ); + + // rtilde -= alpha * qtilde; + rtilde.Mad( rtilde, qtilde, -alpha ); + + rho_2 = rho_1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges + return i; + } + } + + return i; +} + + +/** Bi-conjugate gradient stabilized method. */ +int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) { + piDebugCheck( A.IsSquare() ); + piDebugCheck( A.Width() == b.Dim() ); + piDebugCheck( A.Width() == x.Dim() ); + + int i = 0; + const int D = A.Width(); + const int i_max = 2 * D; + + + float resid; + float rho_1 = 0; + float rho_2 = 0; + float alpha = 0; + float beta = 0; + float omega = 0; + + DenseVector p(D); + DenseVector phat(D); + DenseVector s(D); + DenseVector shat(D); + DenseVector t(D); + DenseVector v(D); + + DenseVector r(D); + DenseVector rtilde(D); + + DenseVector tmp(D); + + // r = b - A·x; + A.Product( x, tmp ); + r.Sub( b, tmp ); + + // rtilde = r + rtilde.Set( r ); + + + float normb = b.Norm(); + if( normb == 0.0 ) normb = 1; + + // test convergence + resid = r.Norm() / normb; + if( resid < epsilon ) { + // method converges? + return 0; + } + + + while( i + +namespace nv +{ + + // Linear solvers. + NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f); + NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f); + NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f); +// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f); + +} // nv namespace + + +#endif // NV_MATH_SOLVER_H diff --git a/src/nvmath/Sparse.cpp b/src/nvmath/Sparse.cpp new file mode 100644 index 0000000..885c49f --- /dev/null +++ b/src/nvmath/Sparse.cpp @@ -0,0 +1,831 @@ +// This code is in the public domain -- Ignacio Castaño + +#include +#include + +using namespace nv; + + +/// Ctor. +FullVector::FullVector(uint dim) +{ + m_array.resize(dim); +} + +/// Copy ctor. +FullVector::FullVector(const FullVector & v) : m_array(v.m_array) +{ +} + +/// Copy operator +const FullVector & FullVector::operator=(const FullVector & v) +{ + nvCheck(dimension() == v.dimension()); + + m_array = v.m_array; + + return *this; +} + + +void FullVector::fill(float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] = f; + } +} + +void FullVector::operator+= (const FullVector & v) +{ + nvDebugCheck(dimension() == v.dimension()); + + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] += v.m_array[i]; + } +} + +void FullVector::operator-= (const FullVector & v) +{ + nvDebugCheck(dimension() == v.dimension()); + + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] -= v.m_array[i]; + } +} + +void FullVector::operator*= (const FullVector & v) +{ + nvDebugCheck(dimension() == v.dimension()); + + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] *= v.m_array[i]; + } +} + +void FullVector::operator+= (float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] += f; + } +} + +void FullVector::operator-= (float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] -= f; + } +} + +void FullVector::operator*= (float f) +{ + const uint dim = dimension(); + for (uint i = 0; i < dim; i++) + { + m_array[i] *= f; + } +} + + +void nv::saxpy(float a, const FullVector & x, FullVector & y) +{ + nvDebugCheck(x.dimension() == y.dimension()); + + const uint dim = x.dimension(); + for (uint i = 0; i < dim; i++) + { + y[i] += a * x[i]; + } +} + +void nv::copy(const FullVector & x, FullVector & y) +{ + nvDebugCheck(x.dimension() == y.dimension()); + + const uint dim = x.dimension(); + for (uint i = 0; i < dim; i++) + { + y[i] = x[i]; + } +} + +void nv::scal(float a, FullVector & x) +{ + const uint dim = x.dimension(); + for (uint i = 0; i < dim; i++) + { + x[i] *= a; + } +} + +float nv::dot(const FullVector & x, const FullVector & y) +{ + nvDebugCheck(x.dimension() == y.dimension()); + + const uint dim = x.dimension(); + + /*float sum = 0; + for (uint i = 0; i < dim; i++) + { + sum += x[i] * y[i]; + } + return sum;*/ + + KahanSum kahan; + + for (uint i = 0; i < dim; i++) + { + kahan.add(x[i] * y[i]); + } + + return kahan.sum(); +} + + +FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d) +{ + m_array.resize(d*d, 0.0f); +} + +FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h) +{ + m_array.resize(w*h, 0.0f); +} + +FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height) +{ + m_array = m.m_array; +} + +const FullMatrix & FullMatrix::operator=(const FullMatrix & m) +{ + nvCheck(width() == m.width()); + nvCheck(height() == m.height()); + + m_array = m.m_array; + + return *this; +} + + +float FullMatrix::getCoefficient(uint x, uint y) const +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + return m_array[y * width() + x]; +} + +void FullMatrix::setCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + m_array[y * width() + x] = f; +} + +void FullMatrix::addCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + m_array[y * width() + x] += f; +} + +void FullMatrix::mulCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + m_array[y * width() + x] *= f; +} + +float FullMatrix::dotRow(uint y, const FullVector & v) const +{ + nvDebugCheck( v.dimension() == width() ); + nvDebugCheck( y < height() ); + + float sum = 0; + + const uint count = v.dimension(); + for (uint i = 0; i < count; i++) + { + sum += m_array[y * count + i] * v[i]; + } + + return sum; +} + +void FullMatrix::madRow(uint y, float alpha, FullVector & v) const +{ + nvDebugCheck( v.dimension() == width() ); + nvDebugCheck( y < height() ); + + const uint count = v.dimension(); + for (uint i = 0; i < count; i++) + { + v[i] += m_array[y * count + i]; + } +} + + +// y = M * x +void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y) +{ + mult(NoTransposed, M, x, y); +} + +void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y) +{ + const uint w = M.width(); + const uint h = M.height(); + + if (TM == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + y.fill(0.0f); + + for (uint i = 0; i < h; i++) + { + M.madRow(i, x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = M.dotRow(i, x); + } + } +} + +// y = alpha*A*x + beta*y +void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + sgemv(alpha, NoTransposed, A, x, beta, y); +} + +void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + const uint w = A.width(); + const uint h = A.height(); + + if (TA == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + A.madRow(i, alpha * x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = alpha * A.dotRow(i, x) + beta * y[i]; + } + } +} + + +// Multiply a row of A by a column of B. +static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B) +{ + const uint w = (TA == NoTransposed) ? A.width() : A.height(); + nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width()); + + float sum = 0.0f; + + for (uint k = 0; k < w; k++) + { + const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop? + const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i); + sum += a * b; + } + + return sum; +} + + +// C = A * B +void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C) +{ + mult(NoTransposed, A, NoTransposed, B, C); +} + +void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C) +{ + sgemm(1.0f, TA, A, TB, B, 0.0f, C); +} + +// C = alpha*A*B + beta*C +void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C) +{ + sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C); +} + +void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C) +{ + const uint w = C.width(); + const uint h = C.height(); + + uint aw = (TA == NoTransposed) ? A.width() : A.height(); + uint ah = (TA == NoTransposed) ? A.height() : A.width(); + uint bw = (TB == NoTransposed) ? B.width() : B.height(); + uint bh = (TB == NoTransposed) ? B.height() : B.width(); + + nvDebugCheck(aw == bh); + nvDebugCheck(bw == ah); + nvDebugCheck(w == bw); + nvDebugCheck(h == ah); + + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y); + C.setCoefficient(x, y, c); + } + } +} + + + + + +/// Ctor. Init the size of the sparse matrix. +SparseMatrix::SparseMatrix(uint d) : m_width(d) +{ + m_array.resize(d); +} + +/// Ctor. Init the size of the sparse matrix. +SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w) +{ + m_array.resize(h); +} + +SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width) +{ + m_array = m.m_array; +} + +const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m) +{ + nvCheck(width() == m.width()); + nvCheck(height() == m.height()); + + m_array = m.m_array; + + return *this; +} + + +// x is column, y is row +float SparseMatrix::getCoefficient(uint x, uint y) const +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) return m_array[y][i].v; + } + + return 0.0f; +} + +void SparseMatrix::setCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) + { + m_array[y][i].v = f; + return; + } + } + + Coefficient c = { x, f }; + m_array[y].append( c ); +} + +void SparseMatrix::addCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) + { + m_array[y][i].v += f; + return; + } + } + + Coefficient c = { x, f }; + m_array[y].append( c ); +} + +void SparseMatrix::mulCoefficient(uint x, uint y, float f) +{ + nvDebugCheck( x < width() ); + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + if (m_array[y][i].x == x) + { + m_array[y][i].v *= f; + return; + } + } + + Coefficient c = { x, f }; + m_array[y].append( c ); +} + + +float SparseMatrix::sumRow(uint y) const +{ + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + + /*float sum = 0; + for (uint i = 0; i < count; i++) + { + sum += m_array[y][i].v; + } + return sum;*/ + + KahanSum kahan; + + for (uint i = 0; i < count; i++) + { + kahan.add(m_array[y][i].v); + } + + return kahan.sum(); +} + +float SparseMatrix::dotRow(uint y, const FullVector & v) const +{ + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + + /*float sum = 0; + for (uint i = 0; i < count; i++) + { + sum += m_array[y][i].v * v[m_array[y][i].x]; + } + return sum;*/ + + KahanSum kahan; + + for (uint i = 0; i < count; i++) + { + kahan.add(m_array[y][i].v * v[m_array[y][i].x]); + } + + return kahan.sum(); +} + +void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const +{ + nvDebugCheck(y < height()); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + v[m_array[y][i].x] += alpha * m_array[y][i].v; + } +} + + +void SparseMatrix::clearRow(uint y) +{ + nvDebugCheck( y < height() ); + + m_array[y].clear(); +} + +void SparseMatrix::scaleRow(uint y, float f) +{ + nvDebugCheck( y < height() ); + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + m_array[y][i].v *= f; + } +} + +void SparseMatrix::normalizeRow(uint y) +{ + nvDebugCheck( y < height() ); + + float norm = 0.0f; + + const uint count = m_array[y].count(); + for (uint i = 0; i < count; i++) + { + float f = m_array[y][i].v; + norm += f * f; + } + + scaleRow(y, 1.0f / sqrtf(norm)); +} + + +void SparseMatrix::clearColumn(uint x) +{ + nvDebugCheck(x < width()); + + for (uint y = 0; y < height(); y++) + { + const uint count = m_array[y].count(); + for (uint e = 0; e < count; e++) + { + if (m_array[y][e].x == x) + { + m_array[y][e].v = 0.0f; + break; + } + } + } +} + +void SparseMatrix::scaleColumn(uint x, float f) +{ + nvDebugCheck(x < width()); + + for (uint y = 0; y < height(); y++) + { + const uint count = m_array[y].count(); + for (uint e = 0; e < count; e++) + { + if (m_array[y][e].x == x) + { + m_array[y][e].v *= f; + break; + } + } + } +} + +const Array & SparseMatrix::getRow(uint y) const +{ + return m_array[y]; +} + + +// y = M * x +void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y) +{ + mult(NoTransposed, M, x, y); +} + +void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y) +{ + const uint w = M.width(); + const uint h = M.height(); + + if (TM == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + y.fill(0.0f); + + for (uint i = 0; i < h; i++) + { + M.madRow(i, x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = M.dotRow(i, x); + } + } +} + +// y = alpha*A*x + beta*y +void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + sgemv(alpha, NoTransposed, A, x, beta, y); +} + +void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y) +{ + const uint w = A.width(); + const uint h = A.height(); + + if (TA == Transposed) + { + nvDebugCheck( h == x.dimension() ); + nvDebugCheck( w == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + A.madRow(i, alpha * x[i], y); + } + } + else + { + nvDebugCheck( w == x.dimension() ); + nvDebugCheck( h == y.dimension() ); + + for (uint i = 0; i < h; i++) + { + y[i] = alpha * A.dotRow(i, x) + beta * y[i]; + } + } +} + + +// dot y-row of A by x-column of B +static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B) +{ + const Array & row = A.getRow(y); + + const uint count = row.count(); + + /*float sum = 0.0f; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + sum += c.v * B.getCoefficient(x, c.x); + } + return sum;*/ + + KahanSum kahan; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + kahan.add(c.v * B.getCoefficient(x, c.x)); + } + + return kahan.sum(); +} + +// dot y-row of A by x-row of B +static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B) +{ + const Array & row = A.getRow(y); + + const uint count = row.count(); + + /*float sum = 0.0f; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + sum += c.v * B.getCoefficient(c.x, x); + } + //return sum;*/ + + KahanSum kahan; + for (uint i = 0; i < count; i++) + { + const SparseMatrix::Coefficient & c = row[i]; + kahan.add(c.v * B.getCoefficient(c.x, x)); + } + + return kahan.sum(); +} + +// dot y-column of A by x-column of B +static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B) +{ + nvDebugCheck(A.height() == B.height()); + + const uint h = A.height(); + + /*float sum = 0.0f; + for (uint i = 0; i < h; i++) + { + sum += A.getCoefficient(y, i) * B.getCoefficient(x, i); + } + //return sum;*/ + + KahanSum kahan; + for (uint i = 0; i < h; i++) + { + kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i)); + } + + return kahan.sum(); +} + + + +// C = A * B +void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C) +{ + mult(NoTransposed, A, NoTransposed, B, C); +} + +void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C) +{ + sgemm(1.0f, TA, A, TB, B, 0.0f, C); +} + +// C = alpha*A*B + beta*C +void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C) +{ + sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C); +} + +void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C) +{ + const uint w = C.width(); + const uint h = C.height(); + + uint aw = (TA == NoTransposed) ? A.width() : A.height(); + uint ah = (TA == NoTransposed) ? A.height() : A.width(); + uint bw = (TB == NoTransposed) ? B.width() : B.height(); + uint bh = (TB == NoTransposed) ? B.height() : B.width(); + + nvDebugCheck(aw == bh); + nvDebugCheck(bw == ah); + nvDebugCheck(w == bw); + nvDebugCheck(h == ah); + + + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + float c = beta * C.getCoefficient(x, y); + + if (TA == NoTransposed && TB == NoTransposed) + { + // dot y-row of A by x-column of B. + c += alpha * dotRowColumn(y, A, x, B); + } + else if (TA == Transposed && TB == Transposed) + { + // dot y-column of A by x-row of B. + c += alpha * dotRowColumn(x, B, y, A); + } + else if (TA == Transposed && TB == NoTransposed) + { + // dot y-column of A by x-column of B. + c += alpha * dotColumnColumn(y, A, x, B); + } + else if (TA == NoTransposed && TB == Transposed) + { + // dot y-row of A by x-row of B. + c += alpha * dotRowRow(y, A, x, B); + } + + if (c != 0.0f) + { + C.setCoefficient(x, y, c); + } + } + } + +} + +// C = At * A +void nv::sqm(const SparseMatrix & A, SparseMatrix & C) +{ + // This is quite expensive... + mult(Transposed, A, NoTransposed, A, C); +} diff --git a/src/nvmath/Sparse.h b/src/nvmath/Sparse.h new file mode 100644 index 0000000..29439f8 --- /dev/null +++ b/src/nvmath/Sparse.h @@ -0,0 +1,198 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_SPARSE_H +#define NV_MATH_SPARSE_H + +#include +#include + +// Full and sparse vector and matrix classes. BLAS subset. + +namespace nv +{ + class FullVector; + class FullMatrix; + class SparseMatrix; + + + /// Fixed size vector class. + class FullVector + { + public: + + FullVector(uint dim); + FullVector(const FullVector & v); + + const FullVector & operator=(const FullVector & v); + + uint dimension() const { return m_array.count(); } + + const float & operator[]( uint index ) const { return m_array[index]; } + float & operator[] ( uint index ) { return m_array[index]; } + + void fill(float f); + + void operator+= (const FullVector & v); + void operator-= (const FullVector & v); + void operator*= (const FullVector & v); + + void operator+= (float f); + void operator-= (float f); + void operator*= (float f); + + + private: + + Array m_array; + + }; + + // Pseudo-BLAS interface. + NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y + NVMATH_API void copy(const FullVector & x, FullVector & y); + NVMATH_API void scal(float a, FullVector & x); + NVMATH_API float dot(const FullVector & x, const FullVector & y); + + + enum Transpose + { + NoTransposed = 0, + Transposed = 1 + }; + + /// Full matrix class. + class FullMatrix + { + public: + + FullMatrix(uint d); + FullMatrix(uint w, uint h); + FullMatrix(const FullMatrix & m); + + const FullMatrix & operator=(const FullMatrix & m); + + uint width() const { return m_width; } + uint height() const { return m_height; } + bool isSquare() const { return m_width == m_height; } + + float getCoefficient(uint x, uint y) const; + + void setCoefficient(uint x, uint y, float f); + void addCoefficient(uint x, uint y, float f); + void mulCoefficient(uint x, uint y, float f); + + float dotRow(uint y, const FullVector & v) const; + void madRow(uint y, float alpha, FullVector & v) const; + + protected: + + bool isValid() const { + return m_array.size() == (m_width * m_height); + } + + private: + + const uint m_width; + const uint m_height; + Array m_array; + + }; + + NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y); + NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y); + + // y = alpha*A*x + beta*y + NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y); + NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y); + + NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C); + NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C); + + // C = alpha*A*B + beta*C + NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C); + NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C); + + + /** + * Sparse matrix class. The matrix is assumed to be sparse and to have + * very few non-zero elements, for this reason it's stored in indexed + * format. To multiply column vectors efficiently, the matrix stores + * the elements in indexed-column order, there is a list of indexed + * elements for each row of the matrix. As with the FullVector the + * dimension of the matrix is constant. + **/ + class SparseMatrix + { + friend class FullMatrix; + public: + + // An element of the sparse array. + struct Coefficient { + uint x; // column + float v; // value + }; + + + public: + + SparseMatrix(uint d); + SparseMatrix(uint w, uint h); + SparseMatrix(const SparseMatrix & m); + + const SparseMatrix & operator=(const SparseMatrix & m); + + + uint width() const { return m_width; } + uint height() const { return m_array.count(); } + bool isSquare() const { return width() == height(); } + + float getCoefficient(uint x, uint y) const; // x is column, y is row + + void setCoefficient(uint x, uint y, float f); + void addCoefficient(uint x, uint y, float f); + void mulCoefficient(uint x, uint y, float f); + + float sumRow(uint y) const; + float dotRow(uint y, const FullVector & v) const; + void madRow(uint y, float alpha, FullVector & v) const; + + void clearRow(uint y); + void scaleRow(uint y, float f); + void normalizeRow(uint y); + + void clearColumn(uint x); + void scaleColumn(uint x, float f); + + const Array & getRow(uint y) const; + + private: + + /// Number of columns. + const uint m_width; + + /// Array of matrix elements. + Array< Array > m_array; + + }; + + NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y); + NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y); + + // y = alpha*A*x + beta*y + NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y); + NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y); + + NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C); + NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C); + + // C = alpha*A*B + beta*C + NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C); + NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C); + + // C = At * A + NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C); + +} // nv namespace + + +#endif // NV_MATH_SPARSE_H diff --git a/src/nvmath/SphericalHarmonic.h b/src/nvmath/SphericalHarmonic.h index 7e8341c..a925d93 100644 --- a/src/nvmath/SphericalHarmonic.h +++ b/src/nvmath/SphericalHarmonic.h @@ -3,6 +3,7 @@ #ifndef NV_MATH_SPHERICALHARMONIC_H #define NV_MATH_SPHERICALHARMONIC_H +#include // memcpy #include namespace nv diff --git a/src/nvmath/TriBox.cpp b/src/nvmath/TriBox.cpp index 61d69bb..f58af13 100644 --- a/src/nvmath/TriBox.cpp +++ b/src/nvmath/TriBox.cpp @@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg if(min>rad || max<-rad) return false; -bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) +bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) { // use separating axis theorem to test overlap between triangle and box // need to test for overlap in these directions: @@ -170,7 +170,7 @@ bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Trian } -bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) +bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) { // use separating axis theorem to test overlap between triangle and box // need to test for overlap in these directions: diff --git a/src/nvmath/TypeSerialization.cpp b/src/nvmath/TypeSerialization.cpp new file mode 100644 index 0000000..d70ddeb --- /dev/null +++ b/src/nvmath/TypeSerialization.cpp @@ -0,0 +1,82 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#include + +#include +#include +#include +#include +#include + +#include + +using namespace nv; + +Stream & nv::operator<< (Stream & s, Vector2 & v) +{ + float x = v.x(); + float y = v.y(); + + s << x << y; + + if (s.isLoading()) + { + v.set(x, y); + } + + return s; +} + +Stream & nv::operator<< (Stream & s, Vector3 & v) +{ + float x = v.x(); + float y = v.y(); + float z = v.z(); + + s << x << y << z; + + if (s.isLoading()) + { + v.set(x, y, z); + } + + return s; +} + +Stream & nv::operator<< (Stream & s, Vector4 & v) +{ + float x = v.x(); + float y = v.y(); + float z = v.z(); + float w = v.w(); + + s << x << y << z << w; + + if (s.isLoading()) + { + v.set(x, y, z, w); + } + + return s; +} + +Stream & nv::operator<< (Stream & s, Matrix & m) +{ + return s; +} + +Stream & nv::operator<< (Stream & s, Quaternion & q) +{ + return s << q.asVector(); +} + +Stream & nv::operator<< (Stream & s, Basis & basis) +{ + return s << basis.tangent << basis.bitangent << basis.normal; +} + +Stream & nv::operator<< (Stream & s, Box & box) +{ + return s << box.m_mins << box.m_maxs; +} + diff --git a/src/nvmath/TypeSerialization.h b/src/nvmath/TypeSerialization.h new file mode 100644 index 0000000..3c50a24 --- /dev/null +++ b/src/nvmath/TypeSerialization.h @@ -0,0 +1,32 @@ +// This code is in the public domain -- castanyo@yahoo.es + +#ifndef NV_MATH_TYPESERIALIZATION_H +#define NV_MATH_TYPESERIALIZATION_H + +#include + +namespace nv +{ + class Stream; + + class Vector2; + class Vector3; + class Vector4; + + class Matrix; + class Quaternion; + struct Basis; + class Box; + + NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj); + NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj); + NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj); + + NVMATH_API Stream & operator<< (Stream & s, Matrix & obj); + NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj); + NVMATH_API Stream & operator<< (Stream & s, Basis & obj); + NVMATH_API Stream & operator<< (Stream & s, Box & obj); + +} // nv namespace + +#endif // NV_MATH_TYPESERIALIZATION_H diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h index bffdfbe..d474090 100644 --- a/src/nvmath/Vector.h +++ b/src/nvmath/Vector.h @@ -4,7 +4,7 @@ #define NV_MATH_VECTOR_H #include -#include // min, max +#include // min, max namespace nv { @@ -71,6 +71,7 @@ public: const Vector2 & xy() const; scalar component(uint idx) const; + void setComponent(uint idx, scalar f); const scalar * ptr() const; @@ -239,13 +240,21 @@ inline const Vector2 & Vector3::xy() const inline scalar Vector3::component(uint idx) const { nvDebugCheck(idx < 3); - if (idx == 0) return x(); - if (idx == 1) return y(); - if (idx == 2) return z(); + if (idx == 0) return m_x; + if (idx == 1) return m_y; + if (idx == 2) return m_z; nvAssume(false); return 0.0f; } +inline void Vector3::setComponent(uint idx, float f) +{ + nvDebugCheck(idx < 3); + if (idx == 0) m_x = f; + else if (idx == 1) m_y = f; + else if (idx == 2) m_z = f; +} + inline const scalar * Vector3::ptr() const { return &m_x; @@ -477,6 +486,35 @@ inline scalar length(Vector2::Arg v) return sqrtf(length_squared(v)); } +inline scalar inverse_length(Vector2::Arg v) +{ + return 1.0f / sqrtf(length_squared(v)); +} + +inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) +{ + return equal(length(v), 1, epsilon); +} + +inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) +{ + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector2 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; +} + +inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) +{ + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); +} + + inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) { return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon); @@ -595,6 +633,11 @@ inline scalar length(Vector3::Arg v) return sqrtf(length_squared(v)); } +inline scalar inverse_length(Vector3::Arg v) +{ + return 1.0f / sqrtf(length_squared(v)); +} + inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) { return equal(length(v), 1, epsilon); @@ -716,6 +759,11 @@ inline scalar length(Vector4::Arg v) return sqrtf(length_squared(v)); } +inline scalar inverse_length(Vector4::Arg v) +{ + return 1.0f / sqrtf(length_squared(v)); +} + inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) { return equal(length(v), 1, epsilon); diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 7710921..1bba967 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -136,6 +136,11 @@ inline float lerp(float f0, float f1, float t) return f0 * s + f1 * t; } +inline float square(float f) +{ + return f * f; +} + } // nv #endif // NV_MATH_H