Merge private branch.

2008-04-17 06:58:43 +00:00 · 2008-04-17 06:58:43 +00:00 · 17a4f765fb
commit 17a4f765fb
parent cb91740591
20 changed files with 2440 additions and 43 deletions
--- a/src/nvmath/Basis.cpp
+++ b/src/nvmath/Basis.cpp
@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
 			return;
 		}
 	}
-	normal = ::normalize(normal, epsilon);
+	normal = nv::normalize(normal, epsilon);
 	
 	tangent -= normal * dot(normal, tangent);
 	bitangent -= normal * dot(normal, bitangent);
@ -68,7 +68,8 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
 	}
 	else
 	{
-		tangent = ::normalize(tangent, epsilon);
+#if 0
+		tangent = nv::normalize(tangent, epsilon);
 		bitangent -= tangent * dot(tangent, bitangent);
 		
 		if (length(bitangent) < epsilon)
@ -78,11 +79,47 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
 		}
 		else
 		{
-			tangent = ::normalize(tangent, epsilon);
+			bitangent = nv::normalize(bitangent, epsilon);
 		}
+#else
+		if (length(bitangent) < epsilon)
+		{
+			bitangent = cross(tangent, normal);
+			nvCheck(isNormalized(bitangent));
+		}
+		else
+		{
+			tangent = nv::normalize(tangent);
+			bitangent = nv::normalize(bitangent);
+			
+			Vector3 bisector = nv::normalize(tangent + bitangent);
+			Vector3 axis = cross(bisector, normal);
+			
+			nvDebugCheck(isNormalized(axis, epsilon));
+			nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
+			
+			if (dot(axis, tangent) > 0)
+			{
+				tangent = nv::normalize(bisector + axis);
+				bitangent = nv::normalize(bisector - axis);
+			}
+			else
+			{
+				tangent = nv::normalize(bisector - axis);
+				bitangent = nv::normalize(bisector + axis);
+			}
+		}
+#endif
 	}
 	
-	// Check vector lengths.
+	/*// Check vector lengths.
+	if (!isNormalized(normal, epsilon))
+	{
+		nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
+		nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
+		nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
+	}*/
+	
 	nvCheck(isNormalized(normal, epsilon));
 	nvCheck(isNormalized(tangent, epsilon));
 	nvCheck(isNormalized(bitangent, epsilon));
@ -125,9 +162,18 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
 	bitangent = cross(normal, tangent);
 }

+bool Basis::isValid() const
+{
+	if (equal(normal, Vector3(zero))) return false;
+	if (equal(tangent, Vector3(zero))) return false;
+	if (equal(bitangent, Vector3(zero))) return false;
+
+	if (equal(determinant(), 0.0f)) return false;
+	
+	return true;
+}


-/*
 /// Transform by this basis. (From this basis to object space).
 Vector3 Basis::transform(Vector3::Arg v) const
 {
@ -144,30 +190,31 @@ Vector3 Basis::transformT(Vector3::Arg v)
 }

 /// Transform by the inverse. (From object space to this basis).
-/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
+/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
 Vector3 Basis::transformI(Vector3::Arg v) const
 {
 	const float det = determinant();
-	nvCheck(!equalf(det, 0.0f));
+	nvDebugCheck(!equal(det, 0.0f, 0.0f));
 	
 	const float idet = 1.0f / det;

 	// Rows of the inverse matrix.
-	Vector3 r0, r1, r2;
-	r0.x =  (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
-	r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
-	r0.z =  (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
+	Vector3 r0(
+		 (bitangent.y() * normal.z() - bitangent.z() * normal.y()),
+		-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
+		 (bitangent.x() * normal.y() - bitangent.y() * normal.x()));

-	r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
-	r1.y =  (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
-	r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
+	Vector3 r1(
+		-(tangent.y() * normal.z() - tangent.z() * normal.y()),
+		 (tangent.x() * normal.z() - tangent.z() * normal.x()),
+		-(tangent.x() * normal.y() - tangent.y() * normal.x()));

-	r2.x =  (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
-	r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
-	r2.z =  (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
+	Vector3 r2(
+		 (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
+		-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
+		 (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));

-	return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
+	return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
 }	
-*/


--- a/src/nvmath/Basis.h
+++ b/src/nvmath/Basis.h
@ -54,7 +54,8 @@ namespace nv
 				tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
 		}
 		
-		/*
+		bool isValid() const;
+		
 		// Get transform matrix for this basis.
 		NVMATH_API Matrix matrix() const;
 		
@ -66,7 +67,7 @@ namespace nv

 		// Transform by the inverse. (From object space to this basis).
 		NVMATH_API Vector3 transformI(Vector3::Arg v) const;
-		*/
+
 		
 		Vector3 tangent;
 		Vector3 bitangent;
--- a/src/nvmath/Box.h
+++ b/src/nvmath/Box.h
@ -9,6 +9,7 @@

 namespace nv
 {
+class Stream;

 /// Axis Aligned Bounding Box.
 class Box
@ -27,11 +28,13 @@ public:
 	// Cast operators.
 	operator const float * () const { return reinterpret_cast<const float *>(this); }

-	/// Min corner of the box.
-	Vector3 mins() const { return m_mins; }
+	// Min corner of the box.
+	Vector3 minCorner() const { return m_mins; }
+	Vector3 & minCorner() { return m_mins; }

-	/// Max corner of the box.
-	Vector3 maxs() const { return m_maxs; }
+	// Max corner of the box.
+	Vector3 maxCorner() const { return m_maxs; }
+	Vector3 & maxCorner() { return m_maxs; }

 	/// Clear the bounds.
 	void clearBounds()
@ -108,7 +111,7 @@ public:
 	float area() const
 	{
 		const Vector3 d = extents();
-		return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
+		return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
 	}	

 	/// Get the volume of the box.
@ -118,6 +121,16 @@ public:
 		return 8.0f * (d.x() * d.y() * d.z());
 	}
 	
+	/// Return true if the box contains the given point.
+	bool contains(Vector3::Arg p) const
+	{
+		return 
+			m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
+			m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
+	}
+
+	friend Stream & operator<< (Stream & s, Box & box);
+
 private:

 	Vector3 m_mins;
@ -125,15 +138,6 @@ private:
 };


-/*
-/// Point inside box test.
-inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
-{
-	return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
-		m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
-}
-*/
-

 } // nv namespace

--- a/src/nvmath/CMakeLists.txt
+++ b/src/nvmath/CMakeLists.txt
@ -5,13 +5,19 @@ SET(MATH_SRCS
 	Vector.h
 	Matrix.h
 	Quaternion.h
+	Plane.h Plane.cpp
 	Box.h
 	Color.h
 	Montecarlo.h Montecarlo.cpp
 	Random.h Random.cpp
 	SphericalHarmonic.h SphericalHarmonic.cpp
 	Basis.h Basis.cpp
-	Triangle.h Triangle.cpp TriBox.cpp)
+	Triangle.h Triangle.cpp TriBox.cpp
+	Polygon.h Polygon.cpp
+	TypeSerialization.h TypeSerialization.cpp
+	Sparse.h Sparse.cpp
+	Solver.h Solver.cpp
+	KahanSum.h)

 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

--- a/src/nvmath/KahanSum.h
+++ b/src/nvmath/KahanSum.h
@ -0,0 +1,38 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MATH_KAHANSUM_H
+#define NV_MATH_KAHANSUM_H
+
+#include <nvmath/nvmath.h>
+
+namespace nv
+{
+
+	class KahanSum
+	{
+	public:
+		KahanSum() : accum(0.0f), err(0) {};
+		
+		void add(float f)
+		{
+			float compensated = f + err;
+			float tmp = accum + compensated;
+			err = accum - tmp;
+			err += compensated;
+			accum = tmp;
+		}
+		
+		float sum() const
+		{
+			return accum;
+		}
+		
+	private:
+		float accum;
+		float err;
+	};
+	
+} // nv namespace
+
+
+#endif // NV_MATH_KAHANSUM_H
--- a/src/nvmath/Plane.cpp
+++ b/src/nvmath/Plane.cpp
@ -0,0 +1,17 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include "Plane.h"
+#include "Matrix.h"
+
+namespace nv
+{
+	Plane transformPlane(const Matrix& m, Plane::Arg p)
+	{
+		Vector3 newVec = transformVector(m, p.vector());
+
+		Vector3 ptInPlane = p.offset() * p.vector();
+		ptInPlane = transformPoint(m, ptInPlane);
+
+		return Plane(newVec, ptInPlane);
+	}
+}
--- a/src/nvmath/Plane.h
+++ b/src/nvmath/Plane.h
@ -0,0 +1,77 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MATH_PLANE_H
+#define NV_MATH_PLANE_H
+
+#include <nvmath/nvmath.h>
+#include <nvmath/Vector.h>
+
+namespace nv
+{
+	class Matrix;
+
+
+	class NVMATH_CLASS Plane
+	{
+	public:
+		typedef Plane const & Arg;
+		
+		Plane();
+		Plane(float x, float y, float z, float w);
+		Plane(Vector4::Arg v);
+		Plane(Vector3::Arg v, float d);
+		Plane(Vector3::Arg normal, Vector3::Arg point);
+		
+		const Plane & operator=(Plane::Arg v);
+		
+		Vector3 vector() const;
+		scalar offset() const;
+		
+		const Vector4 & asVector() const;
+		Vector4 & asVector();
+		
+		void operator*=(scalar s);
+
+	private:
+		Vector4 p;
+	};
+
+	inline Plane::Plane() {}
+	inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w) {}
+	inline Plane::Plane(Vector4::Arg v) : p(v) {}
+	inline Plane::Plane(Vector3::Arg v, float d) : p(v, d) {}
+	inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point)) {}
+	
+	inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; }
+	
+	inline Vector3 Plane::vector() const { return p.xyz(); }
+	inline scalar Plane::offset() const { return p.w(); }
+
+	inline const Vector4 & Plane::asVector() const { return p; }
+	inline Vector4 & Plane::asVector() { return p; }
+
+	// Normalize plane.
+	inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON)
+	{
+		const float len = length(plane.vector());
+		nvDebugCheck(!isZero(len, epsilon));
+		const float inv = 1.0f / len;
+		return Plane(plane.asVector() * inv);
+	}
+
+	// Get the distance from the given point to this plane.
+	inline float distance(Plane::Arg plane, Vector3::Arg point)
+	{
+		return dot(plane.vector(), point) - plane.offset();
+	}
+
+	inline void Plane::operator*=(scalar s)
+	{
+		scale(p, s);
+	}
+
+	Plane transformPlane(const Matrix&, Plane::Arg);
+	
+} // nv namespace
+
+#endif // NV_MATH_PLANE_H
--- a/src/nvmath/Polygon.cpp
+++ b/src/nvmath/Polygon.cpp
@ -0,0 +1,168 @@
+// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
+
+#include <nvmath/Polygon.h>
+
+#include <nvmath/Triangle.h>
+#include <nvmath/Plane.h>
+
+using namespace nv;
+
+
+Polygon::Polygon()
+{
+}
+
+Polygon::Polygon(const Triangle & t)
+{
+	pointArray.resize(3);
+	pointArray[0] = t.v[0];
+	pointArray[1] = t.v[1];
+	pointArray[2] = t.v[2];
+}
+
+Polygon::Polygon(const Vector3 * points, uint vertexCount)
+{
+	pointArray.resize(vertexCount);
+	
+	for (uint i = 0; i < vertexCount; i++)
+	{
+		pointArray[i] = points[i];
+	}
+}
+
+
+/// Compute polygon area.
+float Polygon::area() const
+{
+	float total = 0;
+	
+	const uint pointCount = pointArray.count();
+    for (uint i = 2; i < pointCount; i++)
+    {
+		Vector3 v1 = pointArray[i-1] - pointArray[0];
+		Vector3 v2 = pointArray[i] - pointArray[0];
+		
+		total += 0.5f * length(cross(v1, v2));
+    }
+	
+    return total;	
+}
+
+/// Get the bounds of the polygon.
+Box Polygon::bounds() const
+{
+	Box bounds;
+	bounds.clearBounds();
+	foreach(p, pointArray)
+	{
+		bounds.addPointToBounds(pointArray[p]);
+	}
+	return bounds;
+}
+
+
+/// Get the plane of the polygon.
+Plane Polygon::plane() const
+{
+	// @@ Do something better than this?
+	Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
+	return Vector4(n, dot(n, pointArray[0]));
+}
+
+
+/// Clip polygon to box.
+uint Polygon::clipTo(const Box & box)
+{
+	const Plane posX( 1, 0, 0, box.maxCorner().x());
+	const Plane negX(-1, 0, 0,-box.minCorner().x());
+	const Plane posY( 0, 1, 0, box.maxCorner().y());
+	const Plane negY( 0,-1, 0,-box.minCorner().y());
+	const Plane posZ( 0, 0, 1, box.maxCorner().z());
+	const Plane negZ( 0, 0,-1,-box.minCorner().z());
+
+	if (clipTo(posX) == 0) return 0;
+	if (clipTo(negX) == 0) return 0;
+	if (clipTo(posY) == 0) return 0;
+	if (clipTo(negY) == 0) return 0;
+	if (clipTo(posZ) == 0) return 0;
+	if (clipTo(negZ) == 0) return 0;
+	
+	return pointArray.count();
+}
+
+
+/// Clip polygon to plane.
+uint Polygon::clipTo(const Plane & plane)
+{
+	int count = 0;
+
+	const uint pointCount = pointArray.count();
+	
+	Array<Vector3> newPointArray(pointCount + 1);	// @@ Do not create copy every time.
+	
+	Vector3 prevPoint = pointArray[pointCount - 1];
+	float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
+	
+	for (uint i = 0; i < pointCount; i++)
+	{
+		const Vector3 point = pointArray[i];
+		float dist = dot(plane.vector(), point) - plane.offset();
+		
+		// @@ Handle points on plane better.
+		
+		if (dist <= 0) // interior.
+		{
+			if (prevDist > 0) // exterior
+			{
+				// Add segment intersection point.
+				Vector3 dp = point - prevPoint;
+				
+				float t = dist / prevDist;
+				newPointArray.append(point - dp * t);
+			}
+			
+			// Add interior point.
+			newPointArray.append(point);
+		}
+		else if (dist > 0 && prevDist < 0)
+		{
+			// Add segment intersection point.
+			Vector3 dp = point - prevPoint;
+			
+			float t = dist / prevDist;
+			newPointArray.append(point - dp * t);
+		}
+		
+		prevPoint = point;
+		prevDist = dist;
+	}
+
+	swap(pointArray, newPointArray);
+
+	return count;
+}
+
+
+void Polygon::removeColinearPoints()
+{
+	const uint pointCount = pointArray.count();
+	
+	Array<Vector3> newPointArray(pointCount);
+	
+    for (uint i = 0 ; i < pointCount; i++)
+    {
+        int j = (i + 1) % pointCount;
+        int k = (i + pointCount - 1) % pointCount;
+		
+		Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
+		Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
+		
+        if (dot(v1, v2) < 0.999)
+        {
+			newPointArray.append(pointArray[i]);
+        }
+    }
+
+	swap(pointArray, newPointArray);
+}
+
--- a/src/nvmath/Polygon.h
+++ b/src/nvmath/Polygon.h
@ -0,0 +1,45 @@
+// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
+
+#ifndef NV_MATH_POLYGON_H
+#define NV_MATH_POLYGON_H
+
+#include <nvcore/Containers.h>
+
+#include <nvmath/nvmath.h>
+#include <nvmath/Vector.h>
+#include <nvmath/Box.h>
+
+namespace nv
+{
+	class Box;
+	class Plane;
+	class Triangle;
+	
+
+	class Polygon
+	{
+		NV_FORBID_COPY(Polygon);
+	public:
+
+		Polygon();
+		Polygon(const Triangle & t);
+		Polygon(const Vector3 * points, uint vertexCount);
+
+		float area() const;
+		Box bounds() const;
+		Plane plane() const;
+
+		uint clipTo(const Box & box);
+		uint clipTo(const Plane & plane);
+
+		void removeColinearPoints();
+		
+	private:
+
+		Array<Vector3> pointArray;
+	};
+
+
+} // nv namespace
+
+#endif	// NV_MATH_POLYGON_H
--- a/src/nvmath/Quaternion.h
+++ b/src/nvmath/Quaternion.h
@ -51,7 +51,6 @@ namespace nv

 	inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
 	{
-		// @@ Efficient SIMD implementation?
 		return Quaternion(
 			+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
 			- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
@ -59,6 +58,40 @@ namespace nv
 			- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
 	}

+	inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
+	{
+		return Quaternion(
+			                + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
+			- a.x() * b.z()               + a.z()*b.x() + a.w()*b.y(),
+			+ a.x() * b.y() - a.y()*b.x()               + a.w()*b.z(),
+			- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
+	}
+
+	inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
+	{
+		return Quaternion(
+			+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
+			- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
+			+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
+			- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
+	}
+
+	inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
+	{
+		return mul(a, b);
+	}
+
+	inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
+	{
+		return mul(a, b);
+	}
+
+	inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
+	{
+		return mul(a, b);
+	}
+
+
 	inline Quaternion scale(Quaternion::Arg q, float s)
 	{
 		return scale(q.asVector(), s);
@ -122,6 +155,24 @@ namespace nv
 		return Quaternion(Vector4(v * s, c));
 	}

+	inline Vector3 imag(Quaternion::Arg q)
+	{
+		return q.asVector().xyz();
+	}
+
+	inline float real(Quaternion::Arg q)
+	{
+		return q.w();
+	}
+
+
+	/// Transform vector.
+	inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
+	{
+		Quaternion t = q * v * conjugate(q);
+		return imag(t);
+	}
+

 } // nv namespace

--- a/src/nvmath/Solver.cpp
+++ b/src/nvmath/Solver.cpp
@ -0,0 +1,726 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include <nvmath/Solver.h>
+
+using namespace nv;
+
+namespace
+{
+	class Preconditioner
+	{
+	public:
+		// Virtual dtor.
+		virtual ~Preconditioner() { }
+	 
+		// Apply preconditioning step.
+		virtual void apply(const FullVector & x, FullVector & y) const = 0;
+	};
+
+
+	// Jacobi preconditioner.
+	class JacobiPreconditioner : public Preconditioner
+	{
+	public:
+
+		JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
+		{
+			nvCheck(M.isSquare());
+			
+			for(uint x = 0; x < M.width(); x++)
+			{
+				float elem = M.getCoefficient(x, x);
+				nvDebugCheck( elem != 0.0f );
+
+				if (symmetric) 
+				{
+					m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
+				}
+				else 
+				{
+					m_inverseDiagonal[x] = 1.0f / elem;
+				}
+			}
+		}
+
+		void apply(const FullVector & x, FullVector & y) const
+		{
+			y *= x;
+		}
+
+	private:
+
+		FullVector m_inverseDiagonal;
+
+	};
+
+} // namespace
+
+
+static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
+static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
+
+
+// Solve the symmetric system: At·A·x = At·b
+void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
+{
+	nvDebugCheck(A.width() == x.dimension());
+	nvDebugCheck(A.height() == b.dimension());
+	nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
+
+	const uint D = A.width();
+
+	FullVector Atb(D);
+	mult(Transposed, A, b, Atb);
+
+	SparseMatrix AtA(D);
+	mult(Transposed, A, NoTransposed, A, AtA);
+
+	SymmetricSolver(AtA, Atb, x, epsilon);
+}
+
+
+// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
+void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
+{
+	nvDebugCheck(A.width() == x.dimension());
+	nvDebugCheck(A.height() == b.dimension());
+	nvDebugCheck(A.height() >= A.width() - lockedCount);
+
+	// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
+
+	const uint D = A.width() - lockedCount;
+	nvDebugCheck(D > 0);
+
+	// Compute: b - Al * xl
+	FullVector b_Alxl(b);
+
+	for (uint y = 0; y < A.height(); y++)
+	{
+		const uint count = A.getRow(y).count();
+		for (uint e = 0; e < count; e++)
+		{
+			uint column = A.getRow(y)[e].x;
+
+			bool isFree = true;
+			for (uint i = 0; i < lockedCount; i++) 
+			{
+				isFree &= (lockedParameters[i] != column);
+			}
+
+			if (!isFree)
+			{
+				b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
+			}
+		}
+	}
+
+	// Remove locked columns from A.
+	SparseMatrix Af(D, A.height());
+
+	for (uint y = 0; y < A.height(); y++)
+	{
+		const uint count = A.getRow(y).count();
+		for (uint e = 0; e < count; e++)
+		{
+			uint column = A.getRow(y)[e].x;
+			uint ix = column;
+
+			bool isFree = true;
+			for (uint i = 0; i < lockedCount; i++) 
+			{
+				isFree &= (lockedParameters[i] != column);
+				if (column > lockedParameters[i]) ix--; // shift columns
+			}
+
+			if (isFree)
+			{
+				Af.setCoefficient(ix, y, A.getRow(y)[e].v);
+			}
+		}
+	}
+	
+	// Remove elements from x
+	FullVector xf(D);
+
+	for (uint i = 0, j = 0; i < A.width(); i++)
+	{
+		bool isFree = true;
+		for (uint l = 0; l < lockedCount; l++) 
+		{
+			isFree &= (lockedParameters[l] != i);
+		}
+
+		if (isFree)
+		{
+			xf[j++] = x[i];
+		}
+	}
+
+	// Solve reduced system.
+	LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
+
+	// Copy results back to x.
+	for (uint i = 0, j = 0; i < A.width(); i++)
+	{
+		bool isFree = true;
+		for (uint l = 0; l < lockedCount; l++) 
+		{
+			isFree &= (lockedParameters[l] != i);
+		}
+
+		if (isFree)
+		{
+			x[i] = xf[j++];
+		}
+	}
+}
+
+
+void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
+{
+	nvDebugCheck(A.height() == A.width());
+	nvDebugCheck(A.height() == b.dimension());
+	nvDebugCheck(b.dimension() == x.dimension());
+
+//	JacobiPreconditioner jacobi(A, true);
+
+//	ConjugateGradientSolver(jacobi, A, b, x, epsilon);
+	ConjugateGradientSolver(A, b, x, epsilon);
+}
+
+
+/**
+ * Compute the solution of the sparse linear system Ab=x using the Conjugate
+ * Gradient method.
+ *
+ * Solving sparse linear systems:
+ * (1)		A·x = b
+ * 
+ * The conjugate gradient algorithm solves (1) only in the case that A is 
+ * symmetric and positive definite. It is based on the idea of minimizing the 
+ * function
+ * 
+ * (2)		f(x) = 1/2·x·A·x - b·x
+ * 
+ * This function is minimized when its gradient
+ * 
+ * (3)		df = A·x - b
+ * 
+ * is zero, which is equivalent to (1). The minimization is carried out by 
+ * generating a succession of search directions p.k and improved minimizers x.k. 
+ * At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k), 
+ * and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are 
+ * built up in such a way that x.k+1 is also the minimizer of f over the whole
+ * vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N 
+ * iterations you arrive at the minimizer over the entire vector space, i.e., the 
+ * solution to (1).
+ *
+ * For a really good explanation of the method see:
+ *
+ * "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
+ * Jonhathan Richard Shewchuk.
+ *
+**/
+/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
+{
+	nvDebugCheck( A.isSquare() );
+	nvDebugCheck( A.width() == b.dimension() );
+	nvDebugCheck( A.width() == x.dimension() );
+
+	int i = 0;
+	const int D = A.width();
+	const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
+
+	FullVector r(D);   // residual
+	FullVector p(D);   // search direction
+	FullVector q(D);   // 
+	float delta_0;
+	float delta_old;
+	float delta_new;
+	float alpha;
+	float beta;
+
+	// r = b - A·x;
+	copy(b, r);
+	sgemv(-1, A, x, 1, r);
+
+	// p = r;
+	copy(r, p);
+	
+	delta_new = dot( r, r );
+	delta_0 = delta_new;
+
+	while (i < i_max && delta_new > epsilon*epsilon*delta_0)
+	{
+		i++;
+
+		// q = A·p
+		mult(A, p, q);
+
+		// alpha = delta_new / p·q
+		alpha = delta_new / dot( p, q );
+
+		// x = alfa·p + x
+		saxpy(alpha, p, x);
+
+		if ((i & 31) == 0) // recompute r after 32 steps
+		{
+			// r = b - A·x
+			copy(b, r);
+			sgemv(-1, A, x, 1, r);
+		}
+		else
+		{
+			// r = r - alpha·q
+			saxpy(-alpha, q, r);
+		}
+
+		delta_old = delta_new;
+		delta_new = dot( r, r );
+
+		beta = delta_new / delta_old;
+
+		// p = r + beta·p
+		copy(r, p);
+		saxpy(beta, p, r);
+	}
+
+	return i;
+}
+
+
+// Conjugate gradient with preconditioner.
+/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
+{
+	nvDebugCheck( A.isSquare() );
+	nvDebugCheck( A.width() == b.dimension() );
+	nvDebugCheck( A.width() == x.dimension() );
+
+	int i = 0;
+	const int D = A.width();
+	const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
+
+	FullVector r(D);    // residual
+	FullVector p(D);    // search direction
+	FullVector q(D);    // 
+	FullVector s(D);    // preconditioned
+	float delta_0;
+	float delta_old;
+	float delta_new;
+	float alpha;
+	float beta;
+
+	// r = b - A·x
+	copy(b, r);
+	sgemv(-1, A, x, 1, r);
+
+
+	// p = M^-1 · r
+	preconditioner.apply(r, p);
+	//copy(r, p);
+
+
+	delta_new = dot(r, p);
+	delta_0 = delta_new;
+
+	while (i < i_max && delta_new > epsilon*epsilon*delta_0)
+	{
+		i++;
+
+		// q = A·p
+		mult(A, p, q);
+
+		// alpha = delta_new / p·q
+		alpha = delta_new / dot(p, q);
+
+		// x = alfa·p + x
+		saxpy(alpha, p, x);
+
+		if ((i & 31) == 0)  // recompute r after 32 steps
+		{			
+			// r = b - A·x
+			copy(b, r);
+			sgemv(-1, A, x, 1, r);
+		}
+		else
+		{
+			// r = r - alfa·q
+			saxpy(-alpha, q, r);
+		}
+
+		// s = M^-1 · r
+		preconditioner.apply(r, s);
+		//copy(r, s);
+
+		delta_old = delta_new;
+		delta_new = dot( r, s );
+
+		beta = delta_new / delta_old;
+
+		// p = s + beta·p
+		copy(s, p);
+		saxpy(beta, p, s);
+	}
+
+	return i;
+}
+
+
+#if 0 // Nonsymmetric solvers
+
+/** Bi-conjugate gradient method.  */
+MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
+	piDebugCheck( A.IsSquare() );
+	piDebugCheck( A.Width() == b.Dim() );
+	piDebugCheck( A.Width() == x.Dim() );
+
+	int i = 0;
+	const int D = A.Width();
+	const int i_max = 4 * D;
+
+	float resid;
+	float rho_1 = 0;
+	float rho_2 = 0;
+	float alpha;
+	float beta;
+
+	DenseVector r(D);
+	DenseVector rtilde(D);
+	DenseVector p(D);
+	DenseVector ptilde(D);
+	DenseVector q(D);
+	DenseVector qtilde(D);
+	DenseVector tmp(D);	// temporal vector.
+
+	// r = b - A·x;
+	A.Product( x, tmp );
+	r.Sub( b, tmp );
+
+	// rtilde = r
+	rtilde.Set( r );
+
+	// p = r;
+	p.Set( r );
+
+	// ptilde = rtilde
+	ptilde.Set( rtilde );
+
+
+
+	float normb = b.Norm();
+  	if( normb == 0.0 ) normb = 1;
+
+	// test convergence
+	resid = r.Norm() / normb;
+	if( resid < epsilon ) {
+		// method converges?
+		return 0;
+	}
+
+
+	while( i < i_max ) {
+
+		i++;
+		
+		rho_1 = DenseVectorDotProduct( r, rtilde );
+
+		if( rho_1 == 0 ) {
+			// method fails.
+			return -i;
+		}
+
+		if (i == 1) {
+			p.Set( r );
+			ptilde.Set( rtilde );
+		} 
+		else {
+			beta = rho_1 / rho_2;
+			
+			// p = r + beta * p;
+			p.Mad( r, p, beta );
+			
+			// ptilde = ztilde + beta * ptilde;
+			ptilde.Mad( rtilde, ptilde, beta );
+		}
+
+		// q = A * p;
+		A.Product( p, q );
+
+		// qtilde = A^t * ptilde;
+		A.TransProduct( ptilde, qtilde );
+
+		alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
+
+		// x += alpha * p;
+		x.Mad( x, p, alpha );
+
+		// r -= alpha * q;
+		r.Mad( r, q, -alpha );
+		
+		// rtilde -= alpha * qtilde;
+		rtilde.Mad( rtilde, qtilde, -alpha );
+
+		rho_2 = rho_1;
+
+		// test convergence
+		resid = r.Norm() / normb;
+		if( resid < epsilon ) {
+			// method converges
+			return i;
+		}
+	}
+
+	return i;
+}
+
+
+/** Bi-conjugate gradient stabilized method. */
+int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
+	piDebugCheck( A.IsSquare() );
+	piDebugCheck( A.Width() == b.Dim() );
+	piDebugCheck( A.Width() == x.Dim() );
+
+	int i = 0;
+	const int D = A.Width();
+	const int i_max = 2 * D;
+
+
+	float resid;
+	float rho_1 = 0;
+	float rho_2 = 0;
+	float alpha = 0;
+	float beta = 0;
+	float omega = 0;
+
+	DenseVector p(D);
+	DenseVector phat(D);
+	DenseVector s(D);
+	DenseVector shat(D);
+	DenseVector t(D);
+	DenseVector v(D);
+
+	DenseVector r(D);
+	DenseVector rtilde(D);
+
+	DenseVector tmp(D);
+
+	// r = b - A·x;
+	A.Product( x, tmp );
+	r.Sub( b, tmp );
+
+	// rtilde = r
+	rtilde.Set( r );
+
+
+	float normb = b.Norm();
+	if( normb == 0.0 ) normb = 1;
+  
+	// test convergence
+	resid = r.Norm() / normb;
+	if( resid < epsilon ) {
+		// method converges?
+		return 0;
+	}
+
+
+	while( i<i_max ) {
+
+		i++;
+	
+		rho_1 = DenseVectorDotProduct( rtilde, r );
+		if( rho_1 == 0 ) {
+			// method fails
+			return -i;
+		}
+
+
+		if( i == 1 ) {
+			p.Set( r );
+		}
+		else {
+			beta = (rho_1 / rho_2) * (alpha / omega);
+
+			// p = r + beta * (p - omega * v);
+			p.Mad( p, v, -omega );
+			p.Mad( r, p, beta );
+		}
+
+		//phat = M.solve(p);
+		phat.Set( p );
+		//Precond( &phat, p );
+
+		//v = A * phat;
+		A.Product( phat, v );
+
+		alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
+
+		// s = r - alpha * v;
+		s.Mad( r, v, -alpha );
+
+
+		resid = s.Norm() / normb;
+		if( resid < epsilon ) {
+			// x += alpha * phat;
+			x.Mad( x, phat, alpha );
+			return i;
+		}
+
+		//shat = M.solve(s);
+		shat.Set( s );
+		//Precond( &shat, s );
+
+		//t = A * shat;
+		A.Product( shat, t );
+
+		omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
+
+		// x += alpha * phat + omega * shat;
+		x.Mad( x, shat, omega );
+		x.Mad( x, phat, alpha );
+
+		//r = s - omega * t;
+		r.Mad( s, t, -omega );
+
+		rho_2 = rho_1;
+		
+		resid = r.Norm() / normb;
+		if( resid < epsilon ) {
+			return i;
+		}
+
+		if( omega == 0 ) {
+			return -i;	// ???
+		}
+	}
+
+	return i;
+}
+
+
+/** Bi-conjugate gradient stabilized method. */
+int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
+	piDebugCheck( A.IsSquare() );
+	piDebugCheck( A.Width() == b.Dim() );
+	piDebugCheck( A.Width() == x.Dim() );
+
+	int i = 0;
+	const int D = A.Width();
+	const int i_max = D;
+//	const int i_max = 1000;
+
+
+	float resid;
+	float rho_1 = 0;
+	float rho_2 = 0;
+	float alpha = 0;
+	float beta = 0;
+	float omega = 0;
+
+	DenseVector p(D);
+	DenseVector phat(D);
+	DenseVector s(D);
+	DenseVector shat(D);
+	DenseVector t(D);
+	DenseVector v(D);
+
+	DenseVector r(D);
+	DenseVector rtilde(D);
+
+	DenseVector tmp(D);
+
+	// r = b - A·x;
+	A.Product( x, tmp );
+	r.Sub( b, tmp );
+
+	// rtilde = r
+	rtilde.Set( r );
+
+
+	float normb = b.Norm();
+	if( normb == 0.0 ) normb = 1;
+  
+	// test convergence
+	resid = r.Norm() / normb;
+	if( resid < epsilon ) {
+		// method converges?
+		return 0;
+	}
+
+
+	while( i<i_max ) {
+
+		i++;
+	
+		rho_1 = DenseVectorDotProduct( rtilde, r );
+		if( rho_1 == 0 ) {
+			// method fails
+			return -i;
+		}
+
+
+		if( i == 1 ) {
+			p.Set( r );
+		}
+		else {
+			beta = (rho_1 / rho_2) * (alpha / omega);
+
+			// p = r + beta * (p - omega * v);
+			p.Mad( p, v, -omega );
+			p.Mad( r, p, beta );
+		}
+
+		//phat = M.solve(p);
+		//phat.Set( p );
+		M.Precond( &phat, p );
+
+		//v = A * phat;
+		A.Product( phat, v );
+
+		alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
+
+		// s = r - alpha * v;
+		s.Mad( r, v, -alpha );
+
+
+		resid = s.Norm() / normb;
+
+		//printf( "--- Iteration %d: residual = %f\n", i, resid );
+
+		if( resid < epsilon ) {
+			// x += alpha * phat;
+			x.Mad( x, phat, alpha );
+			return i;
+		}
+
+		//shat = M.solve(s);
+		//shat.Set( s );
+		M.Precond( &shat, s );
+
+		//t = A * shat;
+		A.Product( shat, t );
+
+		omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
+
+		// x += alpha * phat + omega * shat;
+		x.Mad( x, shat, omega );
+		x.Mad( x, phat, alpha );
+
+		//r = s - omega * t;
+		r.Mad( s, t, -omega );
+
+		rho_2 = rho_1;
+		
+		resid = r.Norm() / normb;
+		if( resid < epsilon ) {
+			return i;
+		}
+
+		if( omega == 0 ) {
+			return -i;	// ???
+		}
+	}
+
+	return i;
+}
+
+#endif
--- a/src/nvmath/Solver.h
+++ b/src/nvmath/Solver.h
@ -0,0 +1,20 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MATH_SOLVER_H
+#define NV_MATH_SOLVER_H
+
+#include <nvmath/Sparse.h>
+
+namespace nv
+{
+
+	// Linear solvers.
+	NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
+	NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
+	NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
+//	NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
+
+} // nv namespace
+
+
+#endif // NV_MATH_SOLVER_H
--- a/src/nvmath/Sparse.cpp
+++ b/src/nvmath/Sparse.cpp
@ -0,0 +1,831 @@
+// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
+
+#include <nvmath/Sparse.h>
+#include <nvmath/KahanSum.h>
+
+using namespace nv;
+
+
+/// Ctor.
+FullVector::FullVector(uint dim)
+{ 
+	m_array.resize(dim); 
+}
+
+/// Copy ctor.
+FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
+{
+}
+
+/// Copy operator
+const FullVector & FullVector::operator=(const FullVector & v)
+{
+	nvCheck(dimension() == v.dimension());
+	
+	m_array = v.m_array;
+
+	return *this;
+}
+		
+
+void FullVector::fill(float f)
+{
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] = f;
+	}
+}
+
+void FullVector::operator+= (const FullVector & v)
+{
+	nvDebugCheck(dimension() == v.dimension());
+
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] += v.m_array[i];
+	}
+}
+
+void FullVector::operator-= (const FullVector & v)
+{
+	nvDebugCheck(dimension() == v.dimension());
+
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] -= v.m_array[i];
+	}
+}
+
+void FullVector::operator*= (const FullVector & v)
+{
+	nvDebugCheck(dimension() == v.dimension());
+
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] *= v.m_array[i];
+	}
+}
+
+void FullVector::operator+= (float f)
+{
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] += f;
+	}
+}
+
+void FullVector::operator-= (float f)
+{
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] -= f;
+	}
+}
+
+void FullVector::operator*= (float f)
+{
+	const uint dim = dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		m_array[i] *= f;
+	}
+}
+
+
+void nv::saxpy(float a, const FullVector & x, FullVector & y)
+{
+	nvDebugCheck(x.dimension() == y.dimension());
+
+	const uint dim = x.dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		y[i] += a * x[i];
+	}
+}
+
+void nv::copy(const FullVector & x, FullVector & y)
+{
+	nvDebugCheck(x.dimension() == y.dimension());
+
+	const uint dim = x.dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		y[i] = x[i];
+	}
+}
+
+void nv::scal(float a, FullVector & x)
+{
+	const uint dim = x.dimension();
+	for (uint i = 0; i < dim; i++)
+	{
+		x[i] *= a;
+	}
+}
+
+float nv::dot(const FullVector & x, const FullVector & y)
+{
+	nvDebugCheck(x.dimension() == y.dimension());
+
+	const uint dim = x.dimension();
+
+	/*float sum = 0;
+	for (uint i = 0; i < dim; i++)
+	{
+		sum += x[i] * y[i];
+	}
+	return sum;*/
+
+	KahanSum kahan;
+
+	for (uint i = 0; i < dim; i++)
+	{
+		kahan.add(x[i] * y[i]);
+	}
+
+	return kahan.sum();
+}
+
+
+FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
+{
+	m_array.resize(d*d, 0.0f);
+}
+
+FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
+{
+	m_array.resize(w*h, 0.0f);
+}
+
+FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
+{
+	m_array = m.m_array;
+}
+
+const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
+{
+	nvCheck(width() == m.width());
+	nvCheck(height() == m.height());
+
+	m_array = m.m_array;
+	
+	return *this;
+}
+
+
+float FullMatrix::getCoefficient(uint x, uint y) const
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+	
+	return m_array[y * width() + x];
+}
+
+void FullMatrix::setCoefficient(uint x, uint y, float f)
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+	
+	m_array[y * width() + x] = f;
+}
+
+void FullMatrix::addCoefficient(uint x, uint y, float f)
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+	
+	m_array[y * width() + x] += f;
+}
+
+void FullMatrix::mulCoefficient(uint x, uint y, float f)
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+	
+	m_array[y * width() + x] *= f;
+}
+
+float FullMatrix::dotRow(uint y, const FullVector & v) const
+{
+	nvDebugCheck( v.dimension() == width() );
+	nvDebugCheck( y < height() );
+
+	float sum = 0;
+
+	const uint count = v.dimension();
+	for (uint i = 0; i < count; i++)
+	{
+		sum += m_array[y * count + i] * v[i];
+	}
+
+	return sum;
+}
+
+void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
+{
+	nvDebugCheck( v.dimension() == width() );
+	nvDebugCheck( y < height() );
+
+	const uint count = v.dimension();
+	for (uint i = 0; i < count; i++)
+	{
+		v[i] += m_array[y * count + i];
+	}
+}
+
+
+// y = M * x
+void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
+{
+	mult(NoTransposed, M, x, y);
+}
+
+void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
+{
+	const uint w = M.width();
+	const uint h = M.height();
+
+	if (TM == Transposed)
+	{
+		nvDebugCheck( h == x.dimension() );
+		nvDebugCheck( w == y.dimension() );
+
+		y.fill(0.0f);
+
+		for (uint i = 0; i < h; i++)
+		{
+			M.madRow(i, x[i], y);
+		}
+	}
+	else
+	{
+		nvDebugCheck( w == x.dimension() );
+		nvDebugCheck( h == y.dimension() );
+
+		for (uint i = 0; i < h; i++)
+		{
+			y[i] = M.dotRow(i, x);
+		}
+	}
+}
+
+// y = alpha*A*x + beta*y
+void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+	sgemv(alpha, NoTransposed, A, x, beta, y);
+}
+
+void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+	const uint w = A.width();
+	const uint h = A.height();
+
+	if (TA == Transposed)
+	{
+		nvDebugCheck( h == x.dimension() );
+		nvDebugCheck( w == y.dimension() );
+
+		for (uint i = 0; i < h; i++)
+		{
+			A.madRow(i, alpha * x[i], y);
+		}
+	}
+	else
+	{
+		nvDebugCheck( w == x.dimension() );
+		nvDebugCheck( h == y.dimension() );
+
+		for (uint i = 0; i < h; i++)
+		{
+			y[i] = alpha * A.dotRow(i, x) + beta * y[i];
+		}
+	}
+}
+
+
+// Multiply a row of A by a column of B.
+static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
+{
+	const uint w = (TA == NoTransposed) ? A.width() : A.height();
+	nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
+
+	float sum = 0.0f;
+
+	for (uint k = 0; k < w; k++)
+	{
+		const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k);	// @@ Move branches out of the loop?
+		const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
+		sum += a * b;
+	}
+
+	return sum;
+}
+
+
+// C = A * B
+void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
+{
+	mult(NoTransposed, A, NoTransposed, B, C);
+}
+
+void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
+{
+	sgemm(1.0f, TA, A, TB, B, 0.0f, C);
+}
+
+// C = alpha*A*B + beta*C
+void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
+{
+	sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
+}
+
+void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
+{
+	const uint w = C.width();
+	const uint h = C.height();
+
+	uint aw = (TA == NoTransposed) ? A.width() : A.height();
+	uint ah = (TA == NoTransposed) ? A.height() : A.width();
+	uint bw = (TB == NoTransposed) ? B.width() : B.height();
+	uint bh = (TB == NoTransposed) ? B.height() : B.width();
+
+	nvDebugCheck(aw == bh);
+	nvDebugCheck(bw == ah);
+	nvDebugCheck(w == bw);
+	nvDebugCheck(h == ah);
+
+	for (uint y = 0; y < h; y++)
+	{
+		for (uint x = 0; x < w; x++)
+		{
+			float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
+			C.setCoefficient(x, y, c);
+		}
+	}
+}
+
+
+
+
+
+/// Ctor. Init the size of the sparse matrix.
+SparseMatrix::SparseMatrix(uint d) : m_width(d)
+{
+	m_array.resize(d);
+}
+
+/// Ctor. Init the size of the sparse matrix.
+SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
+{
+	m_array.resize(h);
+}
+
+SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
+{
+	m_array = m.m_array;
+}
+
+const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
+{
+	nvCheck(width() == m.width());
+	nvCheck(height() == m.height());
+
+	m_array = m.m_array;
+
+	return *this;
+}
+
+
+// x is column, y is row
+float SparseMatrix::getCoefficient(uint x, uint y) const
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		if (m_array[y][i].x == x) return m_array[y][i].v;
+	}
+
+	return 0.0f;
+}
+
+void SparseMatrix::setCoefficient(uint x, uint y, float f)
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		if (m_array[y][i].x == x) 
+		{
+			m_array[y][i].v = f;
+			return;
+		}
+	}
+
+	Coefficient c = { x, f };
+	m_array[y].append( c );
+}
+
+void SparseMatrix::addCoefficient(uint x, uint y, float f)
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		if (m_array[y][i].x == x) 
+		{
+			m_array[y][i].v += f;
+			return;
+		}
+	}
+
+	Coefficient c = { x, f };
+	m_array[y].append( c );
+}
+
+void SparseMatrix::mulCoefficient(uint x, uint y, float f)
+{
+	nvDebugCheck( x < width() );
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		if (m_array[y][i].x == x) 
+		{
+			m_array[y][i].v *= f;
+			return;
+		}
+	}
+
+	Coefficient c = { x, f };
+	m_array[y].append( c );
+}
+
+
+float SparseMatrix::sumRow(uint y) const
+{
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+
+	/*float sum = 0;
+	for (uint i = 0; i < count; i++)
+	{
+		sum += m_array[y][i].v;
+	}
+	return sum;*/
+
+	KahanSum kahan;
+
+	for (uint i = 0; i < count; i++)
+	{
+		kahan.add(m_array[y][i].v);
+	}
+
+	return kahan.sum();
+}
+
+float SparseMatrix::dotRow(uint y, const FullVector & v) const
+{
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+
+	/*float sum = 0;
+	for (uint i = 0; i < count; i++)
+	{
+		sum += m_array[y][i].v * v[m_array[y][i].x];
+	}
+	return sum;*/
+
+	KahanSum kahan;
+
+	for (uint i = 0; i < count; i++)
+	{
+		kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
+	}
+
+	return kahan.sum();
+}
+
+void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
+{
+	nvDebugCheck(y < height());
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		v[m_array[y][i].x] += alpha * m_array[y][i].v;
+	}
+}
+
+
+void SparseMatrix::clearRow(uint y)
+{
+	nvDebugCheck( y < height() );
+
+	m_array[y].clear();
+}
+
+void SparseMatrix::scaleRow(uint y, float f)
+{
+	nvDebugCheck( y < height() );
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		m_array[y][i].v *= f;
+	}
+}
+
+void SparseMatrix::normalizeRow(uint y)
+{
+	nvDebugCheck( y < height() );
+
+	float norm = 0.0f;
+
+	const uint count = m_array[y].count();
+	for (uint i = 0; i < count; i++)
+	{
+		float f = m_array[y][i].v;
+		norm += f * f;
+	}
+
+	scaleRow(y, 1.0f / sqrtf(norm));
+}
+
+
+void SparseMatrix::clearColumn(uint x)
+{
+	nvDebugCheck(x < width());
+
+	for (uint y = 0; y < height(); y++)
+	{
+		const uint count = m_array[y].count();
+		for (uint e = 0; e < count; e++)
+		{
+			if (m_array[y][e].x == x)
+			{
+				m_array[y][e].v = 0.0f;
+				break;
+			}
+		}
+	}
+}
+
+void SparseMatrix::scaleColumn(uint x, float f)
+{
+	nvDebugCheck(x < width());
+
+	for (uint y = 0; y < height(); y++)
+	{
+		const uint count = m_array[y].count();
+		for (uint e = 0; e < count; e++)
+		{
+			if (m_array[y][e].x == x)
+			{
+				m_array[y][e].v *= f;
+				break;
+			}
+		}
+	}
+}
+
+const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
+{
+	return m_array[y];
+}
+
+
+// y = M * x
+void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
+{
+	mult(NoTransposed, M, x, y);
+}
+
+void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
+{
+	const uint w = M.width();
+	const uint h = M.height();
+
+	if (TM == Transposed)
+	{
+		nvDebugCheck( h == x.dimension() );
+		nvDebugCheck( w == y.dimension() );
+
+		y.fill(0.0f);
+
+		for (uint i = 0; i < h; i++)
+		{
+			M.madRow(i, x[i], y);
+		}
+	}
+	else
+	{
+		nvDebugCheck( w == x.dimension() );
+		nvDebugCheck( h == y.dimension() );
+
+		for (uint i = 0; i < h; i++)
+		{
+			y[i] = M.dotRow(i, x);
+		}
+	}
+}
+
+// y = alpha*A*x + beta*y
+void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+	sgemv(alpha, NoTransposed, A, x, beta, y);
+}
+
+void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
+{
+	const uint w = A.width();
+	const uint h = A.height();
+
+	if (TA == Transposed)
+	{
+		nvDebugCheck( h == x.dimension() );
+		nvDebugCheck( w == y.dimension() );
+
+		for (uint i = 0; i < h; i++)
+		{
+			A.madRow(i, alpha * x[i], y);
+		}
+	}
+	else
+	{
+		nvDebugCheck( w == x.dimension() );
+		nvDebugCheck( h == y.dimension() );
+
+		for (uint i = 0; i < h; i++)
+		{
+			y[i] = alpha * A.dotRow(i, x) + beta * y[i];
+		}
+	}
+}
+
+
+// dot y-row of A by x-column of B
+static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
+{
+	const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
+
+	const uint count = row.count();
+
+	/*float sum = 0.0f;
+	for (uint i = 0; i < count; i++)
+	{
+		const SparseMatrix::Coefficient & c = row[i];
+		sum += c.v * B.getCoefficient(x, c.x);
+	}
+	return sum;*/
+
+	KahanSum kahan;
+	for (uint i = 0; i < count; i++)
+	{
+		const SparseMatrix::Coefficient & c = row[i];
+		kahan.add(c.v * B.getCoefficient(x, c.x));
+	}
+
+	return kahan.sum();
+}
+
+// dot y-row of A by x-row of B
+static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
+{
+	const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
+
+	const uint count = row.count();
+
+	/*float sum = 0.0f;
+	for (uint i = 0; i < count; i++)
+	{
+		const SparseMatrix::Coefficient & c = row[i];
+		sum += c.v * B.getCoefficient(c.x, x);
+	}
+	//return sum;*/
+
+	KahanSum kahan;
+	for (uint i = 0; i < count; i++)
+	{
+		const SparseMatrix::Coefficient & c = row[i];
+		kahan.add(c.v * B.getCoefficient(c.x, x));
+	}
+
+	return kahan.sum();
+}
+
+// dot y-column of A by x-column of B
+static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
+{
+	nvDebugCheck(A.height() == B.height());
+
+	const uint h = A.height();
+
+	/*float sum = 0.0f;
+	for (uint i = 0; i < h; i++)
+	{
+		sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
+	}
+	//return sum;*/
+
+	KahanSum kahan;
+	for (uint i = 0; i < h; i++)
+	{
+		kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
+	}
+
+	return kahan.sum();
+}
+
+
+
+// C = A * B
+void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
+{
+	mult(NoTransposed, A, NoTransposed, B, C);
+}
+
+void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
+{
+	sgemm(1.0f, TA, A, TB, B, 0.0f, C);
+}
+
+// C = alpha*A*B + beta*C
+void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
+{
+	sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
+}
+
+void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
+{
+	const uint w = C.width();
+	const uint h = C.height();
+
+	uint aw = (TA == NoTransposed) ? A.width() : A.height();
+	uint ah = (TA == NoTransposed) ? A.height() : A.width();
+	uint bw = (TB == NoTransposed) ? B.width() : B.height();
+	uint bh = (TB == NoTransposed) ? B.height() : B.width();
+
+	nvDebugCheck(aw == bh);
+	nvDebugCheck(bw == ah);
+	nvDebugCheck(w == bw);
+	nvDebugCheck(h == ah);
+
+
+	for (uint y = 0; y < h; y++)
+	{
+		for (uint x = 0; x < w; x++)
+		{
+			float c = beta * C.getCoefficient(x, y);
+
+			if (TA == NoTransposed && TB == NoTransposed)
+			{
+				// dot y-row of A by x-column of B.
+				c += alpha * dotRowColumn(y, A, x, B);
+			}
+			else if (TA == Transposed && TB == Transposed)
+			{
+				// dot y-column of A by x-row of B.
+				c += alpha * dotRowColumn(x, B, y, A);
+			}
+			else if (TA == Transposed && TB == NoTransposed)
+			{
+				// dot y-column of A by x-column of B.
+				c += alpha * dotColumnColumn(y, A, x, B);
+			}
+			else if (TA == NoTransposed && TB == Transposed)
+			{
+				// dot y-row of A by x-row of B.
+				c += alpha * dotRowRow(y, A, x, B);
+			}
+
+			if (c != 0.0f)
+			{
+				C.setCoefficient(x, y, c);
+			}
+		}
+	}
+
+}
+
+// C = At * A
+void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
+{
+	// This is quite expensive...
+	mult(Transposed, A, NoTransposed, A, C);
+}
--- a/src/nvmath/Sparse.h
+++ b/src/nvmath/Sparse.h
@ -0,0 +1,198 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MATH_SPARSE_H
+#define NV_MATH_SPARSE_H
+
+#include <nvcore/Containers.h>
+#include <nvmath/nvmath.h>
+
+// Full and sparse vector and matrix classes. BLAS subset.
+
+namespace nv
+{
+	class FullVector;
+	class FullMatrix;
+	class SparseMatrix;
+
+
+	/// Fixed size vector class.
+	class FullVector
+	{
+	public:
+
+		FullVector(uint dim);
+		FullVector(const FullVector & v);
+
+		const FullVector & operator=(const FullVector & v);
+		
+		uint dimension() const { return m_array.count(); }
+
+		const float & operator[]( uint index ) const { return m_array[index]; }
+		float & operator[] ( uint index ) { return m_array[index]; }
+
+		void fill(float f);
+		
+		void operator+= (const FullVector & v);
+		void operator-= (const FullVector & v);
+		void operator*= (const FullVector & v);
+
+		void operator+= (float f);
+		void operator-= (float f);
+		void operator*= (float f);
+
+
+	private:
+
+		Array<float> m_array;
+
+	};
+
+	// Pseudo-BLAS interface.
+	NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y);	// y = a * x + y
+	NVMATH_API void copy(const FullVector & x, FullVector & y);
+	NVMATH_API void scal(float a, FullVector & x);
+	NVMATH_API float dot(const FullVector & x, const FullVector & y);
+
+
+	enum Transpose
+	{
+		NoTransposed = 0,
+		Transposed = 1
+	};
+
+	/// Full matrix class.
+	class FullMatrix
+	{
+	public:
+
+		FullMatrix(uint d);
+		FullMatrix(uint w, uint h);
+		FullMatrix(const FullMatrix & m);
+
+		const FullMatrix & operator=(const FullMatrix & m);
+
+		uint width() const { return m_width; }
+		uint height() const { return m_height; }
+		bool isSquare() const { return m_width == m_height; }
+		
+		float getCoefficient(uint x, uint y) const;
+
+		void setCoefficient(uint x, uint y, float f);
+		void addCoefficient(uint x, uint y, float f);
+		void mulCoefficient(uint x, uint y, float f);
+		
+		float dotRow(uint y, const FullVector & v) const;
+		void madRow(uint y, float alpha, FullVector & v) const;
+
+	protected:
+
+		bool isValid() const {
+			return m_array.size() == (m_width * m_height);
+		}
+
+	private:
+
+		const uint m_width;
+		const uint m_height;
+		Array<float> m_array;
+
+	};
+
+	NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
+	NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
+
+	// y = alpha*A*x + beta*y
+	NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
+	NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
+
+	NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
+	NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
+
+	// C = alpha*A*B + beta*C
+	NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
+	NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
+
+
+	/**
+	 * Sparse matrix class. The matrix is assumed to be sparse and to have
+	 * very few non-zero elements, for this reason it's stored in indexed 
+	 * format. To multiply column vectors efficiently, the matrix stores 
+	 * the elements in indexed-column order, there is a list of indexed 
+	 * elements for each row of the matrix. As with the FullVector the 
+	 * dimension of the matrix is constant.
+	**/
+	class SparseMatrix
+	{
+		friend class FullMatrix;
+	public:
+
+		// An element of the sparse array.
+		struct Coefficient {
+			uint x;  // column
+			float v; // value
+		};
+
+
+	public:
+
+		SparseMatrix(uint d);
+		SparseMatrix(uint w, uint h);
+		SparseMatrix(const SparseMatrix & m);
+
+		const SparseMatrix & operator=(const SparseMatrix & m);
+
+
+		uint width() const { return m_width; }
+		uint height() const { return m_array.count(); }
+		bool isSquare() const { return width() == height(); }
+
+		float getCoefficient(uint x, uint y) const; // x is column, y is row
+
+		void setCoefficient(uint x, uint y, float f);
+		void addCoefficient(uint x, uint y, float f);
+		void mulCoefficient(uint x, uint y, float f);
+
+		float sumRow(uint y) const;
+		float dotRow(uint y, const FullVector & v) const;
+		void madRow(uint y, float alpha, FullVector & v) const;
+
+		void clearRow(uint y);
+		void scaleRow(uint y, float f);
+		void normalizeRow(uint y);
+
+		void clearColumn(uint x);
+		void scaleColumn(uint x, float f);
+
+		const Array<Coefficient> & getRow(uint y) const;
+
+	private:
+
+		/// Number of columns.
+		const uint m_width;
+		
+		/// Array of matrix elements.
+		Array< Array<Coefficient> > m_array;
+
+	};
+
+	NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
+	NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
+
+	// y = alpha*A*x + beta*y
+	NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
+	NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
+
+	NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
+	NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
+
+	// C = alpha*A*B + beta*C
+	NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
+	NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
+
+	// C = At * A
+	NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
+
+} // nv namespace
+
+
+#endif // NV_MATH_SPARSE_H
--- a/src/nvmath/SphericalHarmonic.h
+++ b/src/nvmath/SphericalHarmonic.h
@ -3,6 +3,7 @@
 #ifndef NV_MATH_SPHERICALHARMONIC_H
 #define NV_MATH_SPHERICALHARMONIC_H

+#include <string.h> // memcpy
 #include <nvmath/Vector.h>

 namespace nv
--- a/src/nvmath/TriBox.cpp
+++ b/src/nvmath/TriBox.cpp
@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
 	if(min>rad || max<-rad) return false;


-bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
+bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
 {
 	// use separating axis theorem to test overlap between triangle and box
 	// need to test for overlap in these directions:
@ -170,7 +170,7 @@ bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Trian
 }


-bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
+bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
 {
 	// use separating axis theorem to test overlap between triangle and box
 	// need to test for overlap in these directions:
--- a/src/nvmath/TypeSerialization.cpp
+++ b/src/nvmath/TypeSerialization.cpp
@ -0,0 +1,82 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#include <nvcore/Stream.h>
+
+#include <nvmath/Vector.h>
+#include <nvmath/Matrix.h>
+#include <nvmath/Quaternion.h>
+#include <nvmath/Basis.h>
+#include <nvmath/Box.h>
+
+#include <nvmath/TypeSerialization.h>
+
+using namespace nv;
+
+Stream & nv::operator<< (Stream & s, Vector2 & v)
+{
+	float x = v.x();
+	float y = v.y();
+
+	s << x << y;
+
+	if (s.isLoading())
+	{
+		v.set(x, y);
+	}
+
+	return s;
+}
+
+Stream & nv::operator<< (Stream & s, Vector3 & v)
+{
+	float x = v.x();
+	float y = v.y();
+	float z = v.z();
+
+	s << x << y << z;
+
+	if (s.isLoading())
+	{
+		v.set(x, y, z);
+	}
+
+	return s;
+}
+
+Stream & nv::operator<< (Stream & s, Vector4 & v)
+{
+	float x = v.x();
+	float y = v.y();
+	float z = v.z();
+	float w = v.w();
+
+	s << x << y << z << w;
+
+	if (s.isLoading())
+	{
+		v.set(x, y, z, w);
+	}
+
+	return s;
+}
+
+Stream & nv::operator<< (Stream & s, Matrix & m)
+{
+	return s;
+}
+
+Stream & nv::operator<< (Stream & s, Quaternion & q)
+{
+	return s << q.asVector();
+}
+
+Stream & nv::operator<< (Stream & s, Basis & basis)
+{
+	return s << basis.tangent << basis.bitangent << basis.normal;
+}
+
+Stream & nv::operator<< (Stream & s, Box & box)
+{
+	return s << box.m_mins << box.m_maxs;
+}
+
--- a/src/nvmath/TypeSerialization.h
+++ b/src/nvmath/TypeSerialization.h
@ -0,0 +1,32 @@
+// This code is in the public domain -- castanyo@yahoo.es
+
+#ifndef NV_MATH_TYPESERIALIZATION_H
+#define NV_MATH_TYPESERIALIZATION_H
+
+#include <nvmath/nvmath.h>
+
+namespace nv
+{
+	class Stream;
+
+	class Vector2;
+	class Vector3;
+	class Vector4;
+
+	class Matrix;
+	class Quaternion;
+	struct Basis;
+	class Box;
+	
+	NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
+	NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
+	NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
+
+	NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
+	NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
+	NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
+	NVMATH_API Stream & operator<< (Stream & s, Box & obj);
+
+} // nv namespace
+
+#endif // NV_MATH_TYPESERIALIZATION_H
--- a/src/nvmath/Vector.h
+++ b/src/nvmath/Vector.h
@ -4,7 +4,7 @@
 #define NV_MATH_VECTOR_H

 #include <nvmath/nvmath.h>
-#include <nvcore/Containers.h> // min, max
+#include <nvcore/Algorithms.h> // min, max

 namespace nv
 {
@ -71,6 +71,7 @@ public:
 	const Vector2 & xy() const;

 	scalar component(uint idx) const;
+	void setComponent(uint idx, scalar f);

 	const scalar * ptr() const;

@ -239,13 +240,21 @@ inline const Vector2 & Vector3::xy() const
 inline scalar Vector3::component(uint idx) const
 {
 	nvDebugCheck(idx < 3);
-	if (idx == 0) return x();
-	if (idx == 1) return y();
-	if (idx == 2) return z();
+	if (idx == 0) return m_x;
+	if (idx == 1) return m_y;
+	if (idx == 2) return m_z;
 	nvAssume(false);
 	return 0.0f;
 }

+inline void Vector3::setComponent(uint idx, float f)
+{
+	nvDebugCheck(idx < 3);
+	if (idx == 0) m_x = f;
+	else if (idx == 1) m_y = f;
+	else if (idx == 2) m_z = f;
+}
+
 inline const scalar * Vector3::ptr() const
 {
 	return &m_x;
@ -477,6 +486,35 @@ inline scalar length(Vector2::Arg v)
 	return sqrtf(length_squared(v));
 }

+inline scalar inverse_length(Vector2::Arg v)
+{
+	return 1.0f / sqrtf(length_squared(v));
+}
+
+inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
+{
+	return equal(length(v), 1, epsilon);
+}
+
+inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
+{
+	float l = length(v);
+	nvDebugCheck(!isZero(l, epsilon));
+	Vector2 n = scale(v, 1.0f / l);
+	nvDebugCheck(isNormalized(n));
+	return n;
+}
+
+inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
+{
+	float l = length(v);
+	if (isZero(l, epsilon)) {
+		return fallback;
+	}
+	return scale(v, 1.0f / l);
+}
+
+
 inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
 {
 	return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -595,6 +633,11 @@ inline scalar length(Vector3::Arg v)
 	return sqrtf(length_squared(v));
 }

+inline scalar inverse_length(Vector3::Arg v)
+{
+	return 1.0f / sqrtf(length_squared(v));
+}
+
 inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
 {
 	return equal(length(v), 1, epsilon);
@ -716,6 +759,11 @@ inline scalar length(Vector4::Arg v)
 	return sqrtf(length_squared(v));
 }

+inline scalar inverse_length(Vector4::Arg v)
+{
+	return 1.0f / sqrtf(length_squared(v));
+}
+
 inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
 {
 	return equal(length(v), 1, epsilon);
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@ -136,6 +136,11 @@ inline float lerp(float f0, float f1, float t)
 	return f0 * s + f1 * t;
 }

+inline float square(float f)
+{
+	return f * f;
+}
+
 } // nv

 #endif // NV_MATH_H