Create 2.0.4 release.

2008-10-06 20:16:02 +00:00
parent b2d6122769
commit 624c5bd316
195 changed files with 1587 additions and 18570 deletions
--- a/src/nvmath/Basis.cpp
+++ b/src/nvmath/Basis.cpp
@ -27,7 +27,8 @@ void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/)
 	tangent -= normal * dot(normal, tangent);
 	tangent = ::normalize(tangent, epsilon);

-	bitangent -= normal * dot(normal, bitangent) + tangent * dot(tangent, bitangent);
+	bitangent -= normal * dot(normal, bitangent);
+	bitangent -= tangent * dot(tangent, bitangent);
 	bitangent = ::normalize(bitangent, epsilon);
 }

@ -48,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
 			return;
 		}
 	}
-	normal = nv::normalize(normal, epsilon);
+	normal = ::normalize(normal, epsilon);
 	
 	tangent -= normal * dot(normal, tangent);
 	bitangent -= normal * dot(normal, bitangent);
@ -67,8 +68,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
 	}
 	else
 	{
-#if 0
-		tangent = nv::normalize(tangent, epsilon);
+		tangent = ::normalize(tangent, epsilon);
 		bitangent -= tangent * dot(tangent, bitangent);
 		
 		if (length(bitangent) < epsilon)
@ -78,47 +78,11 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
 		}
 		else
 		{
-			bitangent = nv::normalize(bitangent, epsilon);
+			tangent = ::normalize(tangent, epsilon);
 		}
-#else
-		if (length(bitangent) < epsilon)
-		{
-			bitangent = cross(tangent, normal);
-			nvCheck(isNormalized(bitangent));
-		}
-		else
-		{
-			tangent = nv::normalize(tangent);
-			bitangent = nv::normalize(bitangent);
-			
-			Vector3 bisector = nv::normalize(tangent + bitangent);
-			Vector3 axis = cross(bisector, normal);
-			
-			nvDebugCheck(isNormalized(axis, epsilon));
-			nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
-			
-			if (dot(axis, tangent) > 0)
-			{
-				tangent = nv::normalize(bisector + axis);
-				bitangent = nv::normalize(bisector - axis);
-			}
-			else
-			{
-				tangent = nv::normalize(bisector - axis);
-				bitangent = nv::normalize(bisector + axis);
-			}
-		}
-#endif
 	}
 	
-	/*// Check vector lengths.
-	if (!isNormalized(normal, epsilon))
-	{
-		nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
-		nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
-		nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
-	}*/
-	
+	// Check vector lengths.
 	nvCheck(isNormalized(normal, epsilon));
 	nvCheck(isNormalized(tangent, epsilon));
 	nvCheck(isNormalized(bitangent, epsilon));
@ -161,18 +125,9 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
 	bitangent = cross(normal, tangent);
 }

-bool Basis::isValid() const
-{
-	if (equal(normal, Vector3(zero))) return false;
-	if (equal(tangent, Vector3(zero))) return false;
-	if (equal(bitangent, Vector3(zero))) return false;
-
-	if (equal(determinant(), 0.0f)) return false;
-	
-	return true;
-}


+/*
 /// Transform by this basis. (From this basis to object space).
 Vector3 Basis::transform(Vector3::Arg v) const
 {
@ -189,31 +144,30 @@ Vector3 Basis::transformT(Vector3::Arg v)
 }

 /// Transform by the inverse. (From object space to this basis).
-/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
+/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
 Vector3 Basis::transformI(Vector3::Arg v) const
 {
 	const float det = determinant();
-	nvDebugCheck(!equal(det, 0.0f, 0.0f));
+	nvCheck(!equalf(det, 0.0f));
 	
 	const float idet = 1.0f / det;

 	// Rows of the inverse matrix.
-	Vector3 r0(
-		 (bitangent.y() * normal.z() - bitangent.z() * normal.y()),
-		-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
-		 (bitangent.x() * normal.y() - bitangent.y() * normal.x()));
+	Vector3 r0, r1, r2;
+	r0.x =  (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
+	r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
+	r0.z =  (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;

-	Vector3 r1(
-		-(tangent.y() * normal.z() - tangent.z() * normal.y()),
-		 (tangent.x() * normal.z() - tangent.z() * normal.x()),
-		-(tangent.x() * normal.y() - tangent.y() * normal.x()));
+	r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
+	r1.y =  (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
+	r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;

-	Vector3 r2(
-		 (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
-		-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
-		 (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
+	r2.x =  (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
+	r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
+	r2.z =  (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;

-	return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
+	return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
 }	
+*/


--- a/src/nvmath/Basis.h
+++ b/src/nvmath/Basis.h
@ -54,8 +54,7 @@ namespace nv
 				tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
 		}
 		
-		bool isValid() const;
-		
+		/*
 		// Get transform matrix for this basis.
 		NVMATH_API Matrix matrix() const;
 		
@ -67,7 +66,7 @@ namespace nv

 		// Transform by the inverse. (From object space to this basis).
 		NVMATH_API Vector3 transformI(Vector3::Arg v) const;
-
+		*/
 		
 		Vector3 tangent;
 		Vector3 bitangent;
--- a/src/nvmath/Box.h
+++ b/src/nvmath/Box.h
@ -9,7 +9,6 @@

 namespace nv
 {
-class Stream;

 /// Axis Aligned Bounding Box.
 class Box
@ -28,13 +27,11 @@ public:
 	// Cast operators.
 	operator const float * () const { return reinterpret_cast<const float *>(this); }

-	// Min corner of the box.
-	Vector3 minCorner() const { return m_mins; }
-	Vector3 & minCorner() { return m_mins; }
+	/// Min corner of the box.
+	Vector3 mins() const { return m_mins; }

-	// Max corner of the box.
-	Vector3 maxCorner() const { return m_maxs; }
-	Vector3 & maxCorner() { return m_maxs; }
+	/// Max corner of the box.
+	Vector3 maxs() const { return m_maxs; }

 	/// Clear the bounds.
 	void clearBounds()
@ -111,7 +108,7 @@ public:
 	float area() const
 	{
 		const Vector3 d = extents();
-		return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
+		return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
 	}	

 	/// Get the volume of the box.
@ -121,16 +118,6 @@ public:
 		return 8.0f * (d.x() * d.y() * d.z());
 	}
 	
-	/// Return true if the box contains the given point.
-	bool contains(Vector3::Arg p) const
-	{
-		return 
-			m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
-			m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
-	}
-
-	friend Stream & operator<< (Stream & s, Box & box);
-
 private:

 	Vector3 m_mins;
@ -138,6 +125,15 @@ private:
 };


+/*
+/// Point inside box test.
+inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
+{
+	return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
+		m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
+}
+*/
+

 } // nv namespace

--- a/src/nvmath/CMakeLists.txt
+++ b/src/nvmath/CMakeLists.txt
@ -5,20 +5,13 @@ SET(MATH_SRCS
 	Vector.h
 	Matrix.h
 	Quaternion.h
-	Plane.h Plane.cpp
 	Box.h
 	Color.h
 	Montecarlo.h Montecarlo.cpp
 	Random.h Random.cpp
 	SphericalHarmonic.h SphericalHarmonic.cpp
 	Basis.h Basis.cpp
-	Triangle.h Triangle.cpp TriBox.cpp
-	Polygon.h Polygon.cpp
-	TypeSerialization.h TypeSerialization.cpp
-	Sparse.h Sparse.cpp
-	Solver.h Solver.cpp
-	KahanSum.h
-	Half.h Half.cpp)
+	Triangle.h Triangle.cpp TriBox.cpp)

 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

--- a/src/nvmath/Half.cpp
+++ b/src/nvmath/Half.cpp
@ -1,563 +0,0 @@
-// Branch-free implementation of half-precision (16 bit) floating point
-// Copyright 2006 Mike Acton <macton@gmail.com>
-// 
-// Permission is hereby granted, free of charge, to any person obtaining a 
-// copy of this software and associated documentation files (the "Software"),
-// to deal in the Software without restriction, including without limitation
-// the rights to use, copy, modify, merge, publish, distribute, sublicense, 
-// and/or sell copies of the Software, and to permit persons to whom the 
-// Software is furnished to do so, subject to the following conditions:
-// 
-// The above copyright notice and this permission notice shall be included 
-// in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 
-// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 
-// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
-// THE SOFTWARE
-//
-// Half-precision floating point format
-// ------------------------------------
-//
-//   | Field    | Last | First | Note
-//   |----------|------|-------|----------
-//   | Sign     | 15   | 15    |
-//   | Exponent | 14   | 10    | Bias = 15
-//   | Mantissa | 9    | 0     |
-//
-// Compiling
-// ---------
-//
-//  Preferred compile flags for GCC: 
-//     -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
-//
-//     This file is a C99 source file, intended to be compiled with a C99 
-//     compliant compiler. However, for the moment it remains combatible
-//     with C++98. Therefore if you are using a compiler that poorly implements
-//     C standards (e.g. MSVC), it may be compiled as C++. This is not
-//     guaranteed for future versions. 
-//
-// Features
-// --------
-//
-//  * QNaN + <x>  = QNaN
-//  * <x>  + +INF = +INF
-//  * <x>  - -INF = -INF
-//  * INF  - INF  = SNaN
-//  * Denormalized values
-//  * Difference of ZEROs is always +ZERO
-//  * Sum round with guard + round + sticky bit (grs)
-//  * And of course... no branching
-// 
-// Precision of Sum
-// ----------------
-//
-//  (SUM)        uint16 z = half_add( x, y );
-//  (DIFFERENCE) uint16 z = half_add( x, -y );
-//
-//     Will have exactly (0 ulps difference) the same result as:
-//     (For 32 bit IEEE 784 floating point and same rounding mode)
-//
-//     union FLOAT_32
-//     {
-//       float    f32;
-//       uint32 u32;
-//     };
-//
-//     union FLOAT_32 fx = { .u32 = half_to_float( x ) };
-//     union FLOAT_32 fy = { .u32 = half_to_float( y ) };
-//     union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32    };
-//     uint16       z  = float_to_half( fz );
-//
-
-#include "Half.h"
-#include <stdio.h>
-
-// Load immediate
-static inline uint32 _uint32_li( uint32 a )
-{
-  return (a);
-}
-
-// Decrement
-static inline uint32 _uint32_dec( uint32 a )
-{
-  return (a - 1);
-}
-
-// Complement
-static inline uint32 _uint32_not( uint32 a )
-{
-  return (~a);
-}
-
-// Negate
-static inline uint32 _uint32_neg( uint32 a )
-{
-#if NV_CC_MSVC
-  // prevent msvc warning.
-  return ~a + 1;
-#else
-  return (-a);
-#endif
-}
-
-// Extend sign
-static inline uint32 _uint32_ext( uint32 a )
-{
-  return (((int32)a)>>31);
-}
-
-// And
-static inline uint32 _uint32_and( uint32 a, uint32 b )
-{
-  return (a & b);
-}
-
-// And with Complement
-static inline uint32 _uint32_andc( uint32 a, uint32 b )
-{
-  return (a & ~b);
-}
-
-// Or
-static inline uint32 _uint32_or( uint32 a, uint32 b )
-{
-  return (a | b);
-}
-
-// Shift Right Logical
-static inline uint32 _uint32_srl( uint32 a, int sa )
-{
-  return (a >> sa);
-}
-
-// Shift Left Logical
-static inline uint32 _uint32_sll( uint32 a, int sa )
-{
-  return (a << sa);
-}
-
-// Add
-static inline uint32 _uint32_add( uint32 a, uint32 b )
-{
-  return (a + b);
-}
-
-// Subtract
-static inline uint32 _uint32_sub( uint32 a, uint32 b )
-{
-  return (a - b);
-}
-
-// Select on Sign bit
-static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b )
-{
-  const uint32 mask   = _uint32_ext( test );
-  const uint32 sel_a  = _uint32_and(  a,     mask  );
-  const uint32 sel_b  = _uint32_andc( b,     mask  );
-  const uint32 result = _uint32_or(   sel_a, sel_b );
-
-  return (result);
-}
-
-// Load Immediate
-static inline uint16 _uint16_li( uint16 a )
-{
-  return (a);
-}
-
-// Extend sign
-static inline uint16 _uint16_ext( uint16 a )
-{
-  return (((int16)a)>>15);
-}
-
-// Negate
-static inline uint16 _uint16_neg( uint16 a )
-{
-  return (-a);
-}
-
-// Complement
-static inline uint16 _uint16_not( uint16 a )
-{
-  return (~a);
-}
-
-// Decrement
-static inline uint16 _uint16_dec( uint16 a )
-{
-  return (a - 1);
-}
-
-// Shift Left Logical
-static inline uint16 _uint16_sll( uint16 a, int sa )
-{
-  return (a << sa);
-}
-
-// Shift Right Logical
-static inline uint16 _uint16_srl( uint16 a, int sa )
-{
-  return (a >> sa);
-}
-
-// Add
-static inline uint16 _uint16_add( uint16 a, uint16 b )
-{
-  return (a + b);
-}
-
-// Subtract
-static inline uint16 _uint16_sub( uint16 a, uint16 b )
-{
-  return (a - b);
-}
-
-// And
-static inline uint16 _uint16_and( uint16 a, uint16 b )
-{
-  return (a & b);
-}
-
-// Or
-static inline uint16 _uint16_or( uint16 a, uint16 b )
-{
-  return (a | b);
-}
-
-// Exclusive Or
-static inline uint16 _uint16_xor( uint16 a, uint16 b )
-{
-  return (a ^ b);
-}
-
-// And with Complement
-static inline uint16 _uint16_andc( uint16 a, uint16 b )
-{
-  return (a & ~b);
-}
-
-// And then Shift Right Logical
-static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa )
-{
-  return ((a & b) >> sa);
-}
-
-// Shift Right Logical then Mask
-static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask )
-{
-  return ((a >> sa) & mask);
-}
-
-// Add then Mask
-static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask )
-{
-  return ((a + b) & mask);
-}
-
-
-// Select on Sign bit
-static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b )
-{
-  const uint16 mask   = _uint16_ext( test );
-  const uint16 sel_a  = _uint16_and(  a,     mask  );
-  const uint16 sel_b  = _uint16_andc( b,     mask  );
-  const uint16 result = _uint16_or(   sel_a, sel_b );
-
-  return (result);
-}
-
-// Count Leading Zeros
-static inline uint32 _uint32_cntlz( uint32 x )
-{
-#ifdef __GNUC__
-  /* On PowerPC, this will map to insn: cntlzw */
-  /* On Pentium, this will map to insn: clz    */
-  uint32 nlz = __builtin_clz( x );
-  return (nlz);
-#else
-  const uint32 x0  = _uint32_srl(  x,  1 );
-  const uint32 x1  = _uint32_or(   x,  x0 );
-  const uint32 x2  = _uint32_srl(  x1, 2 );
-  const uint32 x3  = _uint32_or(   x1, x2 );
-  const uint32 x4  = _uint32_srl(  x3, 4 );
-  const uint32 x5  = _uint32_or(   x3, x4 );
-  const uint32 x6  = _uint32_srl(  x5, 8 );
-  const uint32 x7  = _uint32_or(   x5, x6 );
-  const uint32 x8  = _uint32_srl(  x7, 16 );
-  const uint32 x9  = _uint32_or(   x7, x8 );
-  const uint32 xA  = _uint32_not(  x9 );
-  const uint32 xB  = _uint32_srl(  xA, 1 );
-  const uint32 xC  = _uint32_and(  xB, 0x55555555 );
-  const uint32 xD  = _uint32_sub(  xA, xC );
-  const uint32 xE  = _uint32_and(  xD, 0x33333333 );
-  const uint32 xF  = _uint32_srl(  xD, 2 );
-  const uint32 x10 = _uint32_and(  xF, 0x33333333 );
-  const uint32 x11 = _uint32_add(  xE, x10 );
-  const uint32 x12 = _uint32_srl(  x11, 4 );
-  const uint32 x13 = _uint32_add(  x11, x12 );
-  const uint32 x14 = _uint32_and(  x13, 0x0f0f0f0f );
-  const uint32 x15 = _uint32_srl(  x14, 8 );
-  const uint32 x16 = _uint32_add(  x14, x15 );
-  const uint32 x17 = _uint32_srl(  x16, 16 );
-  const uint32 x18 = _uint32_add(  x16, x17 );
-  const uint32 x19 = _uint32_and(  x18, 0x0000003f );
-  return ( x19 );
-#endif
-}
-
-// Count Leading Zeros
-static inline uint16 _uint16_cntlz( uint16 x )
-{
-#ifdef __GNUC__
-  /* On PowerPC, this will map to insn: cntlzw */
-  /* On Pentium, this will map to insn: clz    */
-  uint32 x32   = _uint32_sll( x, 16 );
-  uint16 nlz   = (uint16)__builtin_clz( x32 );
-  return (nlz);
-#else
-  const uint16 x0  = _uint16_srl(  x,  1 );
-  const uint16 x1  = _uint16_or(   x,  x0 );
-  const uint16 x2  = _uint16_srl(  x1, 2 );
-  const uint16 x3  = _uint16_or(   x1, x2 );
-  const uint16 x4  = _uint16_srl(  x3, 4 );
-  const uint16 x5  = _uint16_or(   x3, x4 );
-  const uint16 x6  = _uint16_srl(  x5, 8 );
-  const uint16 x7  = _uint16_or(   x5, x6 );
-  const uint16 x8  = _uint16_not(  x7 );
-  const uint16 x9  = _uint16_srlm( x8, 1, 0x5555 );
-  const uint16 xA  = _uint16_sub(  x8, x9 );
-  const uint16 xB  = _uint16_and(  xA, 0x3333 );
-  const uint16 xC  = _uint16_srlm( xA, 2, 0x3333 );
-  const uint16 xD  = _uint16_add(  xB, xC );
-  const uint16 xE  = _uint16_srl(  xD, 4 );
-  const uint16 xF  = _uint16_addm( xD, xE, 0x0f0f );
-  const uint16 x10 = _uint16_srl(  xF, 8 );
-  const uint16 x11 = _uint16_addm( xF, x10, 0x001f );
-  return ( x11 );
-#endif
-}
-
-uint16
-half_from_float( uint32 f )
-{
-  const uint32 one                        = _uint32_li( 0x00000001 );
-  const uint32 f_e_mask                   = _uint32_li( 0x7f800000 );
-  const uint32 f_m_mask                   = _uint32_li( 0x007fffff );
-  const uint32 f_s_mask                   = _uint32_li( 0x80000000 );
-  const uint32 h_e_mask                   = _uint32_li( 0x00007c00 );
-  const uint32 f_e_pos                    = _uint32_li( 0x00000017 );
-  const uint32 f_m_round_bit              = _uint32_li( 0x00001000 );
-  const uint32 h_nan_em_min               = _uint32_li( 0x00007c01 );
-  const uint32 f_h_s_pos_offset           = _uint32_li( 0x00000010 );
-  const uint32 f_m_hidden_bit             = _uint32_li( 0x00800000 );
-  const uint32 f_h_m_pos_offset           = _uint32_li( 0x0000000d );
-  const uint32 f_h_bias_offset            = _uint32_li( 0x38000000 );
-  const uint32 f_m_snan_mask              = _uint32_li( 0x003fffff );
-  const uint16 h_snan_mask                = _uint32_li( 0x00007e00 );
-  const uint32 f_e                        = _uint32_and( f, f_e_mask  );
-  const uint32 f_m                        = _uint32_and( f, f_m_mask  );
-  const uint32 f_s                        = _uint32_and( f, f_s_mask  );
-  const uint32 f_e_h_bias                 = _uint32_sub( f_e,               f_h_bias_offset );
-  const uint32 f_e_h_bias_amount          = _uint32_srl( f_e_h_bias,        f_e_pos         );
-  const uint32 f_m_round_mask             = _uint32_and( f_m,               f_m_round_bit     );
-  const uint32 f_m_round_offset           = _uint32_sll( f_m_round_mask,    one               );
-  const uint32 f_m_rounded                = _uint32_add( f_m,               f_m_round_offset  );
-  const uint32 f_m_rounded_overflow       = _uint32_and( f_m_rounded,       f_m_hidden_bit    );
-  const uint32 f_m_denorm_sa              = _uint32_sub( one,               f_e_h_bias_amount );
-  const uint32 f_m_with_hidden            = _uint32_or(  f_m_rounded,       f_m_hidden_bit    );
-  const uint32 f_m_denorm                 = _uint32_srl( f_m_with_hidden,   f_m_denorm_sa     );
-  const uint32 f_em_norm_packed           = _uint32_or(  f_e_h_bias,        f_m_rounded       );
-  const uint32 f_e_overflow               = _uint32_add( f_e_h_bias,        f_m_hidden_bit    );
-  const uint32 h_s                        = _uint32_srl( f_s,               f_h_s_pos_offset );
-  const uint32 h_m_nan                    = _uint32_srl( f_m,               f_h_m_pos_offset );
-  const uint32 h_m_denorm                 = _uint32_srl( f_m_denorm,        f_h_m_pos_offset );
-  const uint32 h_em_norm                  = _uint32_srl( f_em_norm_packed,  f_h_m_pos_offset );
-  const uint32 h_em_overflow              = _uint32_srl( f_e_overflow,      f_h_m_pos_offset );
-  const uint32 is_e_eqz_msb               = _uint32_dec(  f_e     );
-  const uint32 is_m_nez_msb               = _uint32_neg(  f_m     );
-  const uint32 is_h_m_nan_nez_msb         = _uint32_neg(  h_m_nan );
-  const uint32 is_e_nflagged_msb          = _uint32_sub(  f_e,                 f_e_mask          );
-  const uint32 is_ninf_msb                = _uint32_or(   is_e_nflagged_msb,   is_m_nez_msb      );
-  const uint32 is_underflow_msb           = _uint32_sub(  is_e_eqz_msb,        f_h_bias_offset   );
-  const uint32 is_nan_nunderflow_msb      = _uint32_or(   is_h_m_nan_nez_msb,  is_e_nflagged_msb );
-  const uint32 is_m_snan_msb              = _uint32_sub(  f_m_snan_mask,       f_m               );
-  const uint32 is_snan_msb                = _uint32_andc( is_m_snan_msb,       is_e_nflagged_msb );
-  const uint32 is_overflow_msb            = _uint32_neg(  f_m_rounded_overflow );
-  const uint32 h_nan_underflow_result     = _uint32_sels( is_nan_nunderflow_msb, h_em_norm,                h_nan_em_min       );
-  const uint32 h_inf_result               = _uint32_sels( is_ninf_msb,           h_nan_underflow_result,   h_e_mask           );
-  const uint32 h_underflow_result         = _uint32_sels( is_underflow_msb,      h_m_denorm,               h_inf_result       );
-  const uint32 h_overflow_result          = _uint32_sels( is_overflow_msb,       h_em_overflow,            h_underflow_result );
-  const uint32 h_em_result                = _uint32_sels( is_snan_msb,           h_snan_mask,              h_overflow_result  );
-  const uint32 h_result                   = _uint32_or( h_em_result, h_s );
-
-  return (h_result);
-}
-
-uint32 
-half_to_float( uint16 h )
-{
-  const uint32 h_e_mask              = _uint32_li( 0x00007c00 );
-  const uint32 h_m_mask              = _uint32_li( 0x000003ff );
-  const uint32 h_s_mask              = _uint32_li( 0x00008000 );
-  const uint32 h_f_s_pos_offset      = _uint32_li( 0x00000010 );
-  const uint32 h_f_e_pos_offset      = _uint32_li( 0x0000000d );
-  const uint32 h_f_bias_offset       = _uint32_li( 0x0001c000 );
-  const uint32 f_e_mask              = _uint32_li( 0x7f800000 );
-  const uint32 f_m_mask              = _uint32_li( 0x007fffff );
-  const uint32 h_f_e_denorm_bias     = _uint32_li( 0x0000007e );
-  const uint32 h_f_m_denorm_sa_bias  = _uint32_li( 0x00000008 );
-  const uint32 f_e_pos               = _uint32_li( 0x00000017 );
-  const uint32 h_e_mask_minus_one    = _uint32_li( 0x00007bff );
-  const uint32 h_e                   = _uint32_and( h, h_e_mask );
-  const uint32 h_m                   = _uint32_and( h, h_m_mask );
-  const uint32 h_s                   = _uint32_and( h, h_s_mask );
-  const uint32 h_e_f_bias            = _uint32_add( h_e, h_f_bias_offset );
-  const uint32 h_m_nlz               = _uint32_cntlz( h_m );
-  const uint32 f_s                   = _uint32_sll( h_s,        h_f_s_pos_offset );
-  const uint32 f_e                   = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
-  const uint32 f_m                   = _uint32_sll( h_m,        h_f_e_pos_offset );
-  const uint32 f_em                  = _uint32_or(  f_e,        f_m              );
-  const uint32 h_f_m_sa              = _uint32_sub( h_m_nlz,             h_f_m_denorm_sa_bias );
-  const uint32 f_e_denorm_unpacked   = _uint32_sub( h_f_e_denorm_bias,   h_f_m_sa             );
-  const uint32 h_f_m                 = _uint32_sll( h_m,                 h_f_m_sa             );
-  const uint32 f_m_denorm            = _uint32_and( h_f_m,               f_m_mask             );
-  const uint32 f_e_denorm            = _uint32_sll( f_e_denorm_unpacked, f_e_pos              );
-  const uint32 f_em_denorm           = _uint32_or(  f_e_denorm,          f_m_denorm           );
-  const uint32 f_em_nan              = _uint32_or(  f_e_mask,            f_m                  );
-  const uint32 is_e_eqz_msb          = _uint32_dec(  h_e );
-  const uint32 is_m_nez_msb          = _uint32_neg(  h_m );
-  const uint32 is_e_flagged_msb      = _uint32_sub(  h_e_mask_minus_one, h_e );
-  const uint32 is_zero_msb           = _uint32_andc( is_e_eqz_msb,       is_m_nez_msb );
-  const uint32 is_inf_msb            = _uint32_andc( is_e_flagged_msb,   is_m_nez_msb );
-  const uint32 is_denorm_msb         = _uint32_and(  is_m_nez_msb,       is_e_eqz_msb );
-  const uint32 is_nan_msb            = _uint32_and(  is_e_flagged_msb,   is_m_nez_msb ); 
-  const uint32 is_zero               = _uint32_ext(  is_zero_msb );
-  const uint32 f_zero_result         = _uint32_andc( f_em, is_zero );
-  const uint32 f_denorm_result       = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
-  const uint32 f_inf_result          = _uint32_sels( is_inf_msb,    f_e_mask,    f_denorm_result );
-  const uint32 f_nan_result          = _uint32_sels( is_nan_msb,    f_em_nan,    f_inf_result    );
-  const uint32 f_result              = _uint32_or( f_s, f_nan_result );
- 
-  return (f_result);
-}
-
-uint16
-half_add( uint16 x, uint16 y )
-{
-  const uint16 one                       = _uint16_li( 0x0001 );
-  const uint16 msb_to_lsb_sa             = _uint16_li( 0x000f );
-  const uint16 h_s_mask                  = _uint16_li( 0x8000 );
-  const uint16 h_e_mask                  = _uint16_li( 0x7c00 );
-  const uint16 h_m_mask                  = _uint16_li( 0x03ff );
-  const uint16 h_m_msb_mask              = _uint16_li( 0x2000 );
-  const uint16 h_m_msb_sa                = _uint16_li( 0x000d );
-  const uint16 h_m_hidden                = _uint16_li( 0x0400 );
-  const uint16 h_e_pos                   = _uint16_li( 0x000a );
-  const uint16 h_e_bias_minus_one        = _uint16_li( 0x000e );
-  const uint16 h_m_grs_carry             = _uint16_li( 0x4000 );
-  const uint16 h_m_grs_carry_pos         = _uint16_li( 0x000e );
-  const uint16 h_grs_size                = _uint16_li( 0x0003 );
-  const uint16 h_snan                    = _uint16_li( 0xfe00 );
-  const uint16 h_e_mask_minus_one        = _uint16_li( 0x7bff );
-  const uint16 h_grs_round_carry         = _uint16_sll( one, h_grs_size );
-  const uint16 h_grs_round_mask          = _uint16_sub( h_grs_round_carry, one );
-  const uint16 x_e                       = _uint16_and( x, h_e_mask );
-  const uint16 y_e                       = _uint16_and( y, h_e_mask );
-  const uint16 is_y_e_larger_msb         = _uint16_sub( x_e, y_e );
-  const uint16 a                         = _uint16_sels( is_y_e_larger_msb, y, x);
-  const uint16 a_s                       = _uint16_and( a, h_s_mask );
-  const uint16 a_e                       = _uint16_and( a, h_e_mask );
-  const uint16 a_m_no_hidden_bit         = _uint16_and( a, h_m_mask );
-  const uint16 a_em_no_hidden_bit        = _uint16_or( a_e, a_m_no_hidden_bit );
-  const uint16 b                         = _uint16_sels( is_y_e_larger_msb, x, y);
-  const uint16 b_s                       = _uint16_and( b, h_s_mask );
-  const uint16 b_e                       = _uint16_and( b, h_e_mask );
-  const uint16 b_m_no_hidden_bit         = _uint16_and( b, h_m_mask );
-  const uint16 b_em_no_hidden_bit        = _uint16_or( b_e, b_m_no_hidden_bit );
-  const uint16 is_diff_sign_msb          = _uint16_xor( a_s, b_s );
-  const uint16 is_a_inf_msb              = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit );
-  const uint16 is_b_inf_msb              = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit );
-  const uint16 is_undenorm_msb           = _uint16_dec( a_e );
-  const uint16 is_undenorm               = _uint16_ext( is_undenorm_msb );
-  const uint16 is_both_inf_msb           = _uint16_and( is_a_inf_msb, is_b_inf_msb );
-  const uint16 is_invalid_inf_op_msb     = _uint16_and( is_both_inf_msb, b_s );
-  const uint16 is_a_e_nez_msb            = _uint16_neg( a_e );
-  const uint16 is_b_e_nez_msb            = _uint16_neg( b_e );
-  const uint16 is_a_e_nez                = _uint16_ext( is_a_e_nez_msb );
-  const uint16 is_b_e_nez                = _uint16_ext( is_b_e_nez_msb );
-  const uint16 a_m_hidden_bit            = _uint16_and( is_a_e_nez, h_m_hidden );
-  const uint16 b_m_hidden_bit            = _uint16_and( is_b_e_nez, h_m_hidden );
-  const uint16 a_m_no_grs                = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit );
-  const uint16 b_m_no_grs                = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit );
-  const uint16 diff_e                    = _uint16_sub( a_e,        b_e );
-  const uint16 a_e_unbias                = _uint16_sub( a_e,        h_e_bias_minus_one );
-  const uint16 a_m                       = _uint16_sll( a_m_no_grs, h_grs_size );
-  const uint16 a_e_biased                = _uint16_srl( a_e,        h_e_pos );
-  const uint16 m_sa_unbias               = _uint16_srl( a_e_unbias, h_e_pos );
-  const uint16 m_sa_default              = _uint16_srl( diff_e,     h_e_pos );
-  const uint16 m_sa_unbias_mask          = _uint16_andc( is_a_e_nez_msb,   is_b_e_nez_msb );
-  const uint16 m_sa                      = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default );
-  const uint16 b_m_no_sticky             = _uint16_sll( b_m_no_grs,        h_grs_size );
-  const uint16 sh_m                      = _uint16_srl( b_m_no_sticky,     m_sa );
-  const uint16 sticky_overflow           = _uint16_sll( one,               m_sa );
-  const uint16 sticky_mask               = _uint16_dec( sticky_overflow );
-  const uint16 sticky_collect            = _uint16_and( b_m_no_sticky, sticky_mask );
-  const uint16 is_sticky_set_msb         = _uint16_neg( sticky_collect );
-  const uint16 sticky                    = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa);
-  const uint16 b_m                       = _uint16_or( sh_m, sticky );
-  const uint16 is_c_m_ab_pos_msb         = _uint16_sub( b_m, a_m );
-  const uint16 c_inf                     = _uint16_or( a_s, h_e_mask );
-  const uint16 c_m_sum                   = _uint16_add( a_m, b_m );
-  const uint16 c_m_diff_ab               = _uint16_sub( a_m, b_m );
-  const uint16 c_m_diff_ba               = _uint16_sub( b_m, a_m );
-  const uint16 c_m_smag_diff             = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba );
-  const uint16 c_s_diff                  = _uint16_sels( is_c_m_ab_pos_msb, a_s,         b_s         );
-  const uint16 c_s                       = _uint16_sels( is_diff_sign_msb,  c_s_diff,    a_s         );
-  const uint16 c_m_smag_diff_nlz         = _uint16_cntlz( c_m_smag_diff );
-  const uint16 diff_norm_sa              = _uint16_sub( c_m_smag_diff_nlz, one );
-  const uint16 is_diff_denorm_msb        = _uint16_sub( a_e_biased, diff_norm_sa );
-  const uint16 is_diff_denorm            = _uint16_ext( is_diff_denorm_msb );
-  const uint16 is_a_or_b_norm_msb        = _uint16_neg( a_e_biased );
-  const uint16 diff_denorm_sa            = _uint16_dec( a_e_biased );
-  const uint16 c_m_diff_denorm           = _uint16_sll( c_m_smag_diff, diff_denorm_sa );
-  const uint16 c_m_diff_norm             = _uint16_sll( c_m_smag_diff, diff_norm_sa );
-  const uint16 c_e_diff_norm             = _uint16_sub( a_e_biased,  diff_norm_sa );
-  const uint16 c_m_diff_ab_norm          = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm );
-  const uint16 c_e_diff_ab_norm          = _uint16_andc( c_e_diff_norm, is_diff_denorm );
-  const uint16 c_m_diff                  = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff );
-  const uint16 c_e_diff                  = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased    );
-  const uint16 is_diff_eqz_msb           = _uint16_dec( c_m_diff );
-  const uint16 is_diff_exactly_zero_msb  = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb );
-  const uint16 is_diff_exactly_zero      = _uint16_ext( is_diff_exactly_zero_msb );
-  const uint16 c_m_added                 = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum );
-  const uint16 c_e_added                 = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased );
-  const uint16 c_m_carry                 = _uint16_and( c_m_added, h_m_grs_carry );
-  const uint16 is_c_m_carry_msb          = _uint16_neg( c_m_carry );
-  const uint16 c_e_hidden_offset         = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos );
-  const uint16 c_m_sub_hidden            = _uint16_srl( c_m_added, one );
-  const uint16 c_m_no_hidden             = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added );
-  const uint16 c_e_no_hidden             = _uint16_add( c_e_added,         c_e_hidden_offset  );
-  const uint16 c_m_no_hidden_msb         = _uint16_and( c_m_no_hidden,     h_m_msb_mask       );
-  const uint16 undenorm_m_msb_odd        = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa         );
-  const uint16 undenorm_fix_e            = _uint16_and( is_undenorm,       undenorm_m_msb_odd );
-  const uint16 c_e_fixed                 = _uint16_add( c_e_no_hidden,     undenorm_fix_e     );
-  const uint16 c_m_round_amount          = _uint16_and( c_m_no_hidden,     h_grs_round_mask   );
-  const uint16 c_m_rounded               = _uint16_add( c_m_no_hidden,     c_m_round_amount   );
-  const uint16 c_m_round_overflow        = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos );
-  const uint16 c_e_rounded               = _uint16_add( c_e_fixed, c_m_round_overflow );
-  const uint16 c_m_no_grs                = _uint16_srlm( c_m_rounded, h_grs_size,  h_m_mask );
-  const uint16 c_e                       = _uint16_sll( c_e_rounded, h_e_pos );
-  const uint16 c_em                      = _uint16_or( c_e, c_m_no_grs );
-  const uint16 c_normal                  = _uint16_or( c_s, c_em );
-  const uint16 c_inf_result              = _uint16_sels( is_a_inf_msb, c_inf, c_normal );
-  const uint16 c_zero_result             = _uint16_andc( c_inf_result, is_diff_exactly_zero );
-  const uint16 c_result                  = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result );
-
-  return (c_result);
-}
--- a/src/nvmath/Half.h
+++ b/src/nvmath/Half.h
@ -1,9 +0,0 @@
-#ifndef NV_MATH_HALF_H
-#define NV_MATH_HALF_H
-
-#include <nvmath/nvmath.h>
-
-uint32 half_to_float( uint16 h );
-uint16 half_from_float( uint32 f );
-
-#endif /* NV_MATH_HALF_H */
--- a/src/nvmath/KahanSum.h
+++ b/src/nvmath/KahanSum.h
@ -1,38 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_KAHANSUM_H
-#define NV_MATH_KAHANSUM_H
-
-#include <nvmath/nvmath.h>
-
-namespace nv
-{
-
-	class KahanSum
-	{
-	public:
-		KahanSum() : accum(0.0f), err(0) {};
-		
-		void add(float f)
-		{
-			float compensated = f + err;
-			float tmp = accum + compensated;
-			err = accum - tmp;
-			err += compensated;
-			accum = tmp;
-		}
-		
-		float sum() const
-		{
-			return accum;
-		}
-		
-	private:
-		float accum;
-		float err;
-	};
-	
-} // nv namespace
-
-
-#endif // NV_MATH_KAHANSUM_H
--- a/src/nvmath/Polygon.cpp
+++ b/src/nvmath/Polygon.cpp
@ -1,168 +0,0 @@
-// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
-
-#include <nvmath/Polygon.h>
-
-#include <nvmath/Triangle.h>
-#include <nvmath/Plane.h>
-
-using namespace nv;
-
-
-Polygon::Polygon()
-{
-}
-
-Polygon::Polygon(const Triangle & t)
-{
-	pointArray.resize(3);
-	pointArray[0] = t.v[0];
-	pointArray[1] = t.v[1];
-	pointArray[2] = t.v[2];
-}
-
-Polygon::Polygon(const Vector3 * points, uint vertexCount)
-{
-	pointArray.resize(vertexCount);
-	
-	for (uint i = 0; i < vertexCount; i++)
-	{
-		pointArray[i] = points[i];
-	}
-}
-
-
-/// Compute polygon area.
-float Polygon::area() const
-{
-	float total = 0;
-	
-	const uint pointCount = pointArray.count();
-    for (uint i = 2; i < pointCount; i++)
-    {
-		Vector3 v1 = pointArray[i-1] - pointArray[0];
-		Vector3 v2 = pointArray[i] - pointArray[0];
-		
-		total += 0.5f * length(cross(v1, v2));
-    }
-	
-    return total;	
-}
-
-/// Get the bounds of the polygon.
-Box Polygon::bounds() const
-{
-	Box bounds;
-	bounds.clearBounds();
-	foreach(p, pointArray)
-	{
-		bounds.addPointToBounds(pointArray[p]);
-	}
-	return bounds;
-}
-
-
-/// Get the plane of the polygon.
-Plane Polygon::plane() const
-{
-	// @@ Do something better than this?
-	Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
-	return Vector4(n, dot(n, pointArray[0]));
-}
-
-
-/// Clip polygon to box.
-uint Polygon::clipTo(const Box & box)
-{
-	const Plane posX( 1, 0, 0, box.maxCorner().x());
-	const Plane negX(-1, 0, 0,-box.minCorner().x());
-	const Plane posY( 0, 1, 0, box.maxCorner().y());
-	const Plane negY( 0,-1, 0,-box.minCorner().y());
-	const Plane posZ( 0, 0, 1, box.maxCorner().z());
-	const Plane negZ( 0, 0,-1,-box.minCorner().z());
-
-	if (clipTo(posX) == 0) return 0;
-	if (clipTo(negX) == 0) return 0;
-	if (clipTo(posY) == 0) return 0;
-	if (clipTo(negY) == 0) return 0;
-	if (clipTo(posZ) == 0) return 0;
-	if (clipTo(negZ) == 0) return 0;
-	
-	return pointArray.count();
-}
-
-
-/// Clip polygon to plane.
-uint Polygon::clipTo(const Plane & plane)
-{
-	int count = 0;
-
-	const uint pointCount = pointArray.count();
-	
-	Array<Vector3> newPointArray(pointCount + 1);	// @@ Do not create copy every time.
-	
-	Vector3 prevPoint = pointArray[pointCount - 1];
-	float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
-	
-	for (uint i = 0; i < pointCount; i++)
-	{
-		const Vector3 point = pointArray[i];
-		float dist = dot(plane.vector(), point) - plane.offset();
-		
-		// @@ Handle points on plane better.
-		
-		if (dist <= 0) // interior.
-		{
-			if (prevDist > 0) // exterior
-			{
-				// Add segment intersection point.
-				Vector3 dp = point - prevPoint;
-				
-				float t = dist / prevDist;
-				newPointArray.append(point - dp * t);
-			}
-			
-			// Add interior point.
-			newPointArray.append(point);
-		}
-		else if (dist > 0 && prevDist < 0)
-		{
-			// Add segment intersection point.
-			Vector3 dp = point - prevPoint;
-			
-			float t = dist / prevDist;
-			newPointArray.append(point - dp * t);
-		}
-		
-		prevPoint = point;
-		prevDist = dist;
-	}
-
-	swap(pointArray, newPointArray);
-
-	return count;
-}
-
-
-void Polygon::removeColinearPoints()
-{
-	const uint pointCount = pointArray.count();
-	
-	Array<Vector3> newPointArray(pointCount);
-	
-    for (uint i = 0 ; i < pointCount; i++)
-    {
-        int j = (i + 1) % pointCount;
-        int k = (i + pointCount - 1) % pointCount;
-		
-		Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
-		Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
-		
-        if (dot(v1, v2) < 0.999)
-        {
-			newPointArray.append(pointArray[i]);
-        }
-    }
-
-	swap(pointArray, newPointArray);
-}
-
--- a/src/nvmath/Polygon.h
+++ b/src/nvmath/Polygon.h
@ -1,45 +0,0 @@
-// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
-
-#ifndef NV_MATH_POLYGON_H
-#define NV_MATH_POLYGON_H
-
-#include <nvcore/Containers.h>
-
-#include <nvmath/nvmath.h>
-#include <nvmath/Vector.h>
-#include <nvmath/Box.h>
-
-namespace nv
-{
-	class Box;
-	class Plane;
-	class Triangle;
-	
-
-	class Polygon
-	{
-		NV_FORBID_COPY(Polygon);
-	public:
-
-		Polygon();
-		Polygon(const Triangle & t);
-		Polygon(const Vector3 * points, uint vertexCount);
-
-		float area() const;
-		Box bounds() const;
-		Plane plane() const;
-
-		uint clipTo(const Box & box);
-		uint clipTo(const Plane & plane);
-
-		void removeColinearPoints();
-		
-	private:
-
-		Array<Vector3> pointArray;
-	};
-
-
-} // nv namespace
-
-#endif	// NV_MATH_POLYGON_H
--- a/src/nvmath/Quaternion.h
+++ b/src/nvmath/Quaternion.h
@ -51,6 +51,7 @@ namespace nv

 	inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
 	{
+		// @@ Efficient SIMD implementation?
 		return Quaternion(
 			+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
 			- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
@ -58,40 +59,6 @@ namespace nv
 			- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
 	}

-	inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
-	{
-		return Quaternion(
-			                + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
-			- a.x() * b.z()               + a.z()*b.x() + a.w()*b.y(),
-			+ a.x() * b.y() - a.y()*b.x()               + a.w()*b.z(),
-			- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
-	}
-
-	inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
-	{
-		return Quaternion(
-			+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
-			- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
-			+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
-			- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
-	}
-
-	inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
-	{
-		return mul(a, b);
-	}
-
-	inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
-	{
-		return mul(a, b);
-	}
-
-	inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
-	{
-		return mul(a, b);
-	}
-
-
 	inline Quaternion scale(Quaternion::Arg q, float s)
 	{
 		return scale(q.asVector(), s);
@ -155,24 +122,6 @@ namespace nv
 		return Quaternion(Vector4(v * s, c));
 	}

-	inline Vector3 imag(Quaternion::Arg q)
-	{
-		return q.asVector().xyz();
-	}
-
-	inline float real(Quaternion::Arg q)
-	{
-		return q.w();
-	}
-
-
-	/// Transform vector.
-	inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
-	{
-		Quaternion t = q * v * conjugate(q);
-		return imag(t);
-	}
-

 } // nv namespace

--- a/src/nvmath/Solver.cpp
+++ b/src/nvmath/Solver.cpp
@ -1,726 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include <nvmath/Solver.h>
-
-using namespace nv;
-
-namespace
-{
-	class Preconditioner
-	{
-	public:
-		// Virtual dtor.
-		virtual ~Preconditioner() { }
-	 
-		// Apply preconditioning step.
-		virtual void apply(const FullVector & x, FullVector & y) const = 0;
-	};
-
-
-	// Jacobi preconditioner.
-	class JacobiPreconditioner : public Preconditioner
-	{
-	public:
-
-		JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
-		{
-			nvCheck(M.isSquare());
-			
-			for(uint x = 0; x < M.width(); x++)
-			{
-				float elem = M.getCoefficient(x, x);
-				nvDebugCheck( elem != 0.0f );
-
-				if (symmetric) 
-				{
-					m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
-				}
-				else 
-				{
-					m_inverseDiagonal[x] = 1.0f / elem;
-				}
-			}
-		}
-
-		void apply(const FullVector & x, FullVector & y) const
-		{
-			y *= x;
-		}
-
-	private:
-
-		FullVector m_inverseDiagonal;
-
-	};
-
-} // namespace
-
-
-static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
-static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
-
-
-// Solve the symmetric system: At<41>A<EFBFBD>x = At<41>b
-void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
-{
-	nvDebugCheck(A.width() == x.dimension());
-	nvDebugCheck(A.height() == b.dimension());
-	nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
-
-	const uint D = A.width();
-
-	FullVector Atb(D);
-	mult(Transposed, A, b, Atb);
-
-	SparseMatrix AtA(D);
-	mult(Transposed, A, NoTransposed, A, AtA);
-
-	SymmetricSolver(AtA, Atb, x, epsilon);
-}
-
-
-// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
-void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
-{
-	nvDebugCheck(A.width() == x.dimension());
-	nvDebugCheck(A.height() == b.dimension());
-	nvDebugCheck(A.height() >= A.width() - lockedCount);
-
-	// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
-
-	const uint D = A.width() - lockedCount;
-	nvDebugCheck(D > 0);
-
-	// Compute: b - Al * xl
-	FullVector b_Alxl(b);
-
-	for (uint y = 0; y < A.height(); y++)
-	{
-		const uint count = A.getRow(y).count();
-		for (uint e = 0; e < count; e++)
-		{
-			uint column = A.getRow(y)[e].x;
-
-			bool isFree = true;
-			for (uint i = 0; i < lockedCount; i++) 
-			{
-				isFree &= (lockedParameters[i] != column);
-			}
-
-			if (!isFree)
-			{
-				b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
-			}
-		}
-	}
-
-	// Remove locked columns from A.
-	SparseMatrix Af(D, A.height());
-
-	for (uint y = 0; y < A.height(); y++)
-	{
-		const uint count = A.getRow(y).count();
-		for (uint e = 0; e < count; e++)
-		{
-			uint column = A.getRow(y)[e].x;
-			uint ix = column;
-
-			bool isFree = true;
-			for (uint i = 0; i < lockedCount; i++) 
-			{
-				isFree &= (lockedParameters[i] != column);
-				if (column > lockedParameters[i]) ix--; // shift columns
-			}
-
-			if (isFree)
-			{
-				Af.setCoefficient(ix, y, A.getRow(y)[e].v);
-			}
-		}
-	}
-	
-	// Remove elements from x
-	FullVector xf(D);
-
-	for (uint i = 0, j = 0; i < A.width(); i++)
-	{
-		bool isFree = true;
-		for (uint l = 0; l < lockedCount; l++) 
-		{
-			isFree &= (lockedParameters[l] != i);
-		}
-
-		if (isFree)
-		{
-			xf[j++] = x[i];
-		}
-	}
-
-	// Solve reduced system.
-	LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
-
-	// Copy results back to x.
-	for (uint i = 0, j = 0; i < A.width(); i++)
-	{
-		bool isFree = true;
-		for (uint l = 0; l < lockedCount; l++) 
-		{
-			isFree &= (lockedParameters[l] != i);
-		}
-
-		if (isFree)
-		{
-			x[i] = xf[j++];
-		}
-	}
-}
-
-
-void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
-{
-	nvDebugCheck(A.height() == A.width());
-	nvDebugCheck(A.height() == b.dimension());
-	nvDebugCheck(b.dimension() == x.dimension());
-
-//	JacobiPreconditioner jacobi(A, true);
-
-//	ConjugateGradientSolver(jacobi, A, b, x, epsilon);
-	ConjugateGradientSolver(A, b, x, epsilon);
-}
-
-
-/**
- * Compute the solution of the sparse linear system Ab=x using the Conjugate
- * Gradient method.
- *
- * Solving sparse linear systems:
- * (1)		A<>x = b
- * 
- * The conjugate gradient algorithm solves (1) only in the case that A is 
- * symmetric and positive definite. It is based on the idea of minimizing the 
- * function
- * 
- * (2)		f(x) = 1/2<>x<EFBFBD>A<EFBFBD>x - b<>x
- * 
- * This function is minimized when its gradient
- * 
- * (3)		df = A<>x - b
- * 
- * is zero, which is equivalent to (1). The minimization is carried out by 
- * generating a succession of search directions p.k and improved minimizers x.k. 
- * At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k<>p.k), 
- * and x.k+1 is set equal to the new point x.k + alfa.k<>p.k. The p.k and x.k are 
- * built up in such a way that x.k+1 is also the minimizer of f over the whole
- * vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N 
- * iterations you arrive at the minimizer over the entire vector space, i.e., the 
- * solution to (1).
- *
- * For a really good explanation of the method see:
- *
- * "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
- * Jonhathan Richard Shewchuk.
- *
-**/
-/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
-{
-	nvDebugCheck( A.isSquare() );
-	nvDebugCheck( A.width() == b.dimension() );
-	nvDebugCheck( A.width() == x.dimension() );
-
-	int i = 0;
-	const int D = A.width();
-	const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
-
-	FullVector r(D);   // residual
-	FullVector p(D);   // search direction
-	FullVector q(D);   // 
-	float delta_0;
-	float delta_old;
-	float delta_new;
-	float alpha;
-	float beta;
-
-	// r = b - A<>x;
-	copy(b, r);
-	sgemv(-1, A, x, 1, r);
-
-	// p = r;
-	copy(r, p);
-	
-	delta_new = dot( r, r );
-	delta_0 = delta_new;
-
-	while (i < i_max && delta_new > epsilon*epsilon*delta_0)
-	{
-		i++;
-
-		// q = A<>p
-		mult(A, p, q);
-
-		// alpha = delta_new / p<>q
-		alpha = delta_new / dot( p, q );
-
-		// x = alfa<66>p + x
-		saxpy(alpha, p, x);
-
-		if ((i & 31) == 0) // recompute r after 32 steps
-		{
-			// r = b - A<>x
-			copy(b, r);
-			sgemv(-1, A, x, 1, r);
-		}
-		else
-		{
-			// r = r - alpha<68>q
-			saxpy(-alpha, q, r);
-		}
-
-		delta_old = delta_new;
-		delta_new = dot( r, r );
-
-		beta = delta_new / delta_old;
-
-		// p = r + beta<74>p
-		copy(r, p);
-		saxpy(beta, p, r);
-	}
-
-	return i;
-}
-
-
-// Conjugate gradient with preconditioner.
-/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
-{
-	nvDebugCheck( A.isSquare() );
-	nvDebugCheck( A.width() == b.dimension() );
-	nvDebugCheck( A.width() == x.dimension() );
-
-	int i = 0;
-	const int D = A.width();
-	const int i_max = 4 * D;   // Convergence should be linear, but in some cases, it's not.
-
-	FullVector r(D);    // residual
-	FullVector p(D);    // search direction
-	FullVector q(D);    // 
-	FullVector s(D);    // preconditioned
-	float delta_0;
-	float delta_old;
-	float delta_new;
-	float alpha;
-	float beta;
-
-	// r = b - A<>x
-	copy(b, r);
-	sgemv(-1, A, x, 1, r);
-
-
-	// p = M^-1 <20> r
-	preconditioner.apply(r, p);
-	//copy(r, p);
-
-
-	delta_new = dot(r, p);
-	delta_0 = delta_new;
-
-	while (i < i_max && delta_new > epsilon*epsilon*delta_0)
-	{
-		i++;
-
-		// q = A<>p
-		mult(A, p, q);
-
-		// alpha = delta_new / p<>q
-		alpha = delta_new / dot(p, q);
-
-		// x = alfa<66>p + x
-		saxpy(alpha, p, x);
-
-		if ((i & 31) == 0)  // recompute r after 32 steps
-		{			
-			// r = b - A<>x
-			copy(b, r);
-			sgemv(-1, A, x, 1, r);
-		}
-		else
-		{
-			// r = r - alfa<66>q
-			saxpy(-alpha, q, r);
-		}
-
-		// s = M^-1 <20> r
-		preconditioner.apply(r, s);
-		//copy(r, s);
-
-		delta_old = delta_new;
-		delta_new = dot( r, s );
-
-		beta = delta_new / delta_old;
-
-		// p = s + beta<74>p
-		copy(s, p);
-		saxpy(beta, p, s);
-	}
-
-	return i;
-}
-
-
-#if 0 // Nonsymmetric solvers
-
-/** Bi-conjugate gradient method.  */
-MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
-	piDebugCheck( A.IsSquare() );
-	piDebugCheck( A.Width() == b.Dim() );
-	piDebugCheck( A.Width() == x.Dim() );
-
-	int i = 0;
-	const int D = A.Width();
-	const int i_max = 4 * D;
-
-	float resid;
-	float rho_1 = 0;
-	float rho_2 = 0;
-	float alpha;
-	float beta;
-
-	DenseVector r(D);
-	DenseVector rtilde(D);
-	DenseVector p(D);
-	DenseVector ptilde(D);
-	DenseVector q(D);
-	DenseVector qtilde(D);
-	DenseVector tmp(D);	// temporal vector.
-
-	// r = b - A<>x;
-	A.Product( x, tmp );
-	r.Sub( b, tmp );
-
-	// rtilde = r
-	rtilde.Set( r );
-
-	// p = r;
-	p.Set( r );
-
-	// ptilde = rtilde
-	ptilde.Set( rtilde );
-
-
-
-	float normb = b.Norm();
-  	if( normb == 0.0 ) normb = 1;
-
-	// test convergence
-	resid = r.Norm() / normb;
-	if( resid < epsilon ) {
-		// method converges?
-		return 0;
-	}
-
-
-	while( i < i_max ) {
-
-		i++;
-		
-		rho_1 = DenseVectorDotProduct( r, rtilde );
-
-		if( rho_1 == 0 ) {
-			// method fails.
-			return -i;
-		}
-
-		if (i == 1) {
-			p.Set( r );
-			ptilde.Set( rtilde );
-		} 
-		else {
-			beta = rho_1 / rho_2;
-			
-			// p = r + beta * p;
-			p.Mad( r, p, beta );
-			
-			// ptilde = ztilde + beta * ptilde;
-			ptilde.Mad( rtilde, ptilde, beta );
-		}
-
-		// q = A * p;
-		A.Product( p, q );
-
-		// qtilde = A^t * ptilde;
-		A.TransProduct( ptilde, qtilde );
-
-		alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
-
-		// x += alpha * p;
-		x.Mad( x, p, alpha );
-
-		// r -= alpha * q;
-		r.Mad( r, q, -alpha );
-		
-		// rtilde -= alpha * qtilde;
-		rtilde.Mad( rtilde, qtilde, -alpha );
-
-		rho_2 = rho_1;
-
-		// test convergence
-		resid = r.Norm() / normb;
-		if( resid < epsilon ) {
-			// method converges
-			return i;
-		}
-	}
-
-	return i;
-}
-
-
-/** Bi-conjugate gradient stabilized method. */
-int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
-	piDebugCheck( A.IsSquare() );
-	piDebugCheck( A.Width() == b.Dim() );
-	piDebugCheck( A.Width() == x.Dim() );
-
-	int i = 0;
-	const int D = A.Width();
-	const int i_max = 2 * D;
-
-
-	float resid;
-	float rho_1 = 0;
-	float rho_2 = 0;
-	float alpha = 0;
-	float beta = 0;
-	float omega = 0;
-
-	DenseVector p(D);
-	DenseVector phat(D);
-	DenseVector s(D);
-	DenseVector shat(D);
-	DenseVector t(D);
-	DenseVector v(D);
-
-	DenseVector r(D);
-	DenseVector rtilde(D);
-
-	DenseVector tmp(D);
-
-	// r = b - A<>x;
-	A.Product( x, tmp );
-	r.Sub( b, tmp );
-
-	// rtilde = r
-	rtilde.Set( r );
-
-
-	float normb = b.Norm();
-	if( normb == 0.0 ) normb = 1;
-  
-	// test convergence
-	resid = r.Norm() / normb;
-	if( resid < epsilon ) {
-		// method converges?
-		return 0;
-	}
-
-
-	while( i<i_max ) {
-
-		i++;
-	
-		rho_1 = DenseVectorDotProduct( rtilde, r );
-		if( rho_1 == 0 ) {
-			// method fails
-			return -i;
-		}
-
-
-		if( i == 1 ) {
-			p.Set( r );
-		}
-		else {
-			beta = (rho_1 / rho_2) * (alpha / omega);
-
-			// p = r + beta * (p - omega * v);
-			p.Mad( p, v, -omega );
-			p.Mad( r, p, beta );
-		}
-
-		//phat = M.solve(p);
-		phat.Set( p );
-		//Precond( &phat, p );
-
-		//v = A * phat;
-		A.Product( phat, v );
-
-		alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
-
-		// s = r - alpha * v;
-		s.Mad( r, v, -alpha );
-
-
-		resid = s.Norm() / normb;
-		if( resid < epsilon ) {
-			// x += alpha * phat;
-			x.Mad( x, phat, alpha );
-			return i;
-		}
-
-		//shat = M.solve(s);
-		shat.Set( s );
-		//Precond( &shat, s );
-
-		//t = A * shat;
-		A.Product( shat, t );
-
-		omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
-
-		// x += alpha * phat + omega * shat;
-		x.Mad( x, shat, omega );
-		x.Mad( x, phat, alpha );
-
-		//r = s - omega * t;
-		r.Mad( s, t, -omega );
-
-		rho_2 = rho_1;
-		
-		resid = r.Norm() / normb;
-		if( resid < epsilon ) {
-			return i;
-		}
-
-		if( omega == 0 ) {
-			return -i;	// ???
-		}
-	}
-
-	return i;
-}
-
-
-/** Bi-conjugate gradient stabilized method. */
-int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
-	piDebugCheck( A.IsSquare() );
-	piDebugCheck( A.Width() == b.Dim() );
-	piDebugCheck( A.Width() == x.Dim() );
-
-	int i = 0;
-	const int D = A.Width();
-	const int i_max = D;
-//	const int i_max = 1000;
-
-
-	float resid;
-	float rho_1 = 0;
-	float rho_2 = 0;
-	float alpha = 0;
-	float beta = 0;
-	float omega = 0;
-
-	DenseVector p(D);
-	DenseVector phat(D);
-	DenseVector s(D);
-	DenseVector shat(D);
-	DenseVector t(D);
-	DenseVector v(D);
-
-	DenseVector r(D);
-	DenseVector rtilde(D);
-
-	DenseVector tmp(D);
-
-	// r = b - A<>x;
-	A.Product( x, tmp );
-	r.Sub( b, tmp );
-
-	// rtilde = r
-	rtilde.Set( r );
-
-
-	float normb = b.Norm();
-	if( normb == 0.0 ) normb = 1;
-  
-	// test convergence
-	resid = r.Norm() / normb;
-	if( resid < epsilon ) {
-		// method converges?
-		return 0;
-	}
-
-
-	while( i<i_max ) {
-
-		i++;
-	
-		rho_1 = DenseVectorDotProduct( rtilde, r );
-		if( rho_1 == 0 ) {
-			// method fails
-			return -i;
-		}
-
-
-		if( i == 1 ) {
-			p.Set( r );
-		}
-		else {
-			beta = (rho_1 / rho_2) * (alpha / omega);
-
-			// p = r + beta * (p - omega * v);
-			p.Mad( p, v, -omega );
-			p.Mad( r, p, beta );
-		}
-
-		//phat = M.solve(p);
-		//phat.Set( p );
-		M.Precond( &phat, p );
-
-		//v = A * phat;
-		A.Product( phat, v );
-
-		alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
-
-		// s = r - alpha * v;
-		s.Mad( r, v, -alpha );
-
-
-		resid = s.Norm() / normb;
-
-		//printf( "--- Iteration %d: residual = %f\n", i, resid );
-
-		if( resid < epsilon ) {
-			// x += alpha * phat;
-			x.Mad( x, phat, alpha );
-			return i;
-		}
-
-		//shat = M.solve(s);
-		//shat.Set( s );
-		M.Precond( &shat, s );
-
-		//t = A * shat;
-		A.Product( shat, t );
-
-		omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
-
-		// x += alpha * phat + omega * shat;
-		x.Mad( x, shat, omega );
-		x.Mad( x, phat, alpha );
-
-		//r = s - omega * t;
-		r.Mad( s, t, -omega );
-
-		rho_2 = rho_1;
-		
-		resid = r.Norm() / normb;
-		if( resid < epsilon ) {
-			return i;
-		}
-
-		if( omega == 0 ) {
-			return -i;	// ???
-		}
-	}
-
-	return i;
-}
-
-#endif
--- a/src/nvmath/Solver.h
+++ b/src/nvmath/Solver.h
@ -1,20 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_SOLVER_H
-#define NV_MATH_SOLVER_H
-
-#include <nvmath/Sparse.h>
-
-namespace nv
-{
-
-	// Linear solvers.
-	NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
-	NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
-	NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
-//	NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
-
-} // nv namespace
-
-
-#endif // NV_MATH_SOLVER_H
--- a/src/nvmath/Sparse.cpp
+++ b/src/nvmath/Sparse.cpp
@ -1,831 +0,0 @@
-// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
-
-#include <nvmath/Sparse.h>
-#include <nvmath/KahanSum.h>
-
-using namespace nv;
-
-
-/// Ctor.
-FullVector::FullVector(uint dim)
-{ 
-	m_array.resize(dim); 
-}
-
-/// Copy ctor.
-FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
-{
-}
-
-/// Copy operator
-const FullVector & FullVector::operator=(const FullVector & v)
-{
-	nvCheck(dimension() == v.dimension());
-	
-	m_array = v.m_array;
-
-	return *this;
-}
-		
-
-void FullVector::fill(float f)
-{
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] = f;
-	}
-}
-
-void FullVector::operator+= (const FullVector & v)
-{
-	nvDebugCheck(dimension() == v.dimension());
-
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] += v.m_array[i];
-	}
-}
-
-void FullVector::operator-= (const FullVector & v)
-{
-	nvDebugCheck(dimension() == v.dimension());
-
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] -= v.m_array[i];
-	}
-}
-
-void FullVector::operator*= (const FullVector & v)
-{
-	nvDebugCheck(dimension() == v.dimension());
-
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] *= v.m_array[i];
-	}
-}
-
-void FullVector::operator+= (float f)
-{
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] += f;
-	}
-}
-
-void FullVector::operator-= (float f)
-{
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] -= f;
-	}
-}
-
-void FullVector::operator*= (float f)
-{
-	const uint dim = dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		m_array[i] *= f;
-	}
-}
-
-
-void nv::saxpy(float a, const FullVector & x, FullVector & y)
-{
-	nvDebugCheck(x.dimension() == y.dimension());
-
-	const uint dim = x.dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		y[i] += a * x[i];
-	}
-}
-
-void nv::copy(const FullVector & x, FullVector & y)
-{
-	nvDebugCheck(x.dimension() == y.dimension());
-
-	const uint dim = x.dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		y[i] = x[i];
-	}
-}
-
-void nv::scal(float a, FullVector & x)
-{
-	const uint dim = x.dimension();
-	for (uint i = 0; i < dim; i++)
-	{
-		x[i] *= a;
-	}
-}
-
-float nv::dot(const FullVector & x, const FullVector & y)
-{
-	nvDebugCheck(x.dimension() == y.dimension());
-
-	const uint dim = x.dimension();
-
-	/*float sum = 0;
-	for (uint i = 0; i < dim; i++)
-	{
-		sum += x[i] * y[i];
-	}
-	return sum;*/
-
-	KahanSum kahan;
-
-	for (uint i = 0; i < dim; i++)
-	{
-		kahan.add(x[i] * y[i]);
-	}
-
-	return kahan.sum();
-}
-
-
-FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
-{
-	m_array.resize(d*d, 0.0f);
-}
-
-FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
-{
-	m_array.resize(w*h, 0.0f);
-}
-
-FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
-{
-	m_array = m.m_array;
-}
-
-const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
-{
-	nvCheck(width() == m.width());
-	nvCheck(height() == m.height());
-
-	m_array = m.m_array;
-	
-	return *this;
-}
-
-
-float FullMatrix::getCoefficient(uint x, uint y) const
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-	
-	return m_array[y * width() + x];
-}
-
-void FullMatrix::setCoefficient(uint x, uint y, float f)
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-	
-	m_array[y * width() + x] = f;
-}
-
-void FullMatrix::addCoefficient(uint x, uint y, float f)
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-	
-	m_array[y * width() + x] += f;
-}
-
-void FullMatrix::mulCoefficient(uint x, uint y, float f)
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-	
-	m_array[y * width() + x] *= f;
-}
-
-float FullMatrix::dotRow(uint y, const FullVector & v) const
-{
-	nvDebugCheck( v.dimension() == width() );
-	nvDebugCheck( y < height() );
-
-	float sum = 0;
-
-	const uint count = v.dimension();
-	for (uint i = 0; i < count; i++)
-	{
-		sum += m_array[y * count + i] * v[i];
-	}
-
-	return sum;
-}
-
-void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
-{
-	nvDebugCheck( v.dimension() == width() );
-	nvDebugCheck( y < height() );
-
-	const uint count = v.dimension();
-	for (uint i = 0; i < count; i++)
-	{
-		v[i] += m_array[y * count + i];
-	}
-}
-
-
-// y = M * x
-void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
-{
-	mult(NoTransposed, M, x, y);
-}
-
-void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
-{
-	const uint w = M.width();
-	const uint h = M.height();
-
-	if (TM == Transposed)
-	{
-		nvDebugCheck( h == x.dimension() );
-		nvDebugCheck( w == y.dimension() );
-
-		y.fill(0.0f);
-
-		for (uint i = 0; i < h; i++)
-		{
-			M.madRow(i, x[i], y);
-		}
-	}
-	else
-	{
-		nvDebugCheck( w == x.dimension() );
-		nvDebugCheck( h == y.dimension() );
-
-		for (uint i = 0; i < h; i++)
-		{
-			y[i] = M.dotRow(i, x);
-		}
-	}
-}
-
-// y = alpha*A*x + beta*y
-void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-	sgemv(alpha, NoTransposed, A, x, beta, y);
-}
-
-void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-	const uint w = A.width();
-	const uint h = A.height();
-
-	if (TA == Transposed)
-	{
-		nvDebugCheck( h == x.dimension() );
-		nvDebugCheck( w == y.dimension() );
-
-		for (uint i = 0; i < h; i++)
-		{
-			A.madRow(i, alpha * x[i], y);
-		}
-	}
-	else
-	{
-		nvDebugCheck( w == x.dimension() );
-		nvDebugCheck( h == y.dimension() );
-
-		for (uint i = 0; i < h; i++)
-		{
-			y[i] = alpha * A.dotRow(i, x) + beta * y[i];
-		}
-	}
-}
-
-
-// Multiply a row of A by a column of B.
-static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
-{
-	const uint w = (TA == NoTransposed) ? A.width() : A.height();
-	nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
-
-	float sum = 0.0f;
-
-	for (uint k = 0; k < w; k++)
-	{
-		const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k);	// @@ Move branches out of the loop?
-		const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
-		sum += a * b;
-	}
-
-	return sum;
-}
-
-
-// C = A * B
-void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
-{
-	mult(NoTransposed, A, NoTransposed, B, C);
-}
-
-void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
-{
-	sgemm(1.0f, TA, A, TB, B, 0.0f, C);
-}
-
-// C = alpha*A*B + beta*C
-void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
-{
-	sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
-}
-
-void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
-{
-	const uint w = C.width();
-	const uint h = C.height();
-
-	uint aw = (TA == NoTransposed) ? A.width() : A.height();
-	uint ah = (TA == NoTransposed) ? A.height() : A.width();
-	uint bw = (TB == NoTransposed) ? B.width() : B.height();
-	uint bh = (TB == NoTransposed) ? B.height() : B.width();
-
-	nvDebugCheck(aw == bh);
-	nvDebugCheck(bw == ah);
-	nvDebugCheck(w == bw);
-	nvDebugCheck(h == ah);
-
-	for (uint y = 0; y < h; y++)
-	{
-		for (uint x = 0; x < w; x++)
-		{
-			float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
-			C.setCoefficient(x, y, c);
-		}
-	}
-}
-
-
-
-
-
-/// Ctor. Init the size of the sparse matrix.
-SparseMatrix::SparseMatrix(uint d) : m_width(d)
-{
-	m_array.resize(d);
-}
-
-/// Ctor. Init the size of the sparse matrix.
-SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
-{
-	m_array.resize(h);
-}
-
-SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
-{
-	m_array = m.m_array;
-}
-
-const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
-{
-	nvCheck(width() == m.width());
-	nvCheck(height() == m.height());
-
-	m_array = m.m_array;
-
-	return *this;
-}
-
-
-// x is column, y is row
-float SparseMatrix::getCoefficient(uint x, uint y) const
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		if (m_array[y][i].x == x) return m_array[y][i].v;
-	}
-
-	return 0.0f;
-}
-
-void SparseMatrix::setCoefficient(uint x, uint y, float f)
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		if (m_array[y][i].x == x) 
-		{
-			m_array[y][i].v = f;
-			return;
-		}
-	}
-
-	Coefficient c = { x, f };
-	m_array[y].append( c );
-}
-
-void SparseMatrix::addCoefficient(uint x, uint y, float f)
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		if (m_array[y][i].x == x) 
-		{
-			m_array[y][i].v += f;
-			return;
-		}
-	}
-
-	Coefficient c = { x, f };
-	m_array[y].append( c );
-}
-
-void SparseMatrix::mulCoefficient(uint x, uint y, float f)
-{
-	nvDebugCheck( x < width() );
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		if (m_array[y][i].x == x) 
-		{
-			m_array[y][i].v *= f;
-			return;
-		}
-	}
-
-	Coefficient c = { x, f };
-	m_array[y].append( c );
-}
-
-
-float SparseMatrix::sumRow(uint y) const
-{
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-
-	/*float sum = 0;
-	for (uint i = 0; i < count; i++)
-	{
-		sum += m_array[y][i].v;
-	}
-	return sum;*/
-
-	KahanSum kahan;
-
-	for (uint i = 0; i < count; i++)
-	{
-		kahan.add(m_array[y][i].v);
-	}
-
-	return kahan.sum();
-}
-
-float SparseMatrix::dotRow(uint y, const FullVector & v) const
-{
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-
-	/*float sum = 0;
-	for (uint i = 0; i < count; i++)
-	{
-		sum += m_array[y][i].v * v[m_array[y][i].x];
-	}
-	return sum;*/
-
-	KahanSum kahan;
-
-	for (uint i = 0; i < count; i++)
-	{
-		kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
-	}
-
-	return kahan.sum();
-}
-
-void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
-{
-	nvDebugCheck(y < height());
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		v[m_array[y][i].x] += alpha * m_array[y][i].v;
-	}
-}
-
-
-void SparseMatrix::clearRow(uint y)
-{
-	nvDebugCheck( y < height() );
-
-	m_array[y].clear();
-}
-
-void SparseMatrix::scaleRow(uint y, float f)
-{
-	nvDebugCheck( y < height() );
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		m_array[y][i].v *= f;
-	}
-}
-
-void SparseMatrix::normalizeRow(uint y)
-{
-	nvDebugCheck( y < height() );
-
-	float norm = 0.0f;
-
-	const uint count = m_array[y].count();
-	for (uint i = 0; i < count; i++)
-	{
-		float f = m_array[y][i].v;
-		norm += f * f;
-	}
-
-	scaleRow(y, 1.0f / sqrtf(norm));
-}
-
-
-void SparseMatrix::clearColumn(uint x)
-{
-	nvDebugCheck(x < width());
-
-	for (uint y = 0; y < height(); y++)
-	{
-		const uint count = m_array[y].count();
-		for (uint e = 0; e < count; e++)
-		{
-			if (m_array[y][e].x == x)
-			{
-				m_array[y][e].v = 0.0f;
-				break;
-			}
-		}
-	}
-}
-
-void SparseMatrix::scaleColumn(uint x, float f)
-{
-	nvDebugCheck(x < width());
-
-	for (uint y = 0; y < height(); y++)
-	{
-		const uint count = m_array[y].count();
-		for (uint e = 0; e < count; e++)
-		{
-			if (m_array[y][e].x == x)
-			{
-				m_array[y][e].v *= f;
-				break;
-			}
-		}
-	}
-}
-
-const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
-{
-	return m_array[y];
-}
-
-
-// y = M * x
-void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
-{
-	mult(NoTransposed, M, x, y);
-}
-
-void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
-{
-	const uint w = M.width();
-	const uint h = M.height();
-
-	if (TM == Transposed)
-	{
-		nvDebugCheck( h == x.dimension() );
-		nvDebugCheck( w == y.dimension() );
-
-		y.fill(0.0f);
-
-		for (uint i = 0; i < h; i++)
-		{
-			M.madRow(i, x[i], y);
-		}
-	}
-	else
-	{
-		nvDebugCheck( w == x.dimension() );
-		nvDebugCheck( h == y.dimension() );
-
-		for (uint i = 0; i < h; i++)
-		{
-			y[i] = M.dotRow(i, x);
-		}
-	}
-}
-
-// y = alpha*A*x + beta*y
-void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-	sgemv(alpha, NoTransposed, A, x, beta, y);
-}
-
-void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
-{
-	const uint w = A.width();
-	const uint h = A.height();
-
-	if (TA == Transposed)
-	{
-		nvDebugCheck( h == x.dimension() );
-		nvDebugCheck( w == y.dimension() );
-
-		for (uint i = 0; i < h; i++)
-		{
-			A.madRow(i, alpha * x[i], y);
-		}
-	}
-	else
-	{
-		nvDebugCheck( w == x.dimension() );
-		nvDebugCheck( h == y.dimension() );
-
-		for (uint i = 0; i < h; i++)
-		{
-			y[i] = alpha * A.dotRow(i, x) + beta * y[i];
-		}
-	}
-}
-
-
-// dot y-row of A by x-column of B
-static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
-{
-	const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
-
-	const uint count = row.count();
-
-	/*float sum = 0.0f;
-	for (uint i = 0; i < count; i++)
-	{
-		const SparseMatrix::Coefficient & c = row[i];
-		sum += c.v * B.getCoefficient(x, c.x);
-	}
-	return sum;*/
-
-	KahanSum kahan;
-	for (uint i = 0; i < count; i++)
-	{
-		const SparseMatrix::Coefficient & c = row[i];
-		kahan.add(c.v * B.getCoefficient(x, c.x));
-	}
-
-	return kahan.sum();
-}
-
-// dot y-row of A by x-row of B
-static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
-{
-	const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
-
-	const uint count = row.count();
-
-	/*float sum = 0.0f;
-	for (uint i = 0; i < count; i++)
-	{
-		const SparseMatrix::Coefficient & c = row[i];
-		sum += c.v * B.getCoefficient(c.x, x);
-	}
-	//return sum;*/
-
-	KahanSum kahan;
-	for (uint i = 0; i < count; i++)
-	{
-		const SparseMatrix::Coefficient & c = row[i];
-		kahan.add(c.v * B.getCoefficient(c.x, x));
-	}
-
-	return kahan.sum();
-}
-
-// dot y-column of A by x-column of B
-static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
-{
-	nvDebugCheck(A.height() == B.height());
-
-	const uint h = A.height();
-
-	/*float sum = 0.0f;
-	for (uint i = 0; i < h; i++)
-	{
-		sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
-	}
-	//return sum;*/
-
-	KahanSum kahan;
-	for (uint i = 0; i < h; i++)
-	{
-		kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
-	}
-
-	return kahan.sum();
-}
-
-
-
-// C = A * B
-void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
-{
-	mult(NoTransposed, A, NoTransposed, B, C);
-}
-
-void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
-{
-	sgemm(1.0f, TA, A, TB, B, 0.0f, C);
-}
-
-// C = alpha*A*B + beta*C
-void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
-{
-	sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
-}
-
-void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
-{
-	const uint w = C.width();
-	const uint h = C.height();
-
-	uint aw = (TA == NoTransposed) ? A.width() : A.height();
-	uint ah = (TA == NoTransposed) ? A.height() : A.width();
-	uint bw = (TB == NoTransposed) ? B.width() : B.height();
-	uint bh = (TB == NoTransposed) ? B.height() : B.width();
-
-	nvDebugCheck(aw == bh);
-	nvDebugCheck(bw == ah);
-	nvDebugCheck(w == bw);
-	nvDebugCheck(h == ah);
-
-
-	for (uint y = 0; y < h; y++)
-	{
-		for (uint x = 0; x < w; x++)
-		{
-			float c = beta * C.getCoefficient(x, y);
-
-			if (TA == NoTransposed && TB == NoTransposed)
-			{
-				// dot y-row of A by x-column of B.
-				c += alpha * dotRowColumn(y, A, x, B);
-			}
-			else if (TA == Transposed && TB == Transposed)
-			{
-				// dot y-column of A by x-row of B.
-				c += alpha * dotRowColumn(x, B, y, A);
-			}
-			else if (TA == Transposed && TB == NoTransposed)
-			{
-				// dot y-column of A by x-column of B.
-				c += alpha * dotColumnColumn(y, A, x, B);
-			}
-			else if (TA == NoTransposed && TB == Transposed)
-			{
-				// dot y-row of A by x-row of B.
-				c += alpha * dotRowRow(y, A, x, B);
-			}
-
-			if (c != 0.0f)
-			{
-				C.setCoefficient(x, y, c);
-			}
-		}
-	}
-
-}
-
-// C = At * A
-void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
-{
-	// This is quite expensive...
-	mult(Transposed, A, NoTransposed, A, C);
-}
--- a/src/nvmath/Sparse.h
+++ b/src/nvmath/Sparse.h
@ -1,198 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_SPARSE_H
-#define NV_MATH_SPARSE_H
-
-#include <nvcore/Containers.h>
-#include <nvmath/nvmath.h>
-
-// Full and sparse vector and matrix classes. BLAS subset.
-
-namespace nv
-{
-	class FullVector;
-	class FullMatrix;
-	class SparseMatrix;
-
-
-	/// Fixed size vector class.
-	class FullVector
-	{
-	public:
-
-		FullVector(uint dim);
-		FullVector(const FullVector & v);
-
-		const FullVector & operator=(const FullVector & v);
-		
-		uint dimension() const { return m_array.count(); }
-
-		const float & operator[]( uint index ) const { return m_array[index]; }
-		float & operator[] ( uint index ) { return m_array[index]; }
-
-		void fill(float f);
-		
-		void operator+= (const FullVector & v);
-		void operator-= (const FullVector & v);
-		void operator*= (const FullVector & v);
-
-		void operator+= (float f);
-		void operator-= (float f);
-		void operator*= (float f);
-
-
-	private:
-
-		Array<float> m_array;
-
-	};
-
-	// Pseudo-BLAS interface.
-	NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y);	// y = a * x + y
-	NVMATH_API void copy(const FullVector & x, FullVector & y);
-	NVMATH_API void scal(float a, FullVector & x);
-	NVMATH_API float dot(const FullVector & x, const FullVector & y);
-
-
-	enum Transpose
-	{
-		NoTransposed = 0,
-		Transposed = 1
-	};
-
-	/// Full matrix class.
-	class FullMatrix
-	{
-	public:
-
-		FullMatrix(uint d);
-		FullMatrix(uint w, uint h);
-		FullMatrix(const FullMatrix & m);
-
-		const FullMatrix & operator=(const FullMatrix & m);
-
-		uint width() const { return m_width; }
-		uint height() const { return m_height; }
-		bool isSquare() const { return m_width == m_height; }
-		
-		float getCoefficient(uint x, uint y) const;
-
-		void setCoefficient(uint x, uint y, float f);
-		void addCoefficient(uint x, uint y, float f);
-		void mulCoefficient(uint x, uint y, float f);
-		
-		float dotRow(uint y, const FullVector & v) const;
-		void madRow(uint y, float alpha, FullVector & v) const;
-
-	protected:
-
-		bool isValid() const {
-			return m_array.size() == (m_width * m_height);
-		}
-
-	private:
-
-		const uint m_width;
-		const uint m_height;
-		Array<float> m_array;
-
-	};
-
-	NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
-	NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
-
-	// y = alpha*A*x + beta*y
-	NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
-	NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
-
-	NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
-	NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
-
-	// C = alpha*A*B + beta*C
-	NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
-	NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
-
-
-	/**
-	 * Sparse matrix class. The matrix is assumed to be sparse and to have
-	 * very few non-zero elements, for this reason it's stored in indexed 
-	 * format. To multiply column vectors efficiently, the matrix stores 
-	 * the elements in indexed-column order, there is a list of indexed 
-	 * elements for each row of the matrix. As with the FullVector the 
-	 * dimension of the matrix is constant.
-	**/
-	class SparseMatrix
-	{
-		friend class FullMatrix;
-	public:
-
-		// An element of the sparse array.
-		struct Coefficient {
-			uint x;  // column
-			float v; // value
-		};
-
-
-	public:
-
-		SparseMatrix(uint d);
-		SparseMatrix(uint w, uint h);
-		SparseMatrix(const SparseMatrix & m);
-
-		const SparseMatrix & operator=(const SparseMatrix & m);
-
-
-		uint width() const { return m_width; }
-		uint height() const { return m_array.count(); }
-		bool isSquare() const { return width() == height(); }
-
-		float getCoefficient(uint x, uint y) const; // x is column, y is row
-
-		void setCoefficient(uint x, uint y, float f);
-		void addCoefficient(uint x, uint y, float f);
-		void mulCoefficient(uint x, uint y, float f);
-
-		float sumRow(uint y) const;
-		float dotRow(uint y, const FullVector & v) const;
-		void madRow(uint y, float alpha, FullVector & v) const;
-
-		void clearRow(uint y);
-		void scaleRow(uint y, float f);
-		void normalizeRow(uint y);
-
-		void clearColumn(uint x);
-		void scaleColumn(uint x, float f);
-
-		const Array<Coefficient> & getRow(uint y) const;
-
-	private:
-
-		/// Number of columns.
-		const uint m_width;
-		
-		/// Array of matrix elements.
-		Array< Array<Coefficient> > m_array;
-
-	};
-
-	NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
-	NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
-
-	// y = alpha*A*x + beta*y
-	NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
-	NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
-
-	NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
-	NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
-
-	// C = alpha*A*B + beta*C
-	NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
-	NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
-
-	// C = At * A
-	NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
-
-} // nv namespace
-
-
-#endif // NV_MATH_SPARSE_H
--- a/src/nvmath/SphericalHarmonic.cpp
+++ b/src/nvmath/SphericalHarmonic.cpp
@ -11,10 +11,8 @@ namespace
 	// Basic integer factorial.
 	inline static int factorial( int v )
 	{
-		const static int fac_table[] = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800 };
-
-		if(v <= 11){
-			return fac_table[v];
+		if (v == 0) {
+			return 1;
 		}
 	
 		int result = v;
--- a/src/nvmath/SphericalHarmonic.h
+++ b/src/nvmath/SphericalHarmonic.h
@ -3,7 +3,6 @@
 #ifndef NV_MATH_SPHERICALHARMONIC_H
 #define NV_MATH_SPHERICALHARMONIC_H

-#include <string.h> // memcpy
 #include <nvmath/Vector.h>

 namespace nv
--- a/src/nvmath/TriBox.cpp
+++ b/src/nvmath/TriBox.cpp
@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
 	if(min>rad || max<-rad) return false;


-bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
+bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
 {
 	// use separating axis theorem to test overlap between triangle and box
 	// need to test for overlap in these directions:
@ -170,7 +170,7 @@ bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const T
 }


-bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
+bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
 {
 	// use separating axis theorem to test overlap between triangle and box
 	// need to test for overlap in these directions:
--- a/src/nvmath/TypeSerialization.cpp
+++ b/src/nvmath/TypeSerialization.cpp
@ -1,82 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#include <nvcore/Stream.h>
-
-#include <nvmath/Vector.h>
-#include <nvmath/Matrix.h>
-#include <nvmath/Quaternion.h>
-#include <nvmath/Basis.h>
-#include <nvmath/Box.h>
-
-#include <nvmath/TypeSerialization.h>
-
-using namespace nv;
-
-Stream & nv::operator<< (Stream & s, Vector2 & v)
-{
-	float x = v.x();
-	float y = v.y();
-
-	s << x << y;
-
-	if (s.isLoading())
-	{
-		v.set(x, y);
-	}
-
-	return s;
-}
-
-Stream & nv::operator<< (Stream & s, Vector3 & v)
-{
-	float x = v.x();
-	float y = v.y();
-	float z = v.z();
-
-	s << x << y << z;
-
-	if (s.isLoading())
-	{
-		v.set(x, y, z);
-	}
-
-	return s;
-}
-
-Stream & nv::operator<< (Stream & s, Vector4 & v)
-{
-	float x = v.x();
-	float y = v.y();
-	float z = v.z();
-	float w = v.w();
-
-	s << x << y << z << w;
-
-	if (s.isLoading())
-	{
-		v.set(x, y, z, w);
-	}
-
-	return s;
-}
-
-Stream & nv::operator<< (Stream & s, Matrix & m)
-{
-	return s;
-}
-
-Stream & nv::operator<< (Stream & s, Quaternion & q)
-{
-	return s << q.asVector();
-}
-
-Stream & nv::operator<< (Stream & s, Basis & basis)
-{
-	return s << basis.tangent << basis.bitangent << basis.normal;
-}
-
-Stream & nv::operator<< (Stream & s, Box & box)
-{
-	return s << box.m_mins << box.m_maxs;
-}
-
--- a/src/nvmath/TypeSerialization.h
+++ b/src/nvmath/TypeSerialization.h
@ -1,32 +0,0 @@
-// This code is in the public domain -- castanyo@yahoo.es
-
-#ifndef NV_MATH_TYPESERIALIZATION_H
-#define NV_MATH_TYPESERIALIZATION_H
-
-#include <nvmath/nvmath.h>
-
-namespace nv
-{
-	class Stream;
-
-	class Vector2;
-	class Vector3;
-	class Vector4;
-
-	class Matrix;
-	class Quaternion;
-	struct Basis;
-	class Box;
-	
-	NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
-	NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
-	NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
-
-	NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
-	NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
-	NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
-	NVMATH_API Stream & operator<< (Stream & s, Box & obj);
-
-} // nv namespace
-
-#endif // NV_MATH_TYPESERIALIZATION_H
--- a/src/nvmath/Vector.h
+++ b/src/nvmath/Vector.h
@ -4,7 +4,7 @@
 #define NV_MATH_VECTOR_H

 #include <nvmath/nvmath.h>
-#include <nvcore/Algorithms.h> // min, max
+#include <nvcore/Containers.h> // min, max

 namespace nv
 {
@ -71,7 +71,6 @@ public:
 	const Vector2 & xy() const;

 	scalar component(uint idx) const;
-	void setComponent(uint idx, scalar f);

 	const scalar * ptr() const;

@ -240,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
 inline scalar Vector3::component(uint idx) const
 {
 	nvDebugCheck(idx < 3);
-	if (idx == 0) return m_x;
-	if (idx == 1) return m_y;
-	if (idx == 2) return m_z;
+	if (idx == 0) return x();
+	if (idx == 1) return y();
+	if (idx == 2) return z();
 	nvAssume(false);
 	return 0.0f;
 }

-inline void Vector3::setComponent(uint idx, float f)
-{
-	nvDebugCheck(idx < 3);
-	if (idx == 0) m_x = f;
-	else if (idx == 1) m_y = f;
-	else if (idx == 2) m_z = f;
-}
-
 inline const scalar * Vector3::ptr() const
 {
 	return &m_x;
@ -486,35 +477,6 @@ inline scalar length(Vector2::Arg v)
 	return sqrtf(length_squared(v));
 }

-inline scalar inverse_length(Vector2::Arg v)
-{
-	return 1.0f / sqrtf(length_squared(v));
-}
-
-inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
-{
-	return equal(length(v), 1, epsilon);
-}
-
-inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
-{
-	float l = length(v);
-	nvDebugCheck(!isZero(l, epsilon));
-	Vector2 n = scale(v, 1.0f / l);
-	nvDebugCheck(isNormalized(n));
-	return n;
-}
-
-inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
-{
-	float l = length(v);
-	if (isZero(l, epsilon)) {
-		return fallback;
-	}
-	return scale(v, 1.0f / l);
-}
-
-
 inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
 {
 	return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -633,11 +595,6 @@ inline scalar length(Vector3::Arg v)
 	return sqrtf(length_squared(v));
 }

-inline scalar inverse_length(Vector3::Arg v)
-{
-	return 1.0f / sqrtf(length_squared(v));
-}
-
 inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
 {
 	return equal(length(v), 1, epsilon);
@ -759,11 +716,6 @@ inline scalar length(Vector4::Arg v)
 	return sqrtf(length_squared(v));
 }

-inline scalar inverse_length(Vector4::Arg v)
-{
-	return 1.0f / sqrtf(length_squared(v));
-}
-
 inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
 {
 	return equal(length(v), 1, epsilon);
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@ -136,11 +136,6 @@ inline float lerp(float f0, float f1, float t)
 	return f0 * s + f1 * t;
 }

-inline float square(float f)
-{
-	return f * f;
-}
-
 } // nv

 #endif // NV_MATH_H