Create 2.0.4 release.
This commit is contained in:
@ -27,7 +27,8 @@ void Basis::orthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
tangent -= normal * dot(normal, tangent);
|
||||
tangent = ::normalize(tangent, epsilon);
|
||||
|
||||
bitangent -= normal * dot(normal, bitangent) + tangent * dot(tangent, bitangent);
|
||||
bitangent -= normal * dot(normal, bitangent);
|
||||
bitangent -= tangent * dot(tangent, bitangent);
|
||||
bitangent = ::normalize(bitangent, epsilon);
|
||||
}
|
||||
|
||||
@ -48,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
return;
|
||||
}
|
||||
}
|
||||
normal = nv::normalize(normal, epsilon);
|
||||
normal = ::normalize(normal, epsilon);
|
||||
|
||||
tangent -= normal * dot(normal, tangent);
|
||||
bitangent -= normal * dot(normal, bitangent);
|
||||
@ -67,8 +68,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
}
|
||||
else
|
||||
{
|
||||
#if 0
|
||||
tangent = nv::normalize(tangent, epsilon);
|
||||
tangent = ::normalize(tangent, epsilon);
|
||||
bitangent -= tangent * dot(tangent, bitangent);
|
||||
|
||||
if (length(bitangent) < epsilon)
|
||||
@ -78,47 +78,11 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
|
||||
}
|
||||
else
|
||||
{
|
||||
bitangent = nv::normalize(bitangent, epsilon);
|
||||
tangent = ::normalize(tangent, epsilon);
|
||||
}
|
||||
#else
|
||||
if (length(bitangent) < epsilon)
|
||||
{
|
||||
bitangent = cross(tangent, normal);
|
||||
nvCheck(isNormalized(bitangent));
|
||||
}
|
||||
else
|
||||
{
|
||||
tangent = nv::normalize(tangent);
|
||||
bitangent = nv::normalize(bitangent);
|
||||
|
||||
Vector3 bisector = nv::normalize(tangent + bitangent);
|
||||
Vector3 axis = cross(bisector, normal);
|
||||
|
||||
nvDebugCheck(isNormalized(axis, epsilon));
|
||||
nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
|
||||
|
||||
if (dot(axis, tangent) > 0)
|
||||
{
|
||||
tangent = nv::normalize(bisector + axis);
|
||||
bitangent = nv::normalize(bisector - axis);
|
||||
}
|
||||
else
|
||||
{
|
||||
tangent = nv::normalize(bisector - axis);
|
||||
bitangent = nv::normalize(bisector + axis);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
/*// Check vector lengths.
|
||||
if (!isNormalized(normal, epsilon))
|
||||
{
|
||||
nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
|
||||
nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
|
||||
nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
|
||||
}*/
|
||||
|
||||
// Check vector lengths.
|
||||
nvCheck(isNormalized(normal, epsilon));
|
||||
nvCheck(isNormalized(tangent, epsilon));
|
||||
nvCheck(isNormalized(bitangent, epsilon));
|
||||
@ -161,18 +125,9 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
|
||||
bitangent = cross(normal, tangent);
|
||||
}
|
||||
|
||||
bool Basis::isValid() const
|
||||
{
|
||||
if (equal(normal, Vector3(zero))) return false;
|
||||
if (equal(tangent, Vector3(zero))) return false;
|
||||
if (equal(bitangent, Vector3(zero))) return false;
|
||||
|
||||
if (equal(determinant(), 0.0f)) return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
/// Transform by this basis. (From this basis to object space).
|
||||
Vector3 Basis::transform(Vector3::Arg v) const
|
||||
{
|
||||
@ -189,31 +144,30 @@ Vector3 Basis::transformT(Vector3::Arg v)
|
||||
}
|
||||
|
||||
/// Transform by the inverse. (From object space to this basis).
|
||||
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
|
||||
/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
|
||||
Vector3 Basis::transformI(Vector3::Arg v) const
|
||||
{
|
||||
const float det = determinant();
|
||||
nvDebugCheck(!equal(det, 0.0f, 0.0f));
|
||||
nvCheck(!equalf(det, 0.0f));
|
||||
|
||||
const float idet = 1.0f / det;
|
||||
|
||||
// Rows of the inverse matrix.
|
||||
Vector3 r0(
|
||||
(bitangent.y() * normal.z() - bitangent.z() * normal.y()),
|
||||
-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
|
||||
(bitangent.x() * normal.y() - bitangent.y() * normal.x()));
|
||||
Vector3 r0, r1, r2;
|
||||
r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
|
||||
r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
|
||||
r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
|
||||
|
||||
Vector3 r1(
|
||||
-(tangent.y() * normal.z() - tangent.z() * normal.y()),
|
||||
(tangent.x() * normal.z() - tangent.z() * normal.x()),
|
||||
-(tangent.x() * normal.y() - tangent.y() * normal.x()));
|
||||
r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
|
||||
r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
|
||||
r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
|
||||
|
||||
Vector3 r2(
|
||||
(tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
|
||||
-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
|
||||
(tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
|
||||
r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
|
||||
r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
|
||||
r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
|
||||
|
||||
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
|
||||
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
|
@ -54,8 +54,7 @@ namespace nv
|
||||
tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
|
||||
}
|
||||
|
||||
bool isValid() const;
|
||||
|
||||
/*
|
||||
// Get transform matrix for this basis.
|
||||
NVMATH_API Matrix matrix() const;
|
||||
|
||||
@ -67,7 +66,7 @@ namespace nv
|
||||
|
||||
// Transform by the inverse. (From object space to this basis).
|
||||
NVMATH_API Vector3 transformI(Vector3::Arg v) const;
|
||||
|
||||
*/
|
||||
|
||||
Vector3 tangent;
|
||||
Vector3 bitangent;
|
||||
|
@ -9,7 +9,6 @@
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Stream;
|
||||
|
||||
/// Axis Aligned Bounding Box.
|
||||
class Box
|
||||
@ -28,13 +27,11 @@ public:
|
||||
// Cast operators.
|
||||
operator const float * () const { return reinterpret_cast<const float *>(this); }
|
||||
|
||||
// Min corner of the box.
|
||||
Vector3 minCorner() const { return m_mins; }
|
||||
Vector3 & minCorner() { return m_mins; }
|
||||
/// Min corner of the box.
|
||||
Vector3 mins() const { return m_mins; }
|
||||
|
||||
// Max corner of the box.
|
||||
Vector3 maxCorner() const { return m_maxs; }
|
||||
Vector3 & maxCorner() { return m_maxs; }
|
||||
/// Max corner of the box.
|
||||
Vector3 maxs() const { return m_maxs; }
|
||||
|
||||
/// Clear the bounds.
|
||||
void clearBounds()
|
||||
@ -111,7 +108,7 @@ public:
|
||||
float area() const
|
||||
{
|
||||
const Vector3 d = extents();
|
||||
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
|
||||
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
|
||||
}
|
||||
|
||||
/// Get the volume of the box.
|
||||
@ -121,16 +118,6 @@ public:
|
||||
return 8.0f * (d.x() * d.y() * d.z());
|
||||
}
|
||||
|
||||
/// Return true if the box contains the given point.
|
||||
bool contains(Vector3::Arg p) const
|
||||
{
|
||||
return
|
||||
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
|
||||
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
|
||||
}
|
||||
|
||||
friend Stream & operator<< (Stream & s, Box & box);
|
||||
|
||||
private:
|
||||
|
||||
Vector3 m_mins;
|
||||
@ -138,6 +125,15 @@ private:
|
||||
};
|
||||
|
||||
|
||||
/*
|
||||
/// Point inside box test.
|
||||
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
|
||||
{
|
||||
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
|
||||
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
|
||||
}
|
||||
*/
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
@ -5,20 +5,13 @@ SET(MATH_SRCS
|
||||
Vector.h
|
||||
Matrix.h
|
||||
Quaternion.h
|
||||
Plane.h Plane.cpp
|
||||
Box.h
|
||||
Color.h
|
||||
Montecarlo.h Montecarlo.cpp
|
||||
Random.h Random.cpp
|
||||
SphericalHarmonic.h SphericalHarmonic.cpp
|
||||
Basis.h Basis.cpp
|
||||
Triangle.h Triangle.cpp TriBox.cpp
|
||||
Polygon.h Polygon.cpp
|
||||
TypeSerialization.h TypeSerialization.cpp
|
||||
Sparse.h Sparse.cpp
|
||||
Solver.h Solver.cpp
|
||||
KahanSum.h
|
||||
Half.h Half.cpp)
|
||||
Triangle.h Triangle.cpp TriBox.cpp)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
|
@ -1,563 +0,0 @@
|
||||
// Branch-free implementation of half-precision (16 bit) floating point
|
||||
// Copyright 2006 Mike Acton <macton@gmail.com>
|
||||
//
|
||||
// Permission is hereby granted, free of charge, to any person obtaining a
|
||||
// copy of this software and associated documentation files (the "Software"),
|
||||
// to deal in the Software without restriction, including without limitation
|
||||
// the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
// and/or sell copies of the Software, and to permit persons to whom the
|
||||
// Software is furnished to do so, subject to the following conditions:
|
||||
//
|
||||
// The above copyright notice and this permission notice shall be included
|
||||
// in all copies or substantial portions of the Software.
|
||||
//
|
||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
// THE SOFTWARE
|
||||
//
|
||||
// Half-precision floating point format
|
||||
// ------------------------------------
|
||||
//
|
||||
// | Field | Last | First | Note
|
||||
// |----------|------|-------|----------
|
||||
// | Sign | 15 | 15 |
|
||||
// | Exponent | 14 | 10 | Bias = 15
|
||||
// | Mantissa | 9 | 0 |
|
||||
//
|
||||
// Compiling
|
||||
// ---------
|
||||
//
|
||||
// Preferred compile flags for GCC:
|
||||
// -O3 -fstrict-aliasing -std=c99 -pedantic -Wall -Wstrict-aliasing
|
||||
//
|
||||
// This file is a C99 source file, intended to be compiled with a C99
|
||||
// compliant compiler. However, for the moment it remains combatible
|
||||
// with C++98. Therefore if you are using a compiler that poorly implements
|
||||
// C standards (e.g. MSVC), it may be compiled as C++. This is not
|
||||
// guaranteed for future versions.
|
||||
//
|
||||
// Features
|
||||
// --------
|
||||
//
|
||||
// * QNaN + <x> = QNaN
|
||||
// * <x> + +INF = +INF
|
||||
// * <x> - -INF = -INF
|
||||
// * INF - INF = SNaN
|
||||
// * Denormalized values
|
||||
// * Difference of ZEROs is always +ZERO
|
||||
// * Sum round with guard + round + sticky bit (grs)
|
||||
// * And of course... no branching
|
||||
//
|
||||
// Precision of Sum
|
||||
// ----------------
|
||||
//
|
||||
// (SUM) uint16 z = half_add( x, y );
|
||||
// (DIFFERENCE) uint16 z = half_add( x, -y );
|
||||
//
|
||||
// Will have exactly (0 ulps difference) the same result as:
|
||||
// (For 32 bit IEEE 784 floating point and same rounding mode)
|
||||
//
|
||||
// union FLOAT_32
|
||||
// {
|
||||
// float f32;
|
||||
// uint32 u32;
|
||||
// };
|
||||
//
|
||||
// union FLOAT_32 fx = { .u32 = half_to_float( x ) };
|
||||
// union FLOAT_32 fy = { .u32 = half_to_float( y ) };
|
||||
// union FLOAT_32 fz = { .f32 = fx.f32 + fy.f32 };
|
||||
// uint16 z = float_to_half( fz );
|
||||
//
|
||||
|
||||
#include "Half.h"
|
||||
#include <stdio.h>
|
||||
|
||||
// Load immediate
|
||||
static inline uint32 _uint32_li( uint32 a )
|
||||
{
|
||||
return (a);
|
||||
}
|
||||
|
||||
// Decrement
|
||||
static inline uint32 _uint32_dec( uint32 a )
|
||||
{
|
||||
return (a - 1);
|
||||
}
|
||||
|
||||
// Complement
|
||||
static inline uint32 _uint32_not( uint32 a )
|
||||
{
|
||||
return (~a);
|
||||
}
|
||||
|
||||
// Negate
|
||||
static inline uint32 _uint32_neg( uint32 a )
|
||||
{
|
||||
#if NV_CC_MSVC
|
||||
// prevent msvc warning.
|
||||
return ~a + 1;
|
||||
#else
|
||||
return (-a);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Extend sign
|
||||
static inline uint32 _uint32_ext( uint32 a )
|
||||
{
|
||||
return (((int32)a)>>31);
|
||||
}
|
||||
|
||||
// And
|
||||
static inline uint32 _uint32_and( uint32 a, uint32 b )
|
||||
{
|
||||
return (a & b);
|
||||
}
|
||||
|
||||
// And with Complement
|
||||
static inline uint32 _uint32_andc( uint32 a, uint32 b )
|
||||
{
|
||||
return (a & ~b);
|
||||
}
|
||||
|
||||
// Or
|
||||
static inline uint32 _uint32_or( uint32 a, uint32 b )
|
||||
{
|
||||
return (a | b);
|
||||
}
|
||||
|
||||
// Shift Right Logical
|
||||
static inline uint32 _uint32_srl( uint32 a, int sa )
|
||||
{
|
||||
return (a >> sa);
|
||||
}
|
||||
|
||||
// Shift Left Logical
|
||||
static inline uint32 _uint32_sll( uint32 a, int sa )
|
||||
{
|
||||
return (a << sa);
|
||||
}
|
||||
|
||||
// Add
|
||||
static inline uint32 _uint32_add( uint32 a, uint32 b )
|
||||
{
|
||||
return (a + b);
|
||||
}
|
||||
|
||||
// Subtract
|
||||
static inline uint32 _uint32_sub( uint32 a, uint32 b )
|
||||
{
|
||||
return (a - b);
|
||||
}
|
||||
|
||||
// Select on Sign bit
|
||||
static inline uint32 _uint32_sels( uint32 test, uint32 a, uint32 b )
|
||||
{
|
||||
const uint32 mask = _uint32_ext( test );
|
||||
const uint32 sel_a = _uint32_and( a, mask );
|
||||
const uint32 sel_b = _uint32_andc( b, mask );
|
||||
const uint32 result = _uint32_or( sel_a, sel_b );
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// Load Immediate
|
||||
static inline uint16 _uint16_li( uint16 a )
|
||||
{
|
||||
return (a);
|
||||
}
|
||||
|
||||
// Extend sign
|
||||
static inline uint16 _uint16_ext( uint16 a )
|
||||
{
|
||||
return (((int16)a)>>15);
|
||||
}
|
||||
|
||||
// Negate
|
||||
static inline uint16 _uint16_neg( uint16 a )
|
||||
{
|
||||
return (-a);
|
||||
}
|
||||
|
||||
// Complement
|
||||
static inline uint16 _uint16_not( uint16 a )
|
||||
{
|
||||
return (~a);
|
||||
}
|
||||
|
||||
// Decrement
|
||||
static inline uint16 _uint16_dec( uint16 a )
|
||||
{
|
||||
return (a - 1);
|
||||
}
|
||||
|
||||
// Shift Left Logical
|
||||
static inline uint16 _uint16_sll( uint16 a, int sa )
|
||||
{
|
||||
return (a << sa);
|
||||
}
|
||||
|
||||
// Shift Right Logical
|
||||
static inline uint16 _uint16_srl( uint16 a, int sa )
|
||||
{
|
||||
return (a >> sa);
|
||||
}
|
||||
|
||||
// Add
|
||||
static inline uint16 _uint16_add( uint16 a, uint16 b )
|
||||
{
|
||||
return (a + b);
|
||||
}
|
||||
|
||||
// Subtract
|
||||
static inline uint16 _uint16_sub( uint16 a, uint16 b )
|
||||
{
|
||||
return (a - b);
|
||||
}
|
||||
|
||||
// And
|
||||
static inline uint16 _uint16_and( uint16 a, uint16 b )
|
||||
{
|
||||
return (a & b);
|
||||
}
|
||||
|
||||
// Or
|
||||
static inline uint16 _uint16_or( uint16 a, uint16 b )
|
||||
{
|
||||
return (a | b);
|
||||
}
|
||||
|
||||
// Exclusive Or
|
||||
static inline uint16 _uint16_xor( uint16 a, uint16 b )
|
||||
{
|
||||
return (a ^ b);
|
||||
}
|
||||
|
||||
// And with Complement
|
||||
static inline uint16 _uint16_andc( uint16 a, uint16 b )
|
||||
{
|
||||
return (a & ~b);
|
||||
}
|
||||
|
||||
// And then Shift Right Logical
|
||||
static inline uint16 _uint16_andsrl( uint16 a, uint16 b, int sa )
|
||||
{
|
||||
return ((a & b) >> sa);
|
||||
}
|
||||
|
||||
// Shift Right Logical then Mask
|
||||
static inline uint16 _uint16_srlm( uint16 a, int sa, uint16 mask )
|
||||
{
|
||||
return ((a >> sa) & mask);
|
||||
}
|
||||
|
||||
// Add then Mask
|
||||
static inline uint16 _uint16_addm( uint16 a, uint16 b, uint16 mask )
|
||||
{
|
||||
return ((a + b) & mask);
|
||||
}
|
||||
|
||||
|
||||
// Select on Sign bit
|
||||
static inline uint16 _uint16_sels( uint16 test, uint16 a, uint16 b )
|
||||
{
|
||||
const uint16 mask = _uint16_ext( test );
|
||||
const uint16 sel_a = _uint16_and( a, mask );
|
||||
const uint16 sel_b = _uint16_andc( b, mask );
|
||||
const uint16 result = _uint16_or( sel_a, sel_b );
|
||||
|
||||
return (result);
|
||||
}
|
||||
|
||||
// Count Leading Zeros
|
||||
static inline uint32 _uint32_cntlz( uint32 x )
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
/* On PowerPC, this will map to insn: cntlzw */
|
||||
/* On Pentium, this will map to insn: clz */
|
||||
uint32 nlz = __builtin_clz( x );
|
||||
return (nlz);
|
||||
#else
|
||||
const uint32 x0 = _uint32_srl( x, 1 );
|
||||
const uint32 x1 = _uint32_or( x, x0 );
|
||||
const uint32 x2 = _uint32_srl( x1, 2 );
|
||||
const uint32 x3 = _uint32_or( x1, x2 );
|
||||
const uint32 x4 = _uint32_srl( x3, 4 );
|
||||
const uint32 x5 = _uint32_or( x3, x4 );
|
||||
const uint32 x6 = _uint32_srl( x5, 8 );
|
||||
const uint32 x7 = _uint32_or( x5, x6 );
|
||||
const uint32 x8 = _uint32_srl( x7, 16 );
|
||||
const uint32 x9 = _uint32_or( x7, x8 );
|
||||
const uint32 xA = _uint32_not( x9 );
|
||||
const uint32 xB = _uint32_srl( xA, 1 );
|
||||
const uint32 xC = _uint32_and( xB, 0x55555555 );
|
||||
const uint32 xD = _uint32_sub( xA, xC );
|
||||
const uint32 xE = _uint32_and( xD, 0x33333333 );
|
||||
const uint32 xF = _uint32_srl( xD, 2 );
|
||||
const uint32 x10 = _uint32_and( xF, 0x33333333 );
|
||||
const uint32 x11 = _uint32_add( xE, x10 );
|
||||
const uint32 x12 = _uint32_srl( x11, 4 );
|
||||
const uint32 x13 = _uint32_add( x11, x12 );
|
||||
const uint32 x14 = _uint32_and( x13, 0x0f0f0f0f );
|
||||
const uint32 x15 = _uint32_srl( x14, 8 );
|
||||
const uint32 x16 = _uint32_add( x14, x15 );
|
||||
const uint32 x17 = _uint32_srl( x16, 16 );
|
||||
const uint32 x18 = _uint32_add( x16, x17 );
|
||||
const uint32 x19 = _uint32_and( x18, 0x0000003f );
|
||||
return ( x19 );
|
||||
#endif
|
||||
}
|
||||
|
||||
// Count Leading Zeros
|
||||
static inline uint16 _uint16_cntlz( uint16 x )
|
||||
{
|
||||
#ifdef __GNUC__
|
||||
/* On PowerPC, this will map to insn: cntlzw */
|
||||
/* On Pentium, this will map to insn: clz */
|
||||
uint32 x32 = _uint32_sll( x, 16 );
|
||||
uint16 nlz = (uint16)__builtin_clz( x32 );
|
||||
return (nlz);
|
||||
#else
|
||||
const uint16 x0 = _uint16_srl( x, 1 );
|
||||
const uint16 x1 = _uint16_or( x, x0 );
|
||||
const uint16 x2 = _uint16_srl( x1, 2 );
|
||||
const uint16 x3 = _uint16_or( x1, x2 );
|
||||
const uint16 x4 = _uint16_srl( x3, 4 );
|
||||
const uint16 x5 = _uint16_or( x3, x4 );
|
||||
const uint16 x6 = _uint16_srl( x5, 8 );
|
||||
const uint16 x7 = _uint16_or( x5, x6 );
|
||||
const uint16 x8 = _uint16_not( x7 );
|
||||
const uint16 x9 = _uint16_srlm( x8, 1, 0x5555 );
|
||||
const uint16 xA = _uint16_sub( x8, x9 );
|
||||
const uint16 xB = _uint16_and( xA, 0x3333 );
|
||||
const uint16 xC = _uint16_srlm( xA, 2, 0x3333 );
|
||||
const uint16 xD = _uint16_add( xB, xC );
|
||||
const uint16 xE = _uint16_srl( xD, 4 );
|
||||
const uint16 xF = _uint16_addm( xD, xE, 0x0f0f );
|
||||
const uint16 x10 = _uint16_srl( xF, 8 );
|
||||
const uint16 x11 = _uint16_addm( xF, x10, 0x001f );
|
||||
return ( x11 );
|
||||
#endif
|
||||
}
|
||||
|
||||
uint16
|
||||
half_from_float( uint32 f )
|
||||
{
|
||||
const uint32 one = _uint32_li( 0x00000001 );
|
||||
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
|
||||
const uint32 f_m_mask = _uint32_li( 0x007fffff );
|
||||
const uint32 f_s_mask = _uint32_li( 0x80000000 );
|
||||
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
|
||||
const uint32 f_e_pos = _uint32_li( 0x00000017 );
|
||||
const uint32 f_m_round_bit = _uint32_li( 0x00001000 );
|
||||
const uint32 h_nan_em_min = _uint32_li( 0x00007c01 );
|
||||
const uint32 f_h_s_pos_offset = _uint32_li( 0x00000010 );
|
||||
const uint32 f_m_hidden_bit = _uint32_li( 0x00800000 );
|
||||
const uint32 f_h_m_pos_offset = _uint32_li( 0x0000000d );
|
||||
const uint32 f_h_bias_offset = _uint32_li( 0x38000000 );
|
||||
const uint32 f_m_snan_mask = _uint32_li( 0x003fffff );
|
||||
const uint16 h_snan_mask = _uint32_li( 0x00007e00 );
|
||||
const uint32 f_e = _uint32_and( f, f_e_mask );
|
||||
const uint32 f_m = _uint32_and( f, f_m_mask );
|
||||
const uint32 f_s = _uint32_and( f, f_s_mask );
|
||||
const uint32 f_e_h_bias = _uint32_sub( f_e, f_h_bias_offset );
|
||||
const uint32 f_e_h_bias_amount = _uint32_srl( f_e_h_bias, f_e_pos );
|
||||
const uint32 f_m_round_mask = _uint32_and( f_m, f_m_round_bit );
|
||||
const uint32 f_m_round_offset = _uint32_sll( f_m_round_mask, one );
|
||||
const uint32 f_m_rounded = _uint32_add( f_m, f_m_round_offset );
|
||||
const uint32 f_m_rounded_overflow = _uint32_and( f_m_rounded, f_m_hidden_bit );
|
||||
const uint32 f_m_denorm_sa = _uint32_sub( one, f_e_h_bias_amount );
|
||||
const uint32 f_m_with_hidden = _uint32_or( f_m_rounded, f_m_hidden_bit );
|
||||
const uint32 f_m_denorm = _uint32_srl( f_m_with_hidden, f_m_denorm_sa );
|
||||
const uint32 f_em_norm_packed = _uint32_or( f_e_h_bias, f_m_rounded );
|
||||
const uint32 f_e_overflow = _uint32_add( f_e_h_bias, f_m_hidden_bit );
|
||||
const uint32 h_s = _uint32_srl( f_s, f_h_s_pos_offset );
|
||||
const uint32 h_m_nan = _uint32_srl( f_m, f_h_m_pos_offset );
|
||||
const uint32 h_m_denorm = _uint32_srl( f_m_denorm, f_h_m_pos_offset );
|
||||
const uint32 h_em_norm = _uint32_srl( f_em_norm_packed, f_h_m_pos_offset );
|
||||
const uint32 h_em_overflow = _uint32_srl( f_e_overflow, f_h_m_pos_offset );
|
||||
const uint32 is_e_eqz_msb = _uint32_dec( f_e );
|
||||
const uint32 is_m_nez_msb = _uint32_neg( f_m );
|
||||
const uint32 is_h_m_nan_nez_msb = _uint32_neg( h_m_nan );
|
||||
const uint32 is_e_nflagged_msb = _uint32_sub( f_e, f_e_mask );
|
||||
const uint32 is_ninf_msb = _uint32_or( is_e_nflagged_msb, is_m_nez_msb );
|
||||
const uint32 is_underflow_msb = _uint32_sub( is_e_eqz_msb, f_h_bias_offset );
|
||||
const uint32 is_nan_nunderflow_msb = _uint32_or( is_h_m_nan_nez_msb, is_e_nflagged_msb );
|
||||
const uint32 is_m_snan_msb = _uint32_sub( f_m_snan_mask, f_m );
|
||||
const uint32 is_snan_msb = _uint32_andc( is_m_snan_msb, is_e_nflagged_msb );
|
||||
const uint32 is_overflow_msb = _uint32_neg( f_m_rounded_overflow );
|
||||
const uint32 h_nan_underflow_result = _uint32_sels( is_nan_nunderflow_msb, h_em_norm, h_nan_em_min );
|
||||
const uint32 h_inf_result = _uint32_sels( is_ninf_msb, h_nan_underflow_result, h_e_mask );
|
||||
const uint32 h_underflow_result = _uint32_sels( is_underflow_msb, h_m_denorm, h_inf_result );
|
||||
const uint32 h_overflow_result = _uint32_sels( is_overflow_msb, h_em_overflow, h_underflow_result );
|
||||
const uint32 h_em_result = _uint32_sels( is_snan_msb, h_snan_mask, h_overflow_result );
|
||||
const uint32 h_result = _uint32_or( h_em_result, h_s );
|
||||
|
||||
return (h_result);
|
||||
}
|
||||
|
||||
uint32
|
||||
half_to_float( uint16 h )
|
||||
{
|
||||
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
|
||||
const uint32 h_m_mask = _uint32_li( 0x000003ff );
|
||||
const uint32 h_s_mask = _uint32_li( 0x00008000 );
|
||||
const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 );
|
||||
const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d );
|
||||
const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 );
|
||||
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
|
||||
const uint32 f_m_mask = _uint32_li( 0x007fffff );
|
||||
const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e );
|
||||
const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
|
||||
const uint32 f_e_pos = _uint32_li( 0x00000017 );
|
||||
const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff );
|
||||
const uint32 h_e = _uint32_and( h, h_e_mask );
|
||||
const uint32 h_m = _uint32_and( h, h_m_mask );
|
||||
const uint32 h_s = _uint32_and( h, h_s_mask );
|
||||
const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
|
||||
const uint32 h_m_nlz = _uint32_cntlz( h_m );
|
||||
const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset );
|
||||
const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
|
||||
const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset );
|
||||
const uint32 f_em = _uint32_or( f_e, f_m );
|
||||
const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
|
||||
const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
|
||||
const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa );
|
||||
const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask );
|
||||
const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
|
||||
const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
|
||||
const uint32 f_em_nan = _uint32_or( f_e_mask, f_m );
|
||||
const uint32 is_e_eqz_msb = _uint32_dec( h_e );
|
||||
const uint32 is_m_nez_msb = _uint32_neg( h_m );
|
||||
const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
|
||||
const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
|
||||
const uint32 is_inf_msb = _uint32_andc( is_e_flagged_msb, is_m_nez_msb );
|
||||
const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
|
||||
const uint32 is_nan_msb = _uint32_and( is_e_flagged_msb, is_m_nez_msb );
|
||||
const uint32 is_zero = _uint32_ext( is_zero_msb );
|
||||
const uint32 f_zero_result = _uint32_andc( f_em, is_zero );
|
||||
const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
|
||||
const uint32 f_inf_result = _uint32_sels( is_inf_msb, f_e_mask, f_denorm_result );
|
||||
const uint32 f_nan_result = _uint32_sels( is_nan_msb, f_em_nan, f_inf_result );
|
||||
const uint32 f_result = _uint32_or( f_s, f_nan_result );
|
||||
|
||||
return (f_result);
|
||||
}
|
||||
|
||||
uint16
|
||||
half_add( uint16 x, uint16 y )
|
||||
{
|
||||
const uint16 one = _uint16_li( 0x0001 );
|
||||
const uint16 msb_to_lsb_sa = _uint16_li( 0x000f );
|
||||
const uint16 h_s_mask = _uint16_li( 0x8000 );
|
||||
const uint16 h_e_mask = _uint16_li( 0x7c00 );
|
||||
const uint16 h_m_mask = _uint16_li( 0x03ff );
|
||||
const uint16 h_m_msb_mask = _uint16_li( 0x2000 );
|
||||
const uint16 h_m_msb_sa = _uint16_li( 0x000d );
|
||||
const uint16 h_m_hidden = _uint16_li( 0x0400 );
|
||||
const uint16 h_e_pos = _uint16_li( 0x000a );
|
||||
const uint16 h_e_bias_minus_one = _uint16_li( 0x000e );
|
||||
const uint16 h_m_grs_carry = _uint16_li( 0x4000 );
|
||||
const uint16 h_m_grs_carry_pos = _uint16_li( 0x000e );
|
||||
const uint16 h_grs_size = _uint16_li( 0x0003 );
|
||||
const uint16 h_snan = _uint16_li( 0xfe00 );
|
||||
const uint16 h_e_mask_minus_one = _uint16_li( 0x7bff );
|
||||
const uint16 h_grs_round_carry = _uint16_sll( one, h_grs_size );
|
||||
const uint16 h_grs_round_mask = _uint16_sub( h_grs_round_carry, one );
|
||||
const uint16 x_e = _uint16_and( x, h_e_mask );
|
||||
const uint16 y_e = _uint16_and( y, h_e_mask );
|
||||
const uint16 is_y_e_larger_msb = _uint16_sub( x_e, y_e );
|
||||
const uint16 a = _uint16_sels( is_y_e_larger_msb, y, x);
|
||||
const uint16 a_s = _uint16_and( a, h_s_mask );
|
||||
const uint16 a_e = _uint16_and( a, h_e_mask );
|
||||
const uint16 a_m_no_hidden_bit = _uint16_and( a, h_m_mask );
|
||||
const uint16 a_em_no_hidden_bit = _uint16_or( a_e, a_m_no_hidden_bit );
|
||||
const uint16 b = _uint16_sels( is_y_e_larger_msb, x, y);
|
||||
const uint16 b_s = _uint16_and( b, h_s_mask );
|
||||
const uint16 b_e = _uint16_and( b, h_e_mask );
|
||||
const uint16 b_m_no_hidden_bit = _uint16_and( b, h_m_mask );
|
||||
const uint16 b_em_no_hidden_bit = _uint16_or( b_e, b_m_no_hidden_bit );
|
||||
const uint16 is_diff_sign_msb = _uint16_xor( a_s, b_s );
|
||||
const uint16 is_a_inf_msb = _uint16_sub( h_e_mask_minus_one, a_em_no_hidden_bit );
|
||||
const uint16 is_b_inf_msb = _uint16_sub( h_e_mask_minus_one, b_em_no_hidden_bit );
|
||||
const uint16 is_undenorm_msb = _uint16_dec( a_e );
|
||||
const uint16 is_undenorm = _uint16_ext( is_undenorm_msb );
|
||||
const uint16 is_both_inf_msb = _uint16_and( is_a_inf_msb, is_b_inf_msb );
|
||||
const uint16 is_invalid_inf_op_msb = _uint16_and( is_both_inf_msb, b_s );
|
||||
const uint16 is_a_e_nez_msb = _uint16_neg( a_e );
|
||||
const uint16 is_b_e_nez_msb = _uint16_neg( b_e );
|
||||
const uint16 is_a_e_nez = _uint16_ext( is_a_e_nez_msb );
|
||||
const uint16 is_b_e_nez = _uint16_ext( is_b_e_nez_msb );
|
||||
const uint16 a_m_hidden_bit = _uint16_and( is_a_e_nez, h_m_hidden );
|
||||
const uint16 b_m_hidden_bit = _uint16_and( is_b_e_nez, h_m_hidden );
|
||||
const uint16 a_m_no_grs = _uint16_or( a_m_no_hidden_bit, a_m_hidden_bit );
|
||||
const uint16 b_m_no_grs = _uint16_or( b_m_no_hidden_bit, b_m_hidden_bit );
|
||||
const uint16 diff_e = _uint16_sub( a_e, b_e );
|
||||
const uint16 a_e_unbias = _uint16_sub( a_e, h_e_bias_minus_one );
|
||||
const uint16 a_m = _uint16_sll( a_m_no_grs, h_grs_size );
|
||||
const uint16 a_e_biased = _uint16_srl( a_e, h_e_pos );
|
||||
const uint16 m_sa_unbias = _uint16_srl( a_e_unbias, h_e_pos );
|
||||
const uint16 m_sa_default = _uint16_srl( diff_e, h_e_pos );
|
||||
const uint16 m_sa_unbias_mask = _uint16_andc( is_a_e_nez_msb, is_b_e_nez_msb );
|
||||
const uint16 m_sa = _uint16_sels( m_sa_unbias_mask, m_sa_unbias, m_sa_default );
|
||||
const uint16 b_m_no_sticky = _uint16_sll( b_m_no_grs, h_grs_size );
|
||||
const uint16 sh_m = _uint16_srl( b_m_no_sticky, m_sa );
|
||||
const uint16 sticky_overflow = _uint16_sll( one, m_sa );
|
||||
const uint16 sticky_mask = _uint16_dec( sticky_overflow );
|
||||
const uint16 sticky_collect = _uint16_and( b_m_no_sticky, sticky_mask );
|
||||
const uint16 is_sticky_set_msb = _uint16_neg( sticky_collect );
|
||||
const uint16 sticky = _uint16_srl( is_sticky_set_msb, msb_to_lsb_sa);
|
||||
const uint16 b_m = _uint16_or( sh_m, sticky );
|
||||
const uint16 is_c_m_ab_pos_msb = _uint16_sub( b_m, a_m );
|
||||
const uint16 c_inf = _uint16_or( a_s, h_e_mask );
|
||||
const uint16 c_m_sum = _uint16_add( a_m, b_m );
|
||||
const uint16 c_m_diff_ab = _uint16_sub( a_m, b_m );
|
||||
const uint16 c_m_diff_ba = _uint16_sub( b_m, a_m );
|
||||
const uint16 c_m_smag_diff = _uint16_sels( is_c_m_ab_pos_msb, c_m_diff_ab, c_m_diff_ba );
|
||||
const uint16 c_s_diff = _uint16_sels( is_c_m_ab_pos_msb, a_s, b_s );
|
||||
const uint16 c_s = _uint16_sels( is_diff_sign_msb, c_s_diff, a_s );
|
||||
const uint16 c_m_smag_diff_nlz = _uint16_cntlz( c_m_smag_diff );
|
||||
const uint16 diff_norm_sa = _uint16_sub( c_m_smag_diff_nlz, one );
|
||||
const uint16 is_diff_denorm_msb = _uint16_sub( a_e_biased, diff_norm_sa );
|
||||
const uint16 is_diff_denorm = _uint16_ext( is_diff_denorm_msb );
|
||||
const uint16 is_a_or_b_norm_msb = _uint16_neg( a_e_biased );
|
||||
const uint16 diff_denorm_sa = _uint16_dec( a_e_biased );
|
||||
const uint16 c_m_diff_denorm = _uint16_sll( c_m_smag_diff, diff_denorm_sa );
|
||||
const uint16 c_m_diff_norm = _uint16_sll( c_m_smag_diff, diff_norm_sa );
|
||||
const uint16 c_e_diff_norm = _uint16_sub( a_e_biased, diff_norm_sa );
|
||||
const uint16 c_m_diff_ab_norm = _uint16_sels( is_diff_denorm_msb, c_m_diff_denorm, c_m_diff_norm );
|
||||
const uint16 c_e_diff_ab_norm = _uint16_andc( c_e_diff_norm, is_diff_denorm );
|
||||
const uint16 c_m_diff = _uint16_sels( is_a_or_b_norm_msb, c_m_diff_ab_norm, c_m_smag_diff );
|
||||
const uint16 c_e_diff = _uint16_sels( is_a_or_b_norm_msb, c_e_diff_ab_norm, a_e_biased );
|
||||
const uint16 is_diff_eqz_msb = _uint16_dec( c_m_diff );
|
||||
const uint16 is_diff_exactly_zero_msb = _uint16_and( is_diff_sign_msb, is_diff_eqz_msb );
|
||||
const uint16 is_diff_exactly_zero = _uint16_ext( is_diff_exactly_zero_msb );
|
||||
const uint16 c_m_added = _uint16_sels( is_diff_sign_msb, c_m_diff, c_m_sum );
|
||||
const uint16 c_e_added = _uint16_sels( is_diff_sign_msb, c_e_diff, a_e_biased );
|
||||
const uint16 c_m_carry = _uint16_and( c_m_added, h_m_grs_carry );
|
||||
const uint16 is_c_m_carry_msb = _uint16_neg( c_m_carry );
|
||||
const uint16 c_e_hidden_offset = _uint16_andsrl( c_m_added, h_m_grs_carry, h_m_grs_carry_pos );
|
||||
const uint16 c_m_sub_hidden = _uint16_srl( c_m_added, one );
|
||||
const uint16 c_m_no_hidden = _uint16_sels( is_c_m_carry_msb, c_m_sub_hidden, c_m_added );
|
||||
const uint16 c_e_no_hidden = _uint16_add( c_e_added, c_e_hidden_offset );
|
||||
const uint16 c_m_no_hidden_msb = _uint16_and( c_m_no_hidden, h_m_msb_mask );
|
||||
const uint16 undenorm_m_msb_odd = _uint16_srl( c_m_no_hidden_msb, h_m_msb_sa );
|
||||
const uint16 undenorm_fix_e = _uint16_and( is_undenorm, undenorm_m_msb_odd );
|
||||
const uint16 c_e_fixed = _uint16_add( c_e_no_hidden, undenorm_fix_e );
|
||||
const uint16 c_m_round_amount = _uint16_and( c_m_no_hidden, h_grs_round_mask );
|
||||
const uint16 c_m_rounded = _uint16_add( c_m_no_hidden, c_m_round_amount );
|
||||
const uint16 c_m_round_overflow = _uint16_andsrl( c_m_rounded, h_m_grs_carry, h_m_grs_carry_pos );
|
||||
const uint16 c_e_rounded = _uint16_add( c_e_fixed, c_m_round_overflow );
|
||||
const uint16 c_m_no_grs = _uint16_srlm( c_m_rounded, h_grs_size, h_m_mask );
|
||||
const uint16 c_e = _uint16_sll( c_e_rounded, h_e_pos );
|
||||
const uint16 c_em = _uint16_or( c_e, c_m_no_grs );
|
||||
const uint16 c_normal = _uint16_or( c_s, c_em );
|
||||
const uint16 c_inf_result = _uint16_sels( is_a_inf_msb, c_inf, c_normal );
|
||||
const uint16 c_zero_result = _uint16_andc( c_inf_result, is_diff_exactly_zero );
|
||||
const uint16 c_result = _uint16_sels( is_invalid_inf_op_msb, h_snan, c_zero_result );
|
||||
|
||||
return (c_result);
|
||||
}
|
@ -1,9 +0,0 @@
|
||||
#ifndef NV_MATH_HALF_H
|
||||
#define NV_MATH_HALF_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
uint32 half_to_float( uint16 h );
|
||||
uint16 half_from_float( uint32 f );
|
||||
|
||||
#endif /* NV_MATH_HALF_H */
|
@ -1,38 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_KAHANSUM_H
|
||||
#define NV_MATH_KAHANSUM_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
class KahanSum
|
||||
{
|
||||
public:
|
||||
KahanSum() : accum(0.0f), err(0) {};
|
||||
|
||||
void add(float f)
|
||||
{
|
||||
float compensated = f + err;
|
||||
float tmp = accum + compensated;
|
||||
err = accum - tmp;
|
||||
err += compensated;
|
||||
accum = tmp;
|
||||
}
|
||||
|
||||
float sum() const
|
||||
{
|
||||
return accum;
|
||||
}
|
||||
|
||||
private:
|
||||
float accum;
|
||||
float err;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_MATH_KAHANSUM_H
|
@ -1,168 +0,0 @@
|
||||
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
|
||||
|
||||
#include <nvmath/Polygon.h>
|
||||
|
||||
#include <nvmath/Triangle.h>
|
||||
#include <nvmath/Plane.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
Polygon::Polygon()
|
||||
{
|
||||
}
|
||||
|
||||
Polygon::Polygon(const Triangle & t)
|
||||
{
|
||||
pointArray.resize(3);
|
||||
pointArray[0] = t.v[0];
|
||||
pointArray[1] = t.v[1];
|
||||
pointArray[2] = t.v[2];
|
||||
}
|
||||
|
||||
Polygon::Polygon(const Vector3 * points, uint vertexCount)
|
||||
{
|
||||
pointArray.resize(vertexCount);
|
||||
|
||||
for (uint i = 0; i < vertexCount; i++)
|
||||
{
|
||||
pointArray[i] = points[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/// Compute polygon area.
|
||||
float Polygon::area() const
|
||||
{
|
||||
float total = 0;
|
||||
|
||||
const uint pointCount = pointArray.count();
|
||||
for (uint i = 2; i < pointCount; i++)
|
||||
{
|
||||
Vector3 v1 = pointArray[i-1] - pointArray[0];
|
||||
Vector3 v2 = pointArray[i] - pointArray[0];
|
||||
|
||||
total += 0.5f * length(cross(v1, v2));
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
/// Get the bounds of the polygon.
|
||||
Box Polygon::bounds() const
|
||||
{
|
||||
Box bounds;
|
||||
bounds.clearBounds();
|
||||
foreach(p, pointArray)
|
||||
{
|
||||
bounds.addPointToBounds(pointArray[p]);
|
||||
}
|
||||
return bounds;
|
||||
}
|
||||
|
||||
|
||||
/// Get the plane of the polygon.
|
||||
Plane Polygon::plane() const
|
||||
{
|
||||
// @@ Do something better than this?
|
||||
Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
|
||||
return Vector4(n, dot(n, pointArray[0]));
|
||||
}
|
||||
|
||||
|
||||
/// Clip polygon to box.
|
||||
uint Polygon::clipTo(const Box & box)
|
||||
{
|
||||
const Plane posX( 1, 0, 0, box.maxCorner().x());
|
||||
const Plane negX(-1, 0, 0,-box.minCorner().x());
|
||||
const Plane posY( 0, 1, 0, box.maxCorner().y());
|
||||
const Plane negY( 0,-1, 0,-box.minCorner().y());
|
||||
const Plane posZ( 0, 0, 1, box.maxCorner().z());
|
||||
const Plane negZ( 0, 0,-1,-box.minCorner().z());
|
||||
|
||||
if (clipTo(posX) == 0) return 0;
|
||||
if (clipTo(negX) == 0) return 0;
|
||||
if (clipTo(posY) == 0) return 0;
|
||||
if (clipTo(negY) == 0) return 0;
|
||||
if (clipTo(posZ) == 0) return 0;
|
||||
if (clipTo(negZ) == 0) return 0;
|
||||
|
||||
return pointArray.count();
|
||||
}
|
||||
|
||||
|
||||
/// Clip polygon to plane.
|
||||
uint Polygon::clipTo(const Plane & plane)
|
||||
{
|
||||
int count = 0;
|
||||
|
||||
const uint pointCount = pointArray.count();
|
||||
|
||||
Array<Vector3> newPointArray(pointCount + 1); // @@ Do not create copy every time.
|
||||
|
||||
Vector3 prevPoint = pointArray[pointCount - 1];
|
||||
float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
|
||||
|
||||
for (uint i = 0; i < pointCount; i++)
|
||||
{
|
||||
const Vector3 point = pointArray[i];
|
||||
float dist = dot(plane.vector(), point) - plane.offset();
|
||||
|
||||
// @@ Handle points on plane better.
|
||||
|
||||
if (dist <= 0) // interior.
|
||||
{
|
||||
if (prevDist > 0) // exterior
|
||||
{
|
||||
// Add segment intersection point.
|
||||
Vector3 dp = point - prevPoint;
|
||||
|
||||
float t = dist / prevDist;
|
||||
newPointArray.append(point - dp * t);
|
||||
}
|
||||
|
||||
// Add interior point.
|
||||
newPointArray.append(point);
|
||||
}
|
||||
else if (dist > 0 && prevDist < 0)
|
||||
{
|
||||
// Add segment intersection point.
|
||||
Vector3 dp = point - prevPoint;
|
||||
|
||||
float t = dist / prevDist;
|
||||
newPointArray.append(point - dp * t);
|
||||
}
|
||||
|
||||
prevPoint = point;
|
||||
prevDist = dist;
|
||||
}
|
||||
|
||||
swap(pointArray, newPointArray);
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
void Polygon::removeColinearPoints()
|
||||
{
|
||||
const uint pointCount = pointArray.count();
|
||||
|
||||
Array<Vector3> newPointArray(pointCount);
|
||||
|
||||
for (uint i = 0 ; i < pointCount; i++)
|
||||
{
|
||||
int j = (i + 1) % pointCount;
|
||||
int k = (i + pointCount - 1) % pointCount;
|
||||
|
||||
Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
|
||||
Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
|
||||
|
||||
if (dot(v1, v2) < 0.999)
|
||||
{
|
||||
newPointArray.append(pointArray[i]);
|
||||
}
|
||||
}
|
||||
|
||||
swap(pointArray, newPointArray);
|
||||
}
|
||||
|
@ -1,45 +0,0 @@
|
||||
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
|
||||
|
||||
#ifndef NV_MATH_POLYGON_H
|
||||
#define NV_MATH_POLYGON_H
|
||||
|
||||
#include <nvcore/Containers.h>
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
#include <nvmath/Vector.h>
|
||||
#include <nvmath/Box.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Box;
|
||||
class Plane;
|
||||
class Triangle;
|
||||
|
||||
|
||||
class Polygon
|
||||
{
|
||||
NV_FORBID_COPY(Polygon);
|
||||
public:
|
||||
|
||||
Polygon();
|
||||
Polygon(const Triangle & t);
|
||||
Polygon(const Vector3 * points, uint vertexCount);
|
||||
|
||||
float area() const;
|
||||
Box bounds() const;
|
||||
Plane plane() const;
|
||||
|
||||
uint clipTo(const Box & box);
|
||||
uint clipTo(const Plane & plane);
|
||||
|
||||
void removeColinearPoints();
|
||||
|
||||
private:
|
||||
|
||||
Array<Vector3> pointArray;
|
||||
};
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_MATH_POLYGON_H
|
@ -51,6 +51,7 @@ namespace nv
|
||||
|
||||
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
|
||||
{
|
||||
// @@ Efficient SIMD implementation?
|
||||
return Quaternion(
|
||||
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
|
||||
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
|
||||
@ -58,40 +59,6 @@ namespace nv
|
||||
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
|
||||
}
|
||||
|
||||
inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
|
||||
{
|
||||
return Quaternion(
|
||||
+ a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
|
||||
- a.x() * b.z() + a.z()*b.x() + a.w()*b.y(),
|
||||
+ a.x() * b.y() - a.y()*b.x() + a.w()*b.z(),
|
||||
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
|
||||
}
|
||||
|
||||
inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
|
||||
{
|
||||
return Quaternion(
|
||||
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
|
||||
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
|
||||
+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
|
||||
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
|
||||
}
|
||||
|
||||
inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
|
||||
{
|
||||
return mul(a, b);
|
||||
}
|
||||
|
||||
inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
|
||||
{
|
||||
return mul(a, b);
|
||||
}
|
||||
|
||||
inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
|
||||
{
|
||||
return mul(a, b);
|
||||
}
|
||||
|
||||
|
||||
inline Quaternion scale(Quaternion::Arg q, float s)
|
||||
{
|
||||
return scale(q.asVector(), s);
|
||||
@ -155,24 +122,6 @@ namespace nv
|
||||
return Quaternion(Vector4(v * s, c));
|
||||
}
|
||||
|
||||
inline Vector3 imag(Quaternion::Arg q)
|
||||
{
|
||||
return q.asVector().xyz();
|
||||
}
|
||||
|
||||
inline float real(Quaternion::Arg q)
|
||||
{
|
||||
return q.w();
|
||||
}
|
||||
|
||||
|
||||
/// Transform vector.
|
||||
inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
|
||||
{
|
||||
Quaternion t = q * v * conjugate(q);
|
||||
return imag(t);
|
||||
}
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
@ -1,726 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvmath/Solver.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
namespace
|
||||
{
|
||||
class Preconditioner
|
||||
{
|
||||
public:
|
||||
// Virtual dtor.
|
||||
virtual ~Preconditioner() { }
|
||||
|
||||
// Apply preconditioning step.
|
||||
virtual void apply(const FullVector & x, FullVector & y) const = 0;
|
||||
};
|
||||
|
||||
|
||||
// Jacobi preconditioner.
|
||||
class JacobiPreconditioner : public Preconditioner
|
||||
{
|
||||
public:
|
||||
|
||||
JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
|
||||
{
|
||||
nvCheck(M.isSquare());
|
||||
|
||||
for(uint x = 0; x < M.width(); x++)
|
||||
{
|
||||
float elem = M.getCoefficient(x, x);
|
||||
nvDebugCheck( elem != 0.0f );
|
||||
|
||||
if (symmetric)
|
||||
{
|
||||
m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
|
||||
}
|
||||
else
|
||||
{
|
||||
m_inverseDiagonal[x] = 1.0f / elem;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void apply(const FullVector & x, FullVector & y) const
|
||||
{
|
||||
y *= x;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
FullVector m_inverseDiagonal;
|
||||
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
|
||||
static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
|
||||
static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
|
||||
|
||||
|
||||
// Solve the symmetric system: At<41>A<EFBFBD>x = At<41>b
|
||||
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
|
||||
{
|
||||
nvDebugCheck(A.width() == x.dimension());
|
||||
nvDebugCheck(A.height() == b.dimension());
|
||||
nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
|
||||
|
||||
const uint D = A.width();
|
||||
|
||||
FullVector Atb(D);
|
||||
mult(Transposed, A, b, Atb);
|
||||
|
||||
SparseMatrix AtA(D);
|
||||
mult(Transposed, A, NoTransposed, A, AtA);
|
||||
|
||||
SymmetricSolver(AtA, Atb, x, epsilon);
|
||||
}
|
||||
|
||||
|
||||
// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
|
||||
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
|
||||
{
|
||||
nvDebugCheck(A.width() == x.dimension());
|
||||
nvDebugCheck(A.height() == b.dimension());
|
||||
nvDebugCheck(A.height() >= A.width() - lockedCount);
|
||||
|
||||
// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
|
||||
|
||||
const uint D = A.width() - lockedCount;
|
||||
nvDebugCheck(D > 0);
|
||||
|
||||
// Compute: b - Al * xl
|
||||
FullVector b_Alxl(b);
|
||||
|
||||
for (uint y = 0; y < A.height(); y++)
|
||||
{
|
||||
const uint count = A.getRow(y).count();
|
||||
for (uint e = 0; e < count; e++)
|
||||
{
|
||||
uint column = A.getRow(y)[e].x;
|
||||
|
||||
bool isFree = true;
|
||||
for (uint i = 0; i < lockedCount; i++)
|
||||
{
|
||||
isFree &= (lockedParameters[i] != column);
|
||||
}
|
||||
|
||||
if (!isFree)
|
||||
{
|
||||
b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove locked columns from A.
|
||||
SparseMatrix Af(D, A.height());
|
||||
|
||||
for (uint y = 0; y < A.height(); y++)
|
||||
{
|
||||
const uint count = A.getRow(y).count();
|
||||
for (uint e = 0; e < count; e++)
|
||||
{
|
||||
uint column = A.getRow(y)[e].x;
|
||||
uint ix = column;
|
||||
|
||||
bool isFree = true;
|
||||
for (uint i = 0; i < lockedCount; i++)
|
||||
{
|
||||
isFree &= (lockedParameters[i] != column);
|
||||
if (column > lockedParameters[i]) ix--; // shift columns
|
||||
}
|
||||
|
||||
if (isFree)
|
||||
{
|
||||
Af.setCoefficient(ix, y, A.getRow(y)[e].v);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Remove elements from x
|
||||
FullVector xf(D);
|
||||
|
||||
for (uint i = 0, j = 0; i < A.width(); i++)
|
||||
{
|
||||
bool isFree = true;
|
||||
for (uint l = 0; l < lockedCount; l++)
|
||||
{
|
||||
isFree &= (lockedParameters[l] != i);
|
||||
}
|
||||
|
||||
if (isFree)
|
||||
{
|
||||
xf[j++] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Solve reduced system.
|
||||
LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
|
||||
|
||||
// Copy results back to x.
|
||||
for (uint i = 0, j = 0; i < A.width(); i++)
|
||||
{
|
||||
bool isFree = true;
|
||||
for (uint l = 0; l < lockedCount; l++)
|
||||
{
|
||||
isFree &= (lockedParameters[l] != i);
|
||||
}
|
||||
|
||||
if (isFree)
|
||||
{
|
||||
x[i] = xf[j++];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
|
||||
{
|
||||
nvDebugCheck(A.height() == A.width());
|
||||
nvDebugCheck(A.height() == b.dimension());
|
||||
nvDebugCheck(b.dimension() == x.dimension());
|
||||
|
||||
// JacobiPreconditioner jacobi(A, true);
|
||||
|
||||
// ConjugateGradientSolver(jacobi, A, b, x, epsilon);
|
||||
ConjugateGradientSolver(A, b, x, epsilon);
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Compute the solution of the sparse linear system Ab=x using the Conjugate
|
||||
* Gradient method.
|
||||
*
|
||||
* Solving sparse linear systems:
|
||||
* (1) A<>x = b
|
||||
*
|
||||
* The conjugate gradient algorithm solves (1) only in the case that A is
|
||||
* symmetric and positive definite. It is based on the idea of minimizing the
|
||||
* function
|
||||
*
|
||||
* (2) f(x) = 1/2<>x<EFBFBD>A<EFBFBD>x - b<>x
|
||||
*
|
||||
* This function is minimized when its gradient
|
||||
*
|
||||
* (3) df = A<>x - b
|
||||
*
|
||||
* is zero, which is equivalent to (1). The minimization is carried out by
|
||||
* generating a succession of search directions p.k and improved minimizers x.k.
|
||||
* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k<>p.k),
|
||||
* and x.k+1 is set equal to the new point x.k + alfa.k<>p.k. The p.k and x.k are
|
||||
* built up in such a way that x.k+1 is also the minimizer of f over the whole
|
||||
* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
|
||||
* iterations you arrive at the minimizer over the entire vector space, i.e., the
|
||||
* solution to (1).
|
||||
*
|
||||
* For a really good explanation of the method see:
|
||||
*
|
||||
* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
|
||||
* Jonhathan Richard Shewchuk.
|
||||
*
|
||||
**/
|
||||
/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
|
||||
{
|
||||
nvDebugCheck( A.isSquare() );
|
||||
nvDebugCheck( A.width() == b.dimension() );
|
||||
nvDebugCheck( A.width() == x.dimension() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.width();
|
||||
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
|
||||
|
||||
FullVector r(D); // residual
|
||||
FullVector p(D); // search direction
|
||||
FullVector q(D); //
|
||||
float delta_0;
|
||||
float delta_old;
|
||||
float delta_new;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
// r = b - A<>x;
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
|
||||
// p = r;
|
||||
copy(r, p);
|
||||
|
||||
delta_new = dot( r, r );
|
||||
delta_0 = delta_new;
|
||||
|
||||
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
|
||||
{
|
||||
i++;
|
||||
|
||||
// q = A<>p
|
||||
mult(A, p, q);
|
||||
|
||||
// alpha = delta_new / p<>q
|
||||
alpha = delta_new / dot( p, q );
|
||||
|
||||
// x = alfa<66>p + x
|
||||
saxpy(alpha, p, x);
|
||||
|
||||
if ((i & 31) == 0) // recompute r after 32 steps
|
||||
{
|
||||
// r = b - A<>x
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
}
|
||||
else
|
||||
{
|
||||
// r = r - alpha<68>q
|
||||
saxpy(-alpha, q, r);
|
||||
}
|
||||
|
||||
delta_old = delta_new;
|
||||
delta_new = dot( r, r );
|
||||
|
||||
beta = delta_new / delta_old;
|
||||
|
||||
// p = r + beta<74>p
|
||||
copy(r, p);
|
||||
saxpy(beta, p, r);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
// Conjugate gradient with preconditioner.
|
||||
/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
|
||||
{
|
||||
nvDebugCheck( A.isSquare() );
|
||||
nvDebugCheck( A.width() == b.dimension() );
|
||||
nvDebugCheck( A.width() == x.dimension() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.width();
|
||||
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
|
||||
|
||||
FullVector r(D); // residual
|
||||
FullVector p(D); // search direction
|
||||
FullVector q(D); //
|
||||
FullVector s(D); // preconditioned
|
||||
float delta_0;
|
||||
float delta_old;
|
||||
float delta_new;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
// r = b - A<>x
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
|
||||
|
||||
// p = M^-1 <20> r
|
||||
preconditioner.apply(r, p);
|
||||
//copy(r, p);
|
||||
|
||||
|
||||
delta_new = dot(r, p);
|
||||
delta_0 = delta_new;
|
||||
|
||||
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
|
||||
{
|
||||
i++;
|
||||
|
||||
// q = A<>p
|
||||
mult(A, p, q);
|
||||
|
||||
// alpha = delta_new / p<>q
|
||||
alpha = delta_new / dot(p, q);
|
||||
|
||||
// x = alfa<66>p + x
|
||||
saxpy(alpha, p, x);
|
||||
|
||||
if ((i & 31) == 0) // recompute r after 32 steps
|
||||
{
|
||||
// r = b - A<>x
|
||||
copy(b, r);
|
||||
sgemv(-1, A, x, 1, r);
|
||||
}
|
||||
else
|
||||
{
|
||||
// r = r - alfa<66>q
|
||||
saxpy(-alpha, q, r);
|
||||
}
|
||||
|
||||
// s = M^-1 <20> r
|
||||
preconditioner.apply(r, s);
|
||||
//copy(r, s);
|
||||
|
||||
delta_old = delta_new;
|
||||
delta_new = dot( r, s );
|
||||
|
||||
beta = delta_new / delta_old;
|
||||
|
||||
// p = s + beta<74>p
|
||||
copy(s, p);
|
||||
saxpy(beta, p, s);
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
#if 0 // Nonsymmetric solvers
|
||||
|
||||
/** Bi-conjugate gradient method. */
|
||||
MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
|
||||
piDebugCheck( A.IsSquare() );
|
||||
piDebugCheck( A.Width() == b.Dim() );
|
||||
piDebugCheck( A.Width() == x.Dim() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.Width();
|
||||
const int i_max = 4 * D;
|
||||
|
||||
float resid;
|
||||
float rho_1 = 0;
|
||||
float rho_2 = 0;
|
||||
float alpha;
|
||||
float beta;
|
||||
|
||||
DenseVector r(D);
|
||||
DenseVector rtilde(D);
|
||||
DenseVector p(D);
|
||||
DenseVector ptilde(D);
|
||||
DenseVector q(D);
|
||||
DenseVector qtilde(D);
|
||||
DenseVector tmp(D); // temporal vector.
|
||||
|
||||
// r = b - A<>x;
|
||||
A.Product( x, tmp );
|
||||
r.Sub( b, tmp );
|
||||
|
||||
// rtilde = r
|
||||
rtilde.Set( r );
|
||||
|
||||
// p = r;
|
||||
p.Set( r );
|
||||
|
||||
// ptilde = rtilde
|
||||
ptilde.Set( rtilde );
|
||||
|
||||
|
||||
|
||||
float normb = b.Norm();
|
||||
if( normb == 0.0 ) normb = 1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
while( i < i_max ) {
|
||||
|
||||
i++;
|
||||
|
||||
rho_1 = DenseVectorDotProduct( r, rtilde );
|
||||
|
||||
if( rho_1 == 0 ) {
|
||||
// method fails.
|
||||
return -i;
|
||||
}
|
||||
|
||||
if (i == 1) {
|
||||
p.Set( r );
|
||||
ptilde.Set( rtilde );
|
||||
}
|
||||
else {
|
||||
beta = rho_1 / rho_2;
|
||||
|
||||
// p = r + beta * p;
|
||||
p.Mad( r, p, beta );
|
||||
|
||||
// ptilde = ztilde + beta * ptilde;
|
||||
ptilde.Mad( rtilde, ptilde, beta );
|
||||
}
|
||||
|
||||
// q = A * p;
|
||||
A.Product( p, q );
|
||||
|
||||
// qtilde = A^t * ptilde;
|
||||
A.TransProduct( ptilde, qtilde );
|
||||
|
||||
alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
|
||||
|
||||
// x += alpha * p;
|
||||
x.Mad( x, p, alpha );
|
||||
|
||||
// r -= alpha * q;
|
||||
r.Mad( r, q, -alpha );
|
||||
|
||||
// rtilde -= alpha * qtilde;
|
||||
rtilde.Mad( rtilde, qtilde, -alpha );
|
||||
|
||||
rho_2 = rho_1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges
|
||||
return i;
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/** Bi-conjugate gradient stabilized method. */
|
||||
int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
|
||||
piDebugCheck( A.IsSquare() );
|
||||
piDebugCheck( A.Width() == b.Dim() );
|
||||
piDebugCheck( A.Width() == x.Dim() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.Width();
|
||||
const int i_max = 2 * D;
|
||||
|
||||
|
||||
float resid;
|
||||
float rho_1 = 0;
|
||||
float rho_2 = 0;
|
||||
float alpha = 0;
|
||||
float beta = 0;
|
||||
float omega = 0;
|
||||
|
||||
DenseVector p(D);
|
||||
DenseVector phat(D);
|
||||
DenseVector s(D);
|
||||
DenseVector shat(D);
|
||||
DenseVector t(D);
|
||||
DenseVector v(D);
|
||||
|
||||
DenseVector r(D);
|
||||
DenseVector rtilde(D);
|
||||
|
||||
DenseVector tmp(D);
|
||||
|
||||
// r = b - A<>x;
|
||||
A.Product( x, tmp );
|
||||
r.Sub( b, tmp );
|
||||
|
||||
// rtilde = r
|
||||
rtilde.Set( r );
|
||||
|
||||
|
||||
float normb = b.Norm();
|
||||
if( normb == 0.0 ) normb = 1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
while( i<i_max ) {
|
||||
|
||||
i++;
|
||||
|
||||
rho_1 = DenseVectorDotProduct( rtilde, r );
|
||||
if( rho_1 == 0 ) {
|
||||
// method fails
|
||||
return -i;
|
||||
}
|
||||
|
||||
|
||||
if( i == 1 ) {
|
||||
p.Set( r );
|
||||
}
|
||||
else {
|
||||
beta = (rho_1 / rho_2) * (alpha / omega);
|
||||
|
||||
// p = r + beta * (p - omega * v);
|
||||
p.Mad( p, v, -omega );
|
||||
p.Mad( r, p, beta );
|
||||
}
|
||||
|
||||
//phat = M.solve(p);
|
||||
phat.Set( p );
|
||||
//Precond( &phat, p );
|
||||
|
||||
//v = A * phat;
|
||||
A.Product( phat, v );
|
||||
|
||||
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
|
||||
|
||||
// s = r - alpha * v;
|
||||
s.Mad( r, v, -alpha );
|
||||
|
||||
|
||||
resid = s.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// x += alpha * phat;
|
||||
x.Mad( x, phat, alpha );
|
||||
return i;
|
||||
}
|
||||
|
||||
//shat = M.solve(s);
|
||||
shat.Set( s );
|
||||
//Precond( &shat, s );
|
||||
|
||||
//t = A * shat;
|
||||
A.Product( shat, t );
|
||||
|
||||
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
|
||||
|
||||
// x += alpha * phat + omega * shat;
|
||||
x.Mad( x, shat, omega );
|
||||
x.Mad( x, phat, alpha );
|
||||
|
||||
//r = s - omega * t;
|
||||
r.Mad( s, t, -omega );
|
||||
|
||||
rho_2 = rho_1;
|
||||
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
return i;
|
||||
}
|
||||
|
||||
if( omega == 0 ) {
|
||||
return -i; // ???
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/** Bi-conjugate gradient stabilized method. */
|
||||
int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
|
||||
piDebugCheck( A.IsSquare() );
|
||||
piDebugCheck( A.Width() == b.Dim() );
|
||||
piDebugCheck( A.Width() == x.Dim() );
|
||||
|
||||
int i = 0;
|
||||
const int D = A.Width();
|
||||
const int i_max = D;
|
||||
// const int i_max = 1000;
|
||||
|
||||
|
||||
float resid;
|
||||
float rho_1 = 0;
|
||||
float rho_2 = 0;
|
||||
float alpha = 0;
|
||||
float beta = 0;
|
||||
float omega = 0;
|
||||
|
||||
DenseVector p(D);
|
||||
DenseVector phat(D);
|
||||
DenseVector s(D);
|
||||
DenseVector shat(D);
|
||||
DenseVector t(D);
|
||||
DenseVector v(D);
|
||||
|
||||
DenseVector r(D);
|
||||
DenseVector rtilde(D);
|
||||
|
||||
DenseVector tmp(D);
|
||||
|
||||
// r = b - A<>x;
|
||||
A.Product( x, tmp );
|
||||
r.Sub( b, tmp );
|
||||
|
||||
// rtilde = r
|
||||
rtilde.Set( r );
|
||||
|
||||
|
||||
float normb = b.Norm();
|
||||
if( normb == 0.0 ) normb = 1;
|
||||
|
||||
// test convergence
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
// method converges?
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
while( i<i_max ) {
|
||||
|
||||
i++;
|
||||
|
||||
rho_1 = DenseVectorDotProduct( rtilde, r );
|
||||
if( rho_1 == 0 ) {
|
||||
// method fails
|
||||
return -i;
|
||||
}
|
||||
|
||||
|
||||
if( i == 1 ) {
|
||||
p.Set( r );
|
||||
}
|
||||
else {
|
||||
beta = (rho_1 / rho_2) * (alpha / omega);
|
||||
|
||||
// p = r + beta * (p - omega * v);
|
||||
p.Mad( p, v, -omega );
|
||||
p.Mad( r, p, beta );
|
||||
}
|
||||
|
||||
//phat = M.solve(p);
|
||||
//phat.Set( p );
|
||||
M.Precond( &phat, p );
|
||||
|
||||
//v = A * phat;
|
||||
A.Product( phat, v );
|
||||
|
||||
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
|
||||
|
||||
// s = r - alpha * v;
|
||||
s.Mad( r, v, -alpha );
|
||||
|
||||
|
||||
resid = s.Norm() / normb;
|
||||
|
||||
//printf( "--- Iteration %d: residual = %f\n", i, resid );
|
||||
|
||||
if( resid < epsilon ) {
|
||||
// x += alpha * phat;
|
||||
x.Mad( x, phat, alpha );
|
||||
return i;
|
||||
}
|
||||
|
||||
//shat = M.solve(s);
|
||||
//shat.Set( s );
|
||||
M.Precond( &shat, s );
|
||||
|
||||
//t = A * shat;
|
||||
A.Product( shat, t );
|
||||
|
||||
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
|
||||
|
||||
// x += alpha * phat + omega * shat;
|
||||
x.Mad( x, shat, omega );
|
||||
x.Mad( x, phat, alpha );
|
||||
|
||||
//r = s - omega * t;
|
||||
r.Mad( s, t, -omega );
|
||||
|
||||
rho_2 = rho_1;
|
||||
|
||||
resid = r.Norm() / normb;
|
||||
if( resid < epsilon ) {
|
||||
return i;
|
||||
}
|
||||
|
||||
if( omega == 0 ) {
|
||||
return -i; // ???
|
||||
}
|
||||
}
|
||||
|
||||
return i;
|
||||
}
|
||||
|
||||
#endif
|
@ -1,20 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_SOLVER_H
|
||||
#define NV_MATH_SOLVER_H
|
||||
|
||||
#include <nvmath/Sparse.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
// Linear solvers.
|
||||
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
|
||||
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
|
||||
NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
|
||||
// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_MATH_SOLVER_H
|
@ -1,831 +0,0 @@
|
||||
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
|
||||
|
||||
#include <nvmath/Sparse.h>
|
||||
#include <nvmath/KahanSum.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
/// Ctor.
|
||||
FullVector::FullVector(uint dim)
|
||||
{
|
||||
m_array.resize(dim);
|
||||
}
|
||||
|
||||
/// Copy ctor.
|
||||
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
|
||||
{
|
||||
}
|
||||
|
||||
/// Copy operator
|
||||
const FullVector & FullVector::operator=(const FullVector & v)
|
||||
{
|
||||
nvCheck(dimension() == v.dimension());
|
||||
|
||||
m_array = v.m_array;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
void FullVector::fill(float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] = f;
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator+= (const FullVector & v)
|
||||
{
|
||||
nvDebugCheck(dimension() == v.dimension());
|
||||
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] += v.m_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator-= (const FullVector & v)
|
||||
{
|
||||
nvDebugCheck(dimension() == v.dimension());
|
||||
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] -= v.m_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator*= (const FullVector & v)
|
||||
{
|
||||
nvDebugCheck(dimension() == v.dimension());
|
||||
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] *= v.m_array[i];
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator+= (float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] += f;
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator-= (float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] -= f;
|
||||
}
|
||||
}
|
||||
|
||||
void FullVector::operator*= (float f)
|
||||
{
|
||||
const uint dim = dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
m_array[i] *= f;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::saxpy(float a, const FullVector & x, FullVector & y)
|
||||
{
|
||||
nvDebugCheck(x.dimension() == y.dimension());
|
||||
|
||||
const uint dim = x.dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
y[i] += a * x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void nv::copy(const FullVector & x, FullVector & y)
|
||||
{
|
||||
nvDebugCheck(x.dimension() == y.dimension());
|
||||
|
||||
const uint dim = x.dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
y[i] = x[i];
|
||||
}
|
||||
}
|
||||
|
||||
void nv::scal(float a, FullVector & x)
|
||||
{
|
||||
const uint dim = x.dimension();
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
x[i] *= a;
|
||||
}
|
||||
}
|
||||
|
||||
float nv::dot(const FullVector & x, const FullVector & y)
|
||||
{
|
||||
nvDebugCheck(x.dimension() == y.dimension());
|
||||
|
||||
const uint dim = x.dimension();
|
||||
|
||||
/*float sum = 0;
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
sum += x[i] * y[i];
|
||||
}
|
||||
return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
|
||||
for (uint i = 0; i < dim; i++)
|
||||
{
|
||||
kahan.add(x[i] * y[i]);
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
|
||||
FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
|
||||
{
|
||||
m_array.resize(d*d, 0.0f);
|
||||
}
|
||||
|
||||
FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
|
||||
{
|
||||
m_array.resize(w*h, 0.0f);
|
||||
}
|
||||
|
||||
FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
|
||||
{
|
||||
m_array = m.m_array;
|
||||
}
|
||||
|
||||
const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
|
||||
{
|
||||
nvCheck(width() == m.width());
|
||||
nvCheck(height() == m.height());
|
||||
|
||||
m_array = m.m_array;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
float FullMatrix::getCoefficient(uint x, uint y) const
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
return m_array[y * width() + x];
|
||||
}
|
||||
|
||||
void FullMatrix::setCoefficient(uint x, uint y, float f)
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
m_array[y * width() + x] = f;
|
||||
}
|
||||
|
||||
void FullMatrix::addCoefficient(uint x, uint y, float f)
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
m_array[y * width() + x] += f;
|
||||
}
|
||||
|
||||
void FullMatrix::mulCoefficient(uint x, uint y, float f)
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
m_array[y * width() + x] *= f;
|
||||
}
|
||||
|
||||
float FullMatrix::dotRow(uint y, const FullVector & v) const
|
||||
{
|
||||
nvDebugCheck( v.dimension() == width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
float sum = 0;
|
||||
|
||||
const uint count = v.dimension();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
sum += m_array[y * count + i] * v[i];
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
|
||||
{
|
||||
nvDebugCheck( v.dimension() == width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = v.dimension();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
v[i] += m_array[y * count + i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// y = M * x
|
||||
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
|
||||
{
|
||||
mult(NoTransposed, M, x, y);
|
||||
}
|
||||
|
||||
void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
|
||||
{
|
||||
const uint w = M.width();
|
||||
const uint h = M.height();
|
||||
|
||||
if (TM == Transposed)
|
||||
{
|
||||
nvDebugCheck( h == x.dimension() );
|
||||
nvDebugCheck( w == y.dimension() );
|
||||
|
||||
y.fill(0.0f);
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
M.madRow(i, x[i], y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvDebugCheck( w == x.dimension() );
|
||||
nvDebugCheck( h == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
y[i] = M.dotRow(i, x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
sgemv(alpha, NoTransposed, A, x, beta, y);
|
||||
}
|
||||
|
||||
void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
const uint w = A.width();
|
||||
const uint h = A.height();
|
||||
|
||||
if (TA == Transposed)
|
||||
{
|
||||
nvDebugCheck( h == x.dimension() );
|
||||
nvDebugCheck( w == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
A.madRow(i, alpha * x[i], y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvDebugCheck( w == x.dimension() );
|
||||
nvDebugCheck( h == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Multiply a row of A by a column of B.
|
||||
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
|
||||
{
|
||||
const uint w = (TA == NoTransposed) ? A.width() : A.height();
|
||||
nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
|
||||
|
||||
float sum = 0.0f;
|
||||
|
||||
for (uint k = 0; k < w; k++)
|
||||
{
|
||||
const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
|
||||
const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
|
||||
sum += a * b;
|
||||
}
|
||||
|
||||
return sum;
|
||||
}
|
||||
|
||||
|
||||
// C = A * B
|
||||
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
|
||||
{
|
||||
mult(NoTransposed, A, NoTransposed, B, C);
|
||||
}
|
||||
|
||||
void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
|
||||
{
|
||||
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
|
||||
}
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
|
||||
{
|
||||
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
|
||||
}
|
||||
|
||||
void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
|
||||
{
|
||||
const uint w = C.width();
|
||||
const uint h = C.height();
|
||||
|
||||
uint aw = (TA == NoTransposed) ? A.width() : A.height();
|
||||
uint ah = (TA == NoTransposed) ? A.height() : A.width();
|
||||
uint bw = (TB == NoTransposed) ? B.width() : B.height();
|
||||
uint bh = (TB == NoTransposed) ? B.height() : B.width();
|
||||
|
||||
nvDebugCheck(aw == bh);
|
||||
nvDebugCheck(bw == ah);
|
||||
nvDebugCheck(w == bw);
|
||||
nvDebugCheck(h == ah);
|
||||
|
||||
for (uint y = 0; y < h; y++)
|
||||
{
|
||||
for (uint x = 0; x < w; x++)
|
||||
{
|
||||
float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
|
||||
C.setCoefficient(x, y, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/// Ctor. Init the size of the sparse matrix.
|
||||
SparseMatrix::SparseMatrix(uint d) : m_width(d)
|
||||
{
|
||||
m_array.resize(d);
|
||||
}
|
||||
|
||||
/// Ctor. Init the size of the sparse matrix.
|
||||
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
|
||||
{
|
||||
m_array.resize(h);
|
||||
}
|
||||
|
||||
SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
|
||||
{
|
||||
m_array = m.m_array;
|
||||
}
|
||||
|
||||
const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
|
||||
{
|
||||
nvCheck(width() == m.width());
|
||||
nvCheck(height() == m.height());
|
||||
|
||||
m_array = m.m_array;
|
||||
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
// x is column, y is row
|
||||
float SparseMatrix::getCoefficient(uint x, uint y) const
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
if (m_array[y][i].x == x) return m_array[y][i].v;
|
||||
}
|
||||
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
void SparseMatrix::setCoefficient(uint x, uint y, float f)
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
if (m_array[y][i].x == x)
|
||||
{
|
||||
m_array[y][i].v = f;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Coefficient c = { x, f };
|
||||
m_array[y].append( c );
|
||||
}
|
||||
|
||||
void SparseMatrix::addCoefficient(uint x, uint y, float f)
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
if (m_array[y][i].x == x)
|
||||
{
|
||||
m_array[y][i].v += f;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Coefficient c = { x, f };
|
||||
m_array[y].append( c );
|
||||
}
|
||||
|
||||
void SparseMatrix::mulCoefficient(uint x, uint y, float f)
|
||||
{
|
||||
nvDebugCheck( x < width() );
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
if (m_array[y][i].x == x)
|
||||
{
|
||||
m_array[y][i].v *= f;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
Coefficient c = { x, f };
|
||||
m_array[y].append( c );
|
||||
}
|
||||
|
||||
|
||||
float SparseMatrix::sumRow(uint y) const
|
||||
{
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
|
||||
/*float sum = 0;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
sum += m_array[y][i].v;
|
||||
}
|
||||
return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
kahan.add(m_array[y][i].v);
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
float SparseMatrix::dotRow(uint y, const FullVector & v) const
|
||||
{
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
|
||||
/*float sum = 0;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
sum += m_array[y][i].v * v[m_array[y][i].x];
|
||||
}
|
||||
return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
|
||||
{
|
||||
nvDebugCheck(y < height());
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
v[m_array[y][i].x] += alpha * m_array[y][i].v;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void SparseMatrix::clearRow(uint y)
|
||||
{
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
m_array[y].clear();
|
||||
}
|
||||
|
||||
void SparseMatrix::scaleRow(uint y, float f)
|
||||
{
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
m_array[y][i].v *= f;
|
||||
}
|
||||
}
|
||||
|
||||
void SparseMatrix::normalizeRow(uint y)
|
||||
{
|
||||
nvDebugCheck( y < height() );
|
||||
|
||||
float norm = 0.0f;
|
||||
|
||||
const uint count = m_array[y].count();
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
float f = m_array[y][i].v;
|
||||
norm += f * f;
|
||||
}
|
||||
|
||||
scaleRow(y, 1.0f / sqrtf(norm));
|
||||
}
|
||||
|
||||
|
||||
void SparseMatrix::clearColumn(uint x)
|
||||
{
|
||||
nvDebugCheck(x < width());
|
||||
|
||||
for (uint y = 0; y < height(); y++)
|
||||
{
|
||||
const uint count = m_array[y].count();
|
||||
for (uint e = 0; e < count; e++)
|
||||
{
|
||||
if (m_array[y][e].x == x)
|
||||
{
|
||||
m_array[y][e].v = 0.0f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SparseMatrix::scaleColumn(uint x, float f)
|
||||
{
|
||||
nvDebugCheck(x < width());
|
||||
|
||||
for (uint y = 0; y < height(); y++)
|
||||
{
|
||||
const uint count = m_array[y].count();
|
||||
for (uint e = 0; e < count; e++)
|
||||
{
|
||||
if (m_array[y][e].x == x)
|
||||
{
|
||||
m_array[y][e].v *= f;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
|
||||
{
|
||||
return m_array[y];
|
||||
}
|
||||
|
||||
|
||||
// y = M * x
|
||||
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
|
||||
{
|
||||
mult(NoTransposed, M, x, y);
|
||||
}
|
||||
|
||||
void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
|
||||
{
|
||||
const uint w = M.width();
|
||||
const uint h = M.height();
|
||||
|
||||
if (TM == Transposed)
|
||||
{
|
||||
nvDebugCheck( h == x.dimension() );
|
||||
nvDebugCheck( w == y.dimension() );
|
||||
|
||||
y.fill(0.0f);
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
M.madRow(i, x[i], y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvDebugCheck( w == x.dimension() );
|
||||
nvDebugCheck( h == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
y[i] = M.dotRow(i, x);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
sgemv(alpha, NoTransposed, A, x, beta, y);
|
||||
}
|
||||
|
||||
void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
|
||||
{
|
||||
const uint w = A.width();
|
||||
const uint h = A.height();
|
||||
|
||||
if (TA == Transposed)
|
||||
{
|
||||
nvDebugCheck( h == x.dimension() );
|
||||
nvDebugCheck( w == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
A.madRow(i, alpha * x[i], y);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvDebugCheck( w == x.dimension() );
|
||||
nvDebugCheck( h == y.dimension() );
|
||||
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// dot y-row of A by x-column of B
|
||||
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
|
||||
{
|
||||
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
|
||||
|
||||
const uint count = row.count();
|
||||
|
||||
/*float sum = 0.0f;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
sum += c.v * B.getCoefficient(x, c.x);
|
||||
}
|
||||
return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
kahan.add(c.v * B.getCoefficient(x, c.x));
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
// dot y-row of A by x-row of B
|
||||
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
|
||||
{
|
||||
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
|
||||
|
||||
const uint count = row.count();
|
||||
|
||||
/*float sum = 0.0f;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
sum += c.v * B.getCoefficient(c.x, x);
|
||||
}
|
||||
//return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
for (uint i = 0; i < count; i++)
|
||||
{
|
||||
const SparseMatrix::Coefficient & c = row[i];
|
||||
kahan.add(c.v * B.getCoefficient(c.x, x));
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
// dot y-column of A by x-column of B
|
||||
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
|
||||
{
|
||||
nvDebugCheck(A.height() == B.height());
|
||||
|
||||
const uint h = A.height();
|
||||
|
||||
/*float sum = 0.0f;
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
|
||||
}
|
||||
//return sum;*/
|
||||
|
||||
KahanSum kahan;
|
||||
for (uint i = 0; i < h; i++)
|
||||
{
|
||||
kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
|
||||
}
|
||||
|
||||
return kahan.sum();
|
||||
}
|
||||
|
||||
|
||||
|
||||
// C = A * B
|
||||
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
|
||||
{
|
||||
mult(NoTransposed, A, NoTransposed, B, C);
|
||||
}
|
||||
|
||||
void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
|
||||
{
|
||||
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
|
||||
}
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
|
||||
{
|
||||
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
|
||||
}
|
||||
|
||||
void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
|
||||
{
|
||||
const uint w = C.width();
|
||||
const uint h = C.height();
|
||||
|
||||
uint aw = (TA == NoTransposed) ? A.width() : A.height();
|
||||
uint ah = (TA == NoTransposed) ? A.height() : A.width();
|
||||
uint bw = (TB == NoTransposed) ? B.width() : B.height();
|
||||
uint bh = (TB == NoTransposed) ? B.height() : B.width();
|
||||
|
||||
nvDebugCheck(aw == bh);
|
||||
nvDebugCheck(bw == ah);
|
||||
nvDebugCheck(w == bw);
|
||||
nvDebugCheck(h == ah);
|
||||
|
||||
|
||||
for (uint y = 0; y < h; y++)
|
||||
{
|
||||
for (uint x = 0; x < w; x++)
|
||||
{
|
||||
float c = beta * C.getCoefficient(x, y);
|
||||
|
||||
if (TA == NoTransposed && TB == NoTransposed)
|
||||
{
|
||||
// dot y-row of A by x-column of B.
|
||||
c += alpha * dotRowColumn(y, A, x, B);
|
||||
}
|
||||
else if (TA == Transposed && TB == Transposed)
|
||||
{
|
||||
// dot y-column of A by x-row of B.
|
||||
c += alpha * dotRowColumn(x, B, y, A);
|
||||
}
|
||||
else if (TA == Transposed && TB == NoTransposed)
|
||||
{
|
||||
// dot y-column of A by x-column of B.
|
||||
c += alpha * dotColumnColumn(y, A, x, B);
|
||||
}
|
||||
else if (TA == NoTransposed && TB == Transposed)
|
||||
{
|
||||
// dot y-row of A by x-row of B.
|
||||
c += alpha * dotRowRow(y, A, x, B);
|
||||
}
|
||||
|
||||
if (c != 0.0f)
|
||||
{
|
||||
C.setCoefficient(x, y, c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// C = At * A
|
||||
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
|
||||
{
|
||||
// This is quite expensive...
|
||||
mult(Transposed, A, NoTransposed, A, C);
|
||||
}
|
@ -1,198 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_SPARSE_H
|
||||
#define NV_MATH_SPARSE_H
|
||||
|
||||
#include <nvcore/Containers.h>
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
// Full and sparse vector and matrix classes. BLAS subset.
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class FullVector;
|
||||
class FullMatrix;
|
||||
class SparseMatrix;
|
||||
|
||||
|
||||
/// Fixed size vector class.
|
||||
class FullVector
|
||||
{
|
||||
public:
|
||||
|
||||
FullVector(uint dim);
|
||||
FullVector(const FullVector & v);
|
||||
|
||||
const FullVector & operator=(const FullVector & v);
|
||||
|
||||
uint dimension() const { return m_array.count(); }
|
||||
|
||||
const float & operator[]( uint index ) const { return m_array[index]; }
|
||||
float & operator[] ( uint index ) { return m_array[index]; }
|
||||
|
||||
void fill(float f);
|
||||
|
||||
void operator+= (const FullVector & v);
|
||||
void operator-= (const FullVector & v);
|
||||
void operator*= (const FullVector & v);
|
||||
|
||||
void operator+= (float f);
|
||||
void operator-= (float f);
|
||||
void operator*= (float f);
|
||||
|
||||
|
||||
private:
|
||||
|
||||
Array<float> m_array;
|
||||
|
||||
};
|
||||
|
||||
// Pseudo-BLAS interface.
|
||||
NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
|
||||
NVMATH_API void copy(const FullVector & x, FullVector & y);
|
||||
NVMATH_API void scal(float a, FullVector & x);
|
||||
NVMATH_API float dot(const FullVector & x, const FullVector & y);
|
||||
|
||||
|
||||
enum Transpose
|
||||
{
|
||||
NoTransposed = 0,
|
||||
Transposed = 1
|
||||
};
|
||||
|
||||
/// Full matrix class.
|
||||
class FullMatrix
|
||||
{
|
||||
public:
|
||||
|
||||
FullMatrix(uint d);
|
||||
FullMatrix(uint w, uint h);
|
||||
FullMatrix(const FullMatrix & m);
|
||||
|
||||
const FullMatrix & operator=(const FullMatrix & m);
|
||||
|
||||
uint width() const { return m_width; }
|
||||
uint height() const { return m_height; }
|
||||
bool isSquare() const { return m_width == m_height; }
|
||||
|
||||
float getCoefficient(uint x, uint y) const;
|
||||
|
||||
void setCoefficient(uint x, uint y, float f);
|
||||
void addCoefficient(uint x, uint y, float f);
|
||||
void mulCoefficient(uint x, uint y, float f);
|
||||
|
||||
float dotRow(uint y, const FullVector & v) const;
|
||||
void madRow(uint y, float alpha, FullVector & v) const;
|
||||
|
||||
protected:
|
||||
|
||||
bool isValid() const {
|
||||
return m_array.size() == (m_width * m_height);
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
const uint m_width;
|
||||
const uint m_height;
|
||||
Array<float> m_array;
|
||||
|
||||
};
|
||||
|
||||
NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
|
||||
NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
|
||||
NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
|
||||
NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
|
||||
NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
|
||||
|
||||
|
||||
/**
|
||||
* Sparse matrix class. The matrix is assumed to be sparse and to have
|
||||
* very few non-zero elements, for this reason it's stored in indexed
|
||||
* format. To multiply column vectors efficiently, the matrix stores
|
||||
* the elements in indexed-column order, there is a list of indexed
|
||||
* elements for each row of the matrix. As with the FullVector the
|
||||
* dimension of the matrix is constant.
|
||||
**/
|
||||
class SparseMatrix
|
||||
{
|
||||
friend class FullMatrix;
|
||||
public:
|
||||
|
||||
// An element of the sparse array.
|
||||
struct Coefficient {
|
||||
uint x; // column
|
||||
float v; // value
|
||||
};
|
||||
|
||||
|
||||
public:
|
||||
|
||||
SparseMatrix(uint d);
|
||||
SparseMatrix(uint w, uint h);
|
||||
SparseMatrix(const SparseMatrix & m);
|
||||
|
||||
const SparseMatrix & operator=(const SparseMatrix & m);
|
||||
|
||||
|
||||
uint width() const { return m_width; }
|
||||
uint height() const { return m_array.count(); }
|
||||
bool isSquare() const { return width() == height(); }
|
||||
|
||||
float getCoefficient(uint x, uint y) const; // x is column, y is row
|
||||
|
||||
void setCoefficient(uint x, uint y, float f);
|
||||
void addCoefficient(uint x, uint y, float f);
|
||||
void mulCoefficient(uint x, uint y, float f);
|
||||
|
||||
float sumRow(uint y) const;
|
||||
float dotRow(uint y, const FullVector & v) const;
|
||||
void madRow(uint y, float alpha, FullVector & v) const;
|
||||
|
||||
void clearRow(uint y);
|
||||
void scaleRow(uint y, float f);
|
||||
void normalizeRow(uint y);
|
||||
|
||||
void clearColumn(uint x);
|
||||
void scaleColumn(uint x, float f);
|
||||
|
||||
const Array<Coefficient> & getRow(uint y) const;
|
||||
|
||||
private:
|
||||
|
||||
/// Number of columns.
|
||||
const uint m_width;
|
||||
|
||||
/// Array of matrix elements.
|
||||
Array< Array<Coefficient> > m_array;
|
||||
|
||||
};
|
||||
|
||||
NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
|
||||
NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
|
||||
|
||||
// y = alpha*A*x + beta*y
|
||||
NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
|
||||
|
||||
NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
|
||||
NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
|
||||
|
||||
// C = alpha*A*B + beta*C
|
||||
NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
|
||||
NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
|
||||
|
||||
// C = At * A
|
||||
NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_MATH_SPARSE_H
|
@ -11,10 +11,8 @@ namespace
|
||||
// Basic integer factorial.
|
||||
inline static int factorial( int v )
|
||||
{
|
||||
const static int fac_table[] = { 1, 1, 2, 6, 24, 120, 720, 5040, 40320, 362880, 3628800, 39916800 };
|
||||
|
||||
if(v <= 11){
|
||||
return fac_table[v];
|
||||
if (v == 0) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
int result = v;
|
||||
|
@ -3,7 +3,6 @@
|
||||
#ifndef NV_MATH_SPHERICALHARMONIC_H
|
||||
#define NV_MATH_SPHERICALHARMONIC_H
|
||||
|
||||
#include <string.h> // memcpy
|
||||
#include <nvmath/Vector.h>
|
||||
|
||||
namespace nv
|
||||
|
@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
|
||||
if(min>rad || max<-rad) return false;
|
||||
|
||||
|
||||
bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
{
|
||||
// use separating axis theorem to test overlap between triangle and box
|
||||
// need to test for overlap in these directions:
|
||||
@ -170,7 +170,7 @@ bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const T
|
||||
}
|
||||
|
||||
|
||||
bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
|
||||
{
|
||||
// use separating axis theorem to test overlap between triangle and box
|
||||
// need to test for overlap in these directions:
|
||||
|
@ -1,82 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvcore/Stream.h>
|
||||
|
||||
#include <nvmath/Vector.h>
|
||||
#include <nvmath/Matrix.h>
|
||||
#include <nvmath/Quaternion.h>
|
||||
#include <nvmath/Basis.h>
|
||||
#include <nvmath/Box.h>
|
||||
|
||||
#include <nvmath/TypeSerialization.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Vector2 & v)
|
||||
{
|
||||
float x = v.x();
|
||||
float y = v.y();
|
||||
|
||||
s << x << y;
|
||||
|
||||
if (s.isLoading())
|
||||
{
|
||||
v.set(x, y);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Vector3 & v)
|
||||
{
|
||||
float x = v.x();
|
||||
float y = v.y();
|
||||
float z = v.z();
|
||||
|
||||
s << x << y << z;
|
||||
|
||||
if (s.isLoading())
|
||||
{
|
||||
v.set(x, y, z);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Vector4 & v)
|
||||
{
|
||||
float x = v.x();
|
||||
float y = v.y();
|
||||
float z = v.z();
|
||||
float w = v.w();
|
||||
|
||||
s << x << y << z << w;
|
||||
|
||||
if (s.isLoading())
|
||||
{
|
||||
v.set(x, y, z, w);
|
||||
}
|
||||
|
||||
return s;
|
||||
}
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Matrix & m)
|
||||
{
|
||||
return s;
|
||||
}
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Quaternion & q)
|
||||
{
|
||||
return s << q.asVector();
|
||||
}
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Basis & basis)
|
||||
{
|
||||
return s << basis.tangent << basis.bitangent << basis.normal;
|
||||
}
|
||||
|
||||
Stream & nv::operator<< (Stream & s, Box & box)
|
||||
{
|
||||
return s << box.m_mins << box.m_maxs;
|
||||
}
|
||||
|
@ -1,32 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_MATH_TYPESERIALIZATION_H
|
||||
#define NV_MATH_TYPESERIALIZATION_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Stream;
|
||||
|
||||
class Vector2;
|
||||
class Vector3;
|
||||
class Vector4;
|
||||
|
||||
class Matrix;
|
||||
class Quaternion;
|
||||
struct Basis;
|
||||
class Box;
|
||||
|
||||
NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
|
||||
|
||||
NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
|
||||
NVMATH_API Stream & operator<< (Stream & s, Box & obj);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_MATH_TYPESERIALIZATION_H
|
@ -4,7 +4,7 @@
|
||||
#define NV_MATH_VECTOR_H
|
||||
|
||||
#include <nvmath/nvmath.h>
|
||||
#include <nvcore/Algorithms.h> // min, max
|
||||
#include <nvcore/Containers.h> // min, max
|
||||
|
||||
namespace nv
|
||||
{
|
||||
@ -71,7 +71,6 @@ public:
|
||||
const Vector2 & xy() const;
|
||||
|
||||
scalar component(uint idx) const;
|
||||
void setComponent(uint idx, scalar f);
|
||||
|
||||
const scalar * ptr() const;
|
||||
|
||||
@ -240,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
|
||||
inline scalar Vector3::component(uint idx) const
|
||||
{
|
||||
nvDebugCheck(idx < 3);
|
||||
if (idx == 0) return m_x;
|
||||
if (idx == 1) return m_y;
|
||||
if (idx == 2) return m_z;
|
||||
if (idx == 0) return x();
|
||||
if (idx == 1) return y();
|
||||
if (idx == 2) return z();
|
||||
nvAssume(false);
|
||||
return 0.0f;
|
||||
}
|
||||
|
||||
inline void Vector3::setComponent(uint idx, float f)
|
||||
{
|
||||
nvDebugCheck(idx < 3);
|
||||
if (idx == 0) m_x = f;
|
||||
else if (idx == 1) m_y = f;
|
||||
else if (idx == 2) m_z = f;
|
||||
}
|
||||
|
||||
inline const scalar * Vector3::ptr() const
|
||||
{
|
||||
return &m_x;
|
||||
@ -486,35 +477,6 @@ inline scalar length(Vector2::Arg v)
|
||||
return sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline scalar inverse_length(Vector2::Arg v)
|
||||
{
|
||||
return 1.0f / sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
|
||||
{
|
||||
return equal(length(v), 1, epsilon);
|
||||
}
|
||||
|
||||
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
|
||||
{
|
||||
float l = length(v);
|
||||
nvDebugCheck(!isZero(l, epsilon));
|
||||
Vector2 n = scale(v, 1.0f / l);
|
||||
nvDebugCheck(isNormalized(n));
|
||||
return n;
|
||||
}
|
||||
|
||||
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
|
||||
{
|
||||
float l = length(v);
|
||||
if (isZero(l, epsilon)) {
|
||||
return fallback;
|
||||
}
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
|
||||
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
|
||||
{
|
||||
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
|
||||
@ -633,11 +595,6 @@ inline scalar length(Vector3::Arg v)
|
||||
return sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline scalar inverse_length(Vector3::Arg v)
|
||||
{
|
||||
return 1.0f / sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
|
||||
{
|
||||
return equal(length(v), 1, epsilon);
|
||||
@ -759,11 +716,6 @@ inline scalar length(Vector4::Arg v)
|
||||
return sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline scalar inverse_length(Vector4::Arg v)
|
||||
{
|
||||
return 1.0f / sqrtf(length_squared(v));
|
||||
}
|
||||
|
||||
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
|
||||
{
|
||||
return equal(length(v), 1, epsilon);
|
||||
|
@ -136,11 +136,6 @@ inline float lerp(float f0, float f1, float t)
|
||||
return f0 * s + f1 * t;
|
||||
}
|
||||
|
||||
inline float square(float f)
|
||||
{
|
||||
return f * f;
|
||||
}
|
||||
|
||||
} // nv
|
||||
|
||||
#endif // NV_MATH_H
|
||||
|
Reference in New Issue
Block a user