Merge changes from the witness.
This commit is contained in:
@ -39,7 +39,7 @@ namespace nv
|
||||
// Build a cube centered on center and with edge = 2*dist
|
||||
inline void Box::cube(const Vector3 & center, float dist)
|
||||
{
|
||||
setCenterExtents(center, Vector3(dist, dist, dist));
|
||||
setCenterExtents(center, Vector3(dist));
|
||||
}
|
||||
|
||||
// Build a box, given center and extents.
|
||||
|
@ -89,6 +89,7 @@ namespace nv
|
||||
uint8 b: 8;
|
||||
#endif
|
||||
};
|
||||
uint8 component[4];
|
||||
uint32 u;
|
||||
};
|
||||
};
|
||||
|
@ -6,6 +6,7 @@
|
||||
|
||||
#include "Color.h"
|
||||
#include "Vector.inl"
|
||||
#include "ftoi.h"
|
||||
|
||||
|
||||
namespace nv
|
||||
@ -123,30 +124,30 @@ namespace nv
|
||||
inline Color32 toColor32(const Vector4 & v)
|
||||
{
|
||||
Color32 color;
|
||||
color.r = toU8(nv::iround(saturate(v.x) * 255));
|
||||
color.g = toU8(nv::iround(saturate(v.y) * 255));
|
||||
color.b = toU8(nv::iround(saturate(v.z) * 255));
|
||||
color.a = toU8(nv::iround(saturate(v.w) * 255));
|
||||
color.r = U8(ftoi_round(saturate(v.x) * 255));
|
||||
color.g = U8(ftoi_round(saturate(v.y) * 255));
|
||||
color.b = U8(ftoi_round(saturate(v.z) * 255));
|
||||
color.a = U8(ftoi_round(saturate(v.w) * 255));
|
||||
return color;
|
||||
}
|
||||
|
||||
inline Color32 toColor32_from_bgra(const Vector4 & v)
|
||||
{
|
||||
Color32 color;
|
||||
color.b = toU8(nv::iround(saturate(v.x) * 255));
|
||||
color.g = toU8(nv::iround(saturate(v.y) * 255));
|
||||
color.r = toU8(nv::iround(saturate(v.z) * 255));
|
||||
color.a = toU8(nv::iround(saturate(v.w) * 255));
|
||||
color.b = U8(ftoi_round(saturate(v.x) * 255));
|
||||
color.g = U8(ftoi_round(saturate(v.y) * 255));
|
||||
color.r = U8(ftoi_round(saturate(v.z) * 255));
|
||||
color.a = U8(ftoi_round(saturate(v.w) * 255));
|
||||
return color;
|
||||
}
|
||||
|
||||
inline Color32 toColor32_from_argb(const Vector4 & v)
|
||||
{
|
||||
Color32 color;
|
||||
color.a = toU8(nv::iround(saturate(v.x) * 255));
|
||||
color.r = toU8(nv::iround(saturate(v.y) * 255));
|
||||
color.g = toU8(nv::iround(saturate(v.z) * 255));
|
||||
color.b = toU8(nv::iround(saturate(v.w) * 255));
|
||||
color.a = U8(ftoi_round(saturate(v.x) * 255));
|
||||
color.r = U8(ftoi_round(saturate(v.y) * 255));
|
||||
color.g = U8(ftoi_round(saturate(v.z) * 255));
|
||||
color.b = U8(ftoi_round(saturate(v.w) * 255));
|
||||
return color;
|
||||
}
|
||||
|
||||
|
@ -4,10 +4,11 @@
|
||||
#include "Vector.inl"
|
||||
#include "Plane.inl"
|
||||
|
||||
#include "nvcore/Array.inl"
|
||||
#include "nvcore/Utils.h" // max, swap
|
||||
|
||||
#include <float.h> // FLT_MAX
|
||||
#include <vector>
|
||||
//#include <vector>
|
||||
#include <string.h>
|
||||
|
||||
using namespace nv;
|
||||
@ -329,7 +330,7 @@ void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
|
||||
Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
|
||||
{
|
||||
// Store the points in an n x n matrix
|
||||
std::vector<float> Q(n*n, 0.0f);
|
||||
Array<float> Q; Q.resize(n*n, 0.0f);
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
Q[i*n+0] = points[i].x;
|
||||
@ -338,8 +339,8 @@ Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict
|
||||
}
|
||||
|
||||
// Alloc space for the SVD outputs
|
||||
std::vector<float> diag(n, 0.0f);
|
||||
std::vector<float> R(n*n, 0.0f);
|
||||
Array<float> diag; diag.resize(n, 0.0f);
|
||||
Array<float> R; R.resize(n*n, 0.0f);
|
||||
|
||||
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
|
||||
|
||||
@ -350,7 +351,7 @@ Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict
|
||||
Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
|
||||
{
|
||||
// Store the points in an n x n matrix
|
||||
std::vector<float> Q(n*n, 0.0f);
|
||||
Array<float> Q; Q.resize(n*n, 0.0f);
|
||||
for (int i = 0; i < n; ++i)
|
||||
{
|
||||
Q[i*n+0] = points[i].x;
|
||||
@ -360,8 +361,8 @@ Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict
|
||||
}
|
||||
|
||||
// Alloc space for the SVD outputs
|
||||
std::vector<float> diag(n, 0.0f);
|
||||
std::vector<float> R(n*n, 0.0f);
|
||||
Array<float> diag; diag.resize(n, 0.0f);
|
||||
Array<float> R; R.resize(n*n, 0.0f);
|
||||
|
||||
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
|
||||
|
||||
@ -940,7 +941,7 @@ void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
|
||||
float g = 0.0f;
|
||||
float scale = 0.0f;
|
||||
|
||||
std::vector<float> temp(cols, 0.0f);
|
||||
Array<float> temp; temp.resize(cols, 0.0f);
|
||||
|
||||
for( i = 0; i < cols; i++ )
|
||||
{
|
||||
|
@ -580,56 +580,56 @@ namespace nv {
|
||||
void nv::half_init_tables()
|
||||
{
|
||||
// Init mantissa table.
|
||||
mantissa_table[0] = 0;
|
||||
mantissa_table[0] = 0;
|
||||
|
||||
// denormals
|
||||
for (int i = 1; i < 1024; i++) {
|
||||
uint m = i << 13;
|
||||
uint e = 0;
|
||||
for (int i = 1; i < 1024; i++) {
|
||||
uint m = i << 13;
|
||||
uint e = 0;
|
||||
|
||||
while ((m & 0x00800000) == 0) {
|
||||
e -= 0x00800000;
|
||||
m <<= 1;
|
||||
}
|
||||
m &= ~0x00800000;
|
||||
e += 0x38800000;
|
||||
mantissa_table[i] = m | e;
|
||||
}
|
||||
while ((m & 0x00800000) == 0) {
|
||||
e -= 0x00800000;
|
||||
m <<= 1;
|
||||
}
|
||||
m &= ~0x00800000;
|
||||
e += 0x38800000;
|
||||
mantissa_table[i] = m | e;
|
||||
}
|
||||
|
||||
// normals
|
||||
for (int i = 1024; i < 2048; i++) {
|
||||
mantissa_table[i] = (i - 1024) << 13;
|
||||
mantissa_table[i] = (i - 1024) << 13;
|
||||
}
|
||||
|
||||
|
||||
// Init exponent table.
|
||||
exponent_table[0] = 0;
|
||||
exponent_table[0] = 0;
|
||||
|
||||
for (int i = 1; i < 31; i++) {
|
||||
exponent_table[i] = 0x38000000 + (i << 23);
|
||||
exponent_table[i] = 0x38000000 + (i << 23);
|
||||
}
|
||||
|
||||
exponent_table[31] = 0x7f800000;
|
||||
exponent_table[32] = 0x80000000;
|
||||
exponent_table[31] = 0x7f800000;
|
||||
exponent_table[32] = 0x80000000;
|
||||
|
||||
for (int i = 33; i < 63; i++) {
|
||||
exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
|
||||
exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
|
||||
}
|
||||
|
||||
exponent_table[63] = 0xff800000;
|
||||
exponent_table[63] = 0xff800000;
|
||||
|
||||
|
||||
// Init offset table.
|
||||
offset_table[0] = 0;
|
||||
offset_table[0] = 0;
|
||||
|
||||
for (int i = 1; i < 32; i++) {
|
||||
offset_table[i] = 1024;
|
||||
offset_table[i] = 1024;
|
||||
}
|
||||
|
||||
offset_table[32] = 0;
|
||||
offset_table[32] = 0;
|
||||
|
||||
for (int i = 33; i < 64; i++) {
|
||||
offset_table[i] = 1024;
|
||||
offset_table[i] = 1024;
|
||||
}
|
||||
}
|
||||
|
||||
@ -660,27 +660,27 @@ uint32 nv::fast_half_to_float(uint16 v)
|
||||
// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
|
||||
|
||||
|
||||
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
|
||||
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
|
||||
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
|
||||
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
|
||||
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
|
||||
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
|
||||
static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
|
||||
|
||||
__asm
|
||||
{
|
||||
movaps xmm1, xmm0 // Input in xmm0
|
||||
movaps xmm2, xmm0
|
||||
movaps xmm1, xmm0 // Input in xmm0
|
||||
movaps xmm2, xmm0
|
||||
|
||||
andps xmm0, half_sign
|
||||
andps xmm1, half_exponent
|
||||
andps xmm2, half_mantissa
|
||||
paddd xmm1, half_bias_offset
|
||||
andps xmm0, half_sign
|
||||
andps xmm1, half_exponent
|
||||
andps xmm2, half_mantissa
|
||||
paddd xmm1, half_bias_offset
|
||||
|
||||
pslld xmm0, 16
|
||||
pslld xmm1, 13
|
||||
pslld xmm2, 13
|
||||
pslld xmm0, 16
|
||||
pslld xmm1, 13
|
||||
pslld xmm2, 13
|
||||
|
||||
orps xmm1, xmm2
|
||||
orps xmm0, xmm1 // Result in xmm0
|
||||
orps xmm1, xmm2
|
||||
orps xmm0, xmm1 // Result in xmm0
|
||||
}
|
||||
|
||||
|
||||
|
@ -7,6 +7,10 @@
|
||||
|
||||
#include <float.h>
|
||||
|
||||
#if !NV_CC_MSVC && !NV_OS_ORBIS
|
||||
#include <alloca.h>
|
||||
#endif
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
@ -20,8 +24,7 @@ static bool ludcmp(float **a, int n, int *indx, float *d)
|
||||
{
|
||||
const float TINY = 1.0e-20f;
|
||||
|
||||
Array<float> vv; // vv stores the implicit scaling of each row.
|
||||
vv.resize(n);
|
||||
float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row.
|
||||
|
||||
*d = 1.0; // No row interchanges yet.
|
||||
for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
|
||||
@ -149,6 +152,21 @@ bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
|
||||
return true;
|
||||
}
|
||||
|
||||
// @@ Not tested.
|
||||
Matrix nv::inverseLU(const Matrix & A)
|
||||
{
|
||||
Vector4 Ai[4];
|
||||
|
||||
solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
|
||||
solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
|
||||
solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
|
||||
solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
|
||||
|
||||
return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
|
||||
}
|
||||
|
||||
|
||||
|
||||
bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
||||
{
|
||||
nvDebugCheck(x != NULL);
|
||||
@ -184,7 +202,7 @@ bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
|
||||
{
|
||||
nvDebugCheck(x != NULL);
|
||||
|
||||
*x = transform(inverse(A), b);
|
||||
*x = transform(inverseCramer(A), b);
|
||||
|
||||
return true; // @@ Return false if determinant(A) == 0 !
|
||||
}
|
||||
@ -198,7 +216,7 @@ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
||||
return false;
|
||||
}
|
||||
|
||||
Matrix3 Ai = inverse(A);
|
||||
Matrix3 Ai = inverseCramer(A);
|
||||
|
||||
*x = transform(Ai, b);
|
||||
|
||||
@ -207,6 +225,119 @@ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
||||
|
||||
|
||||
|
||||
// Inverse using gaussian elimination. From Jon's code.
|
||||
Matrix nv::inverse(const Matrix & m) {
|
||||
|
||||
Matrix A = m;
|
||||
Matrix B(identity);
|
||||
|
||||
int i, j, k;
|
||||
float max, t, det, pivot;
|
||||
|
||||
det = 1.0;
|
||||
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
|
||||
max = -1.;
|
||||
for (k=i; k<4; k++) /* find pivot for column i */
|
||||
if (fabs(A(k, i)) > max) {
|
||||
max = fabs(A(k, i));
|
||||
j = k;
|
||||
}
|
||||
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
|
||||
if (j!=i) { /* swap rows i and j */
|
||||
for (k=i; k<4; k++)
|
||||
swap(A(i, k), A(j, k));
|
||||
for (k=0; k<4; k++)
|
||||
swap(B(i, k), B(j, k));
|
||||
det = -det;
|
||||
}
|
||||
pivot = A(i, i);
|
||||
det *= pivot;
|
||||
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
|
||||
A(i, k) /= pivot;
|
||||
for (k=0; k<4; k++)
|
||||
B(i, k) /= pivot;
|
||||
/* we know that A(i, i) will be set to 1, so don't bother to do it */
|
||||
|
||||
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
|
||||
t = A(j, i); /* we're gonna zero this guy */
|
||||
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
|
||||
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
|
||||
for (k=0; k<4; k++)
|
||||
B(j, k) -= B(i, k)*t;
|
||||
}
|
||||
}
|
||||
|
||||
/*---------- backward elimination ----------*/
|
||||
|
||||
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
|
||||
for (j=0; j<i; j++) { /* eliminate in rows above i */
|
||||
t = A(j, i); /* we're gonna zero this guy */
|
||||
for (k=0; k<4; k++) /* subtract scaled row i from row j */
|
||||
B(j, k) -= B(i, k)*t;
|
||||
}
|
||||
}
|
||||
|
||||
return B;
|
||||
}
|
||||
|
||||
|
||||
Matrix3 nv::inverse(const Matrix3 & m) {
|
||||
|
||||
Matrix3 A = m;
|
||||
Matrix3 B(identity);
|
||||
|
||||
int i, j, k;
|
||||
float max, t, det, pivot;
|
||||
|
||||
det = 1.0;
|
||||
for (i=0; i<3; i++) { /* eliminate in column i, below diag */
|
||||
max = -1.;
|
||||
for (k=i; k<3; k++) /* find pivot for column i */
|
||||
if (fabs(A(k, i)) > max) {
|
||||
max = fabs(A(k, i));
|
||||
j = k;
|
||||
}
|
||||
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
|
||||
if (j!=i) { /* swap rows i and j */
|
||||
for (k=i; k<3; k++)
|
||||
swap(A(i, k), A(j, k));
|
||||
for (k=0; k<3; k++)
|
||||
swap(B(i, k), B(j, k));
|
||||
det = -det;
|
||||
}
|
||||
pivot = A(i, i);
|
||||
det *= pivot;
|
||||
for (k=i+1; k<3; k++) /* only do elems to right of pivot */
|
||||
A(i, k) /= pivot;
|
||||
for (k=0; k<3; k++)
|
||||
B(i, k) /= pivot;
|
||||
/* we know that A(i, i) will be set to 1, so don't bother to do it */
|
||||
|
||||
for (j=i+1; j<3; j++) { /* eliminate in rows below i */
|
||||
t = A(j, i); /* we're gonna zero this guy */
|
||||
for (k=i+1; k<3; k++) /* subtract scaled row i from row j */
|
||||
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
|
||||
for (k=0; k<3; k++)
|
||||
B(j, k) -= B(i, k)*t;
|
||||
}
|
||||
}
|
||||
|
||||
/*---------- backward elimination ----------*/
|
||||
|
||||
for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */
|
||||
for (j=0; j<i; j++) { /* eliminate in rows above i */
|
||||
t = A(j, i); /* we're gonna zero this guy */
|
||||
for (k=0; k<3; k++) /* subtract scaled row i from row j */
|
||||
B(j, k) -= B(i, k)*t;
|
||||
}
|
||||
}
|
||||
|
||||
return B;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
|
@ -83,6 +83,9 @@ namespace nv
|
||||
void rotate(float theta, float v0, float v1, float v2);
|
||||
float determinant() const;
|
||||
|
||||
void operator+=(const Matrix & m);
|
||||
void operator-=(const Matrix & m);
|
||||
|
||||
void apply(Matrix::Arg m);
|
||||
|
||||
private:
|
||||
@ -90,11 +93,18 @@ namespace nv
|
||||
};
|
||||
|
||||
// Solve equation system using LU decomposition and back-substitution.
|
||||
extern bool solveLU(const Matrix & m, const Vector4 & b, Vector4 * x);
|
||||
extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
|
||||
|
||||
// Solve equation system using Cramer's inverse.
|
||||
extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
|
||||
|
||||
// Compute inverse using LU decomposition.
|
||||
extern Matrix inverseLU(const Matrix & m);
|
||||
|
||||
// Compute inverse using Gaussian elimination and partial pivoting.
|
||||
extern Matrix inverse(const Matrix & m);
|
||||
extern Matrix3 inverse(const Matrix3 & m);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_MATH_MATRIX_H
|
||||
|
@ -195,7 +195,7 @@ namespace nv
|
||||
}
|
||||
|
||||
// Inverse using Cramer's rule.
|
||||
inline Matrix3 inverse(const Matrix3 & m)
|
||||
inline Matrix3 inverseCramer(const Matrix3 & m)
|
||||
{
|
||||
const float det = m.determinant();
|
||||
if (equal(det, 0.0f, 0.0f)) {
|
||||
@ -477,6 +477,25 @@ namespace nv
|
||||
return m;
|
||||
}
|
||||
|
||||
// Get inverse frustum matrix.
|
||||
inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
|
||||
{
|
||||
Matrix m(0.0f);
|
||||
|
||||
float one_doubleznear = 1.0f / (2.0f * zNear);
|
||||
float one_doubleznearzfar = 1.0f / (2.0f * zNear * zFar);
|
||||
|
||||
m(0,0) = (xmax - xmin) * one_doubleznear;
|
||||
m(0,3) = (xmax + xmin) * one_doubleznear;
|
||||
m(1,1) = (ymax - ymin) * one_doubleznear;
|
||||
m(1,3) = (ymax + ymin) * one_doubleznear;
|
||||
m(2,3) = -1;
|
||||
m(3,2) = -(zFar - zNear) * one_doubleznearzfar;
|
||||
m(3,3) = (zFar + zNear) * one_doubleznearzfar;
|
||||
|
||||
return m;
|
||||
}
|
||||
|
||||
// Get infinite frustum matrix.
|
||||
inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
|
||||
{
|
||||
@ -510,6 +529,18 @@ namespace nv
|
||||
return frustum(xmin, xmax, ymin, ymax, zNear, zFar);
|
||||
}
|
||||
|
||||
// Get inverse perspective matrix.
|
||||
inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
|
||||
{
|
||||
float xmax = zNear * tan(fovy / 2);
|
||||
float xmin = -xmax;
|
||||
|
||||
float ymax = xmax / aspect;
|
||||
float ymin = -ymax;
|
||||
|
||||
return frustumInverse(xmin, xmax, ymin, ymax, zNear, zFar);
|
||||
}
|
||||
|
||||
// Get infinite perspective matrix.
|
||||
inline Matrix perspective(float fovy, float aspect, float zNear)
|
||||
{
|
||||
@ -544,7 +575,7 @@ namespace nv
|
||||
}
|
||||
|
||||
// Inverse using Cramer's rule.
|
||||
inline Matrix inverse(Matrix::Arg m)
|
||||
inline Matrix inverseCramer(Matrix::Arg m)
|
||||
{
|
||||
Matrix r;
|
||||
r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
|
||||
@ -622,6 +653,35 @@ namespace nv
|
||||
return m;
|
||||
}
|
||||
|
||||
inline void Matrix::operator+=(const Matrix & m)
|
||||
{
|
||||
for(int i = 0; i < 16; i++) {
|
||||
m_data[i] += m.m_data[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline void Matrix::operator-=(const Matrix & m)
|
||||
{
|
||||
for(int i = 0; i < 16; i++) {
|
||||
m_data[i] -= m.m_data[i];
|
||||
}
|
||||
}
|
||||
|
||||
inline Matrix operator+(const Matrix & a, const Matrix & b)
|
||||
{
|
||||
Matrix m = a;
|
||||
m += b;
|
||||
return m;
|
||||
}
|
||||
|
||||
inline Matrix operator-(const Matrix & a, const Matrix & b)
|
||||
{
|
||||
Matrix m = a;
|
||||
m -= b;
|
||||
return m;
|
||||
}
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
|
@ -12,13 +12,13 @@
|
||||
# if NV_CPU_X86 || NV_CPU_X86_64
|
||||
# define NV_USE_SSE 2
|
||||
# endif
|
||||
//# if defined(__SSE2__)
|
||||
//# define NV_USE_SSE 2
|
||||
//# elif defined(__SSE__)
|
||||
//# define NV_USE_SSE 1
|
||||
//# else
|
||||
//# define NV_USE_SSE 0
|
||||
//# endif
|
||||
# if defined(__SSE2__)
|
||||
# define NV_USE_SSE 2
|
||||
# elif defined(__SSE__)
|
||||
# define NV_USE_SSE 1
|
||||
# else
|
||||
# define NV_USE_SSE 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
// Internally set NV_USE_SIMD when either altivec or sse is available.
|
||||
|
@ -144,6 +144,6 @@ namespace nv
|
||||
// Instead we simply have explicit casts:
|
||||
template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
|
||||
template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
|
||||
template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.z); }
|
||||
template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
|
||||
|
||||
#endif // NV_MATH_VECTOR_H
|
||||
|
@ -440,14 +440,17 @@ namespace nv
|
||||
}
|
||||
|
||||
// Note, this is the area scaled by 2!
|
||||
inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1)
|
||||
{
|
||||
return (v0.x * v1.y - v0.y * v1.x); // * 0.5f;
|
||||
}
|
||||
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
|
||||
{
|
||||
Vector2 v0 = a - c;
|
||||
Vector2 v1 = b - c;
|
||||
|
||||
return (v0.x * v1.y - v0.y * v1.x);
|
||||
return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
|
||||
//return triangleArea(a-c, b-c);
|
||||
}
|
||||
|
||||
|
||||
template <>
|
||||
inline uint hash(const Vector2 & v, uint h)
|
||||
{
|
||||
|
256
src/nvmath/ftoi.h
Executable file
256
src/nvmath/ftoi.h
Executable file
@ -0,0 +1,256 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_MATH_FTOI_H
|
||||
#define NV_MATH_FTOI_H
|
||||
|
||||
#include "nvmath/nvmath.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
// Optimized float to int conversions. See:
|
||||
// http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
|
||||
// http://www.stereopsis.com/sree/fpu2006.html
|
||||
// http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
|
||||
// http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
|
||||
|
||||
|
||||
union DoubleAnd64 {
|
||||
uint64 i;
|
||||
double d;
|
||||
};
|
||||
|
||||
static const double floatutil_xs_doublemagic = (6755399441055744.0); // 2^52 * 1.5
|
||||
static const double floatutil_xs_doublemagicdelta = (1.5e-8); // almost .5f = .5f + 1e^(number of exp bit)
|
||||
static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta); // almost .5f = .5f - 1e^(number of exp bit)
|
||||
|
||||
NV_FORCEINLINE int ftoi_round_xs(double val, double magic) {
|
||||
#if 1
|
||||
DoubleAnd64 dunion;
|
||||
dunion.d = val + magic;
|
||||
return (int32) dunion.i; // just cast to grab the bottom bits
|
||||
#else
|
||||
val += magic;
|
||||
return ((int*)&val)[0]; // @@ Assumes little endian.
|
||||
#endif
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_round_xs(float val) {
|
||||
return ftoi_round_xs(val, floatutil_xs_doublemagic);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_floor_xs(float val) {
|
||||
return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_ceil_xs(float val) {
|
||||
return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc_xs(float val) {
|
||||
return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val);
|
||||
}
|
||||
|
||||
#if NV_CPU_X86 || NV_CPU_X86_64
|
||||
|
||||
NV_FORCEINLINE int ftoi_round_sse(float f) {
|
||||
return _mm_cvt_ss2si(_mm_set_ss(f));
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc_sse(float f) {
|
||||
return _mm_cvtt_ss2si(_mm_set_ss(f));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if NV_USE_SSE
|
||||
|
||||
NV_FORCEINLINE int ftoi_round(float val) {
|
||||
return ftoi_round_sse(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc(float f) {
|
||||
return ftoi_trunc_sse(f);
|
||||
}
|
||||
|
||||
// We can probably do better than this. See for example:
|
||||
// http://dss.stephanierct.com/DevBlog/?p=8
|
||||
NV_FORCEINLINE int ftoi_floor(float val) {
|
||||
return ftoi_round(floorf(val));
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_ceil(float val) {
|
||||
return ftoi_round(ceilf(val));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code
|
||||
// when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode.
|
||||
|
||||
NV_FORCEINLINE int ftoi_round(float val) {
|
||||
return ftoi_round_xs(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_floor(float val) {
|
||||
return ftoi_floor_xs(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_ceil(float val) {
|
||||
return ftoi_ceil_xs(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc(float f) {
|
||||
return ftoi_trunc_xs(f);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
inline void test_ftoi() {
|
||||
|
||||
// Round to nearest integer.
|
||||
nvCheck(ftoi_round(0.1f) == 0);
|
||||
nvCheck(ftoi_round(0.6f) == 1);
|
||||
nvCheck(ftoi_round(-0.2f) == 0);
|
||||
nvCheck(ftoi_round(-0.7f) == -1);
|
||||
nvCheck(ftoi_round(10.1f) == 10);
|
||||
nvCheck(ftoi_round(10.6f) == 11);
|
||||
nvCheck(ftoi_round(-90.1f) == -90);
|
||||
nvCheck(ftoi_round(-90.6f) == -91);
|
||||
|
||||
nvCheck(ftoi_round(0) == 0);
|
||||
nvCheck(ftoi_round(1) == 1);
|
||||
nvCheck(ftoi_round(-1) == -1);
|
||||
|
||||
nvCheck(ftoi_round(0.5f) == 0); // How are midpoints rounded? Bankers rounding.
|
||||
nvCheck(ftoi_round(1.5f) == 2);
|
||||
nvCheck(ftoi_round(2.5f) == 2);
|
||||
nvCheck(ftoi_round(3.5f) == 4);
|
||||
nvCheck(ftoi_round(4.5f) == 4);
|
||||
nvCheck(ftoi_round(-0.5f) == 0);
|
||||
nvCheck(ftoi_round(-1.5f) == -2);
|
||||
|
||||
|
||||
// Truncation (round down if > 0, round up if < 0).
|
||||
nvCheck(ftoi_trunc(0.1f) == 0);
|
||||
nvCheck(ftoi_trunc(0.6f) == 0);
|
||||
nvCheck(ftoi_trunc(-0.2f) == 0);
|
||||
nvCheck(ftoi_trunc(-0.7f) == 0); // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition.
|
||||
nvCheck(ftoi_trunc(1.99f) == 1);
|
||||
nvCheck(ftoi_trunc(-1.2f) == -1);
|
||||
|
||||
// Floor (round down).
|
||||
nvCheck(ftoi_floor(0.1f) == 0);
|
||||
nvCheck(ftoi_floor(0.6f) == 0);
|
||||
nvCheck(ftoi_floor(-0.2f) == -1);
|
||||
nvCheck(ftoi_floor(-0.7f) == -1);
|
||||
nvCheck(ftoi_floor(1.99f) == 1);
|
||||
nvCheck(ftoi_floor(-1.2f) == -2);
|
||||
|
||||
nvCheck(ftoi_floor(0) == 0);
|
||||
nvCheck(ftoi_floor(1) == 1);
|
||||
nvCheck(ftoi_floor(-1) == -1);
|
||||
nvCheck(ftoi_floor(2) == 2);
|
||||
nvCheck(ftoi_floor(-2) == -2);
|
||||
|
||||
// Ceil (round up).
|
||||
nvCheck(ftoi_ceil(0.1f) == 1);
|
||||
nvCheck(ftoi_ceil(0.6f) == 1);
|
||||
nvCheck(ftoi_ceil(-0.2f) == 0);
|
||||
nvCheck(ftoi_ceil(-0.7f) == 0);
|
||||
nvCheck(ftoi_ceil(1.99f) == 2);
|
||||
nvCheck(ftoi_ceil(-1.2f) == -1);
|
||||
|
||||
nvCheck(ftoi_ceil(0) == 0);
|
||||
nvCheck(ftoi_ceil(1) == 1);
|
||||
nvCheck(ftoi_ceil(-1) == -1);
|
||||
nvCheck(ftoi_ceil(2) == 2);
|
||||
nvCheck(ftoi_ceil(-2) == -2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Safe versions using standard casts.
|
||||
|
||||
inline int iround(float f)
|
||||
{
|
||||
return int(floorf(f + 0.5f));
|
||||
}
|
||||
|
||||
inline int iround(double f)
|
||||
{
|
||||
return int(::floor(f + 0.5));
|
||||
}
|
||||
|
||||
inline int ifloor(float f)
|
||||
{
|
||||
return int(floorf(f));
|
||||
}
|
||||
|
||||
inline int iceil(float f)
|
||||
{
|
||||
return int(ceilf(f));
|
||||
}
|
||||
|
||||
|
||||
|
||||
// I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
|
||||
// Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html
|
||||
|
||||
// Quantize a float in the [0,1] range, using exact end points or uniform bins.
|
||||
inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
|
||||
nvDebugCheck(bits <= 16);
|
||||
|
||||
float range = float(1 << bits);
|
||||
if (exactEndPoints) {
|
||||
return floorf(x * (range-1) + 0.5f) / (range-1);
|
||||
}
|
||||
else {
|
||||
return (floorf(x * range) + 0.5f) / range;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// This is the most common rounding mode:
|
||||
//
|
||||
// 0 1 2 3
|
||||
// |___|_______|_______|___|
|
||||
// 0 1
|
||||
//
|
||||
// You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`.
|
||||
// You reconstruct the original float dividing by 'N-1': `f = i / (N-1)`
|
||||
|
||||
|
||||
// 0 1 2 3
|
||||
// |_____|_____|_____|_____|
|
||||
// 0 1
|
||||
|
||||
/*enum BinningMode {
|
||||
RoundMode_ExactEndPoints,
|
||||
RoundMode_UniformBins,
|
||||
};*/
|
||||
|
||||
template <int N>
|
||||
inline uint unitFloatToFixed(float f) {
|
||||
return ftoi_round(f * ((1<<N)-1));
|
||||
}
|
||||
|
||||
inline uint8 unitFloatToFixed8(float f) {
|
||||
return (uint8)unitFloatToFixed<8>(f);
|
||||
}
|
||||
|
||||
inline uint16 unitFloatToFixed16(float f) {
|
||||
return (uint16)unitFloatToFixed<16>(f);
|
||||
}
|
||||
|
||||
|
||||
} // nv
|
||||
|
||||
#endif // NV_MATH_FTOI_H
|
@ -14,6 +14,13 @@
|
||||
#include <float.h> // finite, isnan
|
||||
#endif
|
||||
|
||||
#if NV_CPU_X86 || NV_CPU_X86_64
|
||||
//#include <intrin.h>
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
// Function linkage
|
||||
#if NVMATH_SHARED
|
||||
#ifdef NVMATH_EXPORTS
|
||||
@ -28,6 +35,37 @@
|
||||
#define NVMATH_CLASS
|
||||
#endif // NVMATH_SHARED
|
||||
|
||||
// Set some reasonable defaults.
|
||||
#ifndef NV_USE_ALTIVEC
|
||||
# define NV_USE_ALTIVEC NV_CPU_PPC
|
||||
//# define NV_USE_ALTIVEC defined(__VEC__)
|
||||
#endif
|
||||
|
||||
#ifndef NV_USE_SSE
|
||||
# if NV_CPU_X86_64
|
||||
// x64 always supports at least SSE2
|
||||
# define NV_USE_SSE 2
|
||||
# elif NV_CC_MSVC && defined(_M_IX86_FP)
|
||||
// Also on x86 with the /arch:SSE flag in MSVC.
|
||||
# define NV_USE_SSE _M_IX86_FP // 1=SSE, 2=SS2
|
||||
# elif defined(__SSE__)
|
||||
# define NV_USE_SSE 1
|
||||
# elif defined(__SSE2__)
|
||||
# define NV_USE_SSE 2
|
||||
# else
|
||||
// Otherwise we assume no SSE.
|
||||
# define NV_USE_SSE 0
|
||||
# endif
|
||||
#endif
|
||||
|
||||
|
||||
// Internally set NV_USE_SIMD when either altivec or sse is available.
|
||||
#if NV_USE_ALTIVEC && NV_USE_SSE
|
||||
# error "Cannot enable both altivec and sse!"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#ifndef PI
|
||||
#define PI float(3.1415926535897932384626433833)
|
||||
#endif
|
||||
@ -179,26 +217,6 @@ namespace nv
|
||||
inline float cube(float f) { return f * f * f; }
|
||||
inline int cube(int i) { return i * i * i; }
|
||||
|
||||
// @@ Float to int conversions to be optimized at some point. See:
|
||||
// http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
|
||||
// http://www.stereopsis.com/sree/fpu2006.html
|
||||
// http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
|
||||
// http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
|
||||
inline int iround(float f)
|
||||
{
|
||||
return int(floorf(f + 0.5f));
|
||||
}
|
||||
|
||||
inline int ifloor(float f)
|
||||
{
|
||||
return int(floorf(f));
|
||||
}
|
||||
|
||||
inline int iceil(float f)
|
||||
{
|
||||
return int(ceilf(f));
|
||||
}
|
||||
|
||||
inline float frac(float f)
|
||||
{
|
||||
return f - floor(f);
|
||||
@ -242,21 +260,6 @@ namespace nv
|
||||
return 0;
|
||||
}
|
||||
|
||||
// I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
|
||||
|
||||
// Quantize a float in the [0,1] range, using exact end points or uniform bins.
|
||||
inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
|
||||
nvDebugCheck(bits <= 16);
|
||||
|
||||
float range = float(1 << bits);
|
||||
if (exactEndPoints) {
|
||||
return floorf(x * (range-1) + 0.5f) / (range-1);
|
||||
}
|
||||
else {
|
||||
return (floorf(x * range) + 0.5f) / range;
|
||||
}
|
||||
}
|
||||
|
||||
union Float754 {
|
||||
unsigned int raw;
|
||||
float value;
|
||||
|
Reference in New Issue
Block a user