Merge changes from the Witness.

2013-06-07 17:53:55 +00:00
parent 634229a842
commit 94d0635285
49 changed files with 1974 additions and 625 deletions
--- a/src/nvmath/Color.h
+++ b/src/nvmath/Color.h
@ -118,6 +118,32 @@ namespace nv
        };
    };

+    /// 16 bit color with four 4-bit channels (b, g, r, a) that share storage with the packed 16-bit word `u`.
+    class NVMATH_CLASS Color16_4444
+    {
+    public:
+        Color16_4444() { } // Leaves the channels uninitialized.
+        Color16_4444(const Color16_4444 & c) : u(c.u) { } // Copies the packed word.
+        explicit Color16_4444(uint16 U) : u(U) { } // Builds the color from an already packed word.
+
+        union {
+            struct {
+#if NV_LITTLE_ENDIAN
+                uint16 b : 4; // Declaration order is reversed between the two endianness branches.
+                uint16 g : 4;
+                uint16 r : 4;
+                uint16 a : 4;
+#else
+                uint16 a : 4;
+                uint16 r : 4;
+                uint16 g : 4;
+                uint16 b : 4;
+#endif
+            };
+            uint16 u; // All four channels viewed as one 16-bit value.
+        };
+    };
+
 } // nv namespace

 #endif // NV_MATH_COLOR_H
--- a/src/nvmath/Color.inl
+++ b/src/nvmath/Color.inl
@ -10,6 +10,12 @@

 namespace nv
 {
+    // for Color16 & Color16_4444 bitfields
+    NV_FORCEINLINE uint32 U32round(float f) { return uint32(floorf(f + 0.5f)); }
+    NV_FORCEINLINE uint16 U16round(float f) { return uint16(floorf(f + 0.5f)); }
+    NV_FORCEINLINE uint16 toU4_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 15u); return (uint16)x; }
+    NV_FORCEINLINE uint16 toU5_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 31u); return (uint16)x; }
+    NV_FORCEINLINE uint16 toU6_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 63u); return (uint16)x; }

    // Clamp color components.
    inline Vector3 colorClamp(Vector3::Arg c)
@ -27,6 +33,16 @@ namespace nv
        return c / scale;
    }

+    // Build a 5:6:5 Color16 from float r, g, b: each goes through saturate(), is scaled to the channel maximum (31 or 63) and rounded.
+    inline Color16 toColor16(float r, float g, float b)
+    {
+        Color16 color; // 5 bits red, 6 bits green, 5 bits blue
+        color.r = toU5_in_U16(nv::U16round(saturate(r) * 31u));
+        color.g = toU6_in_U16(nv::U16round(saturate(g) * 63u));
+        color.b = toU5_in_U16(nv::U16round(saturate(b) * 31u));
+        return color;
+    }
+
    // Convert Color32 to Color16.
    inline Color16 toColor16(Color32 c)
    {
@ -43,6 +59,49 @@ namespace nv
        return color; 
    }

+    // Convert Color32 to Color16_4444 by shifting each channel right by four bits (truncation, no rounding).
+    inline Color16_4444 toColor16_4444(Color32 c)
+    {
+        Color16_4444 color;
+        color.a = c.a >> 4;
+        color.r = c.r >> 4;
+        color.g = c.g >> 4;
+        color.b = c.b >> 4;
+        return color; 
+    }
+
+    // Convert four float components (r, g, b, a) to Color16_4444: each goes through saturate(), is scaled by 15 and rounded.
+    inline Color16_4444 toColor16_4444(float r, float g, float b, float a)
+    {
+        Color16_4444 color;
+        color.a = toU4_in_U16(nv::U16round(saturate(a) * 15u));
+        color.r = toU4_in_U16(nv::U16round(saturate(r) * 15u));
+        color.g = toU4_in_U16(nv::U16round(saturate(g) * 15u));
+        color.b = toU4_in_U16(nv::U16round(saturate(b) * 15u));
+        return color;
+    }
+
+    // Convert a float[4] stored in a, r, g, b order (fc[0] is alpha) to Color16_4444.
+    inline Color16_4444 toColor16_4444_from_argb(float * fc)
+    {
+        Color16_4444 color;
+        color.a = toU4_in_U16(nv::U16round(saturate(fc[0]) * 15u));
+        color.r = toU4_in_U16(nv::U16round(saturate(fc[1]) * 15u));
+        color.g = toU4_in_U16(nv::U16round(saturate(fc[2]) * 15u));
+        color.b = toU4_in_U16(nv::U16round(saturate(fc[3]) * 15u));
+        return color;
+    }
+
+    // Convert a float[4] stored in b, g, r, a order (fc[0] is blue) to Color16_4444.
+    inline Color16_4444 toColor16_4444_from_bgra(float * fc)
+    {
+        Color16_4444 color;
+        color.b = toU4_in_U16(nv::U16round(saturate(fc[0]) * 15u));
+        color.g = toU4_in_U16(nv::U16round(saturate(fc[1]) * 15u));
+        color.r = toU4_in_U16(nv::U16round(saturate(fc[2]) * 15u));
+        color.a = toU4_in_U16(nv::U16round(saturate(fc[3]) * 15u));
+        return color;
+    }

    // Promote 16 bit color to 32 bit using regular bit expansion.
    inline Color32 toColor32(Color16 c)
@ -60,13 +119,34 @@ namespace nv
        return color;
    }

-    inline Color32 toColor32(Vector4::Arg v)
+    // @@ Quantize with exact endpoints or with uniform bins?
+    inline Color32 toColor32(const Vector4 & v)
    {
        Color32 color;
-        color.r = uint8(saturate(v.x) * 255);
-        color.g = uint8(saturate(v.y) * 255);
-        color.b = uint8(saturate(v.z) * 255);
-        color.a = uint8(saturate(v.w) * 255);
+        color.r = toU8(nv::iround(saturate(v.x) * 255));
+        color.g = toU8(nv::iround(saturate(v.y) * 255));
+        color.b = toU8(nv::iround(saturate(v.z) * 255));
+        color.a = toU8(nv::iround(saturate(v.w) * 255));
+        return color;
+    }
+
+    inline Color32 toColor32_from_bgra(const Vector4 & v) // v holds (b, g, r, a) in x, y, z, w.
+    {
+        Color32 color;
+        color.b = toU8(nv::iround(saturate(v.x) * 255));
+        color.g = toU8(nv::iround(saturate(v.y) * 255));
+        color.r = toU8(nv::iround(saturate(v.z) * 255));
+        color.a = toU8(nv::iround(saturate(v.w) * 255));
+        return color;
+    }
+
+    inline Color32 toColor32_from_argb(const Vector4 & v) // v holds (a, r, g, b) in x, y, z, w.
+    {
+        Color32 color;
+        color.a = toU8(nv::iround(saturate(v.x) * 255));
+        color.r = toU8(nv::iround(saturate(v.y) * 255));
+        color.g = toU8(nv::iround(saturate(v.z) * 255));
+        color.b = toU8(nv::iround(saturate(v.w) * 255));
         return color;
     }

@ -92,6 +172,30 @@ namespace nv
        return h;
    }

+    inline float toSrgb(float f) { // Linear -> sRGB-encoded value; the result always lies in [0, 1].
+        if (nv::isNan(f))           f = 0.0f;           // NaN is mapped to 0.
+        else if (f <= 0.0f)         f = 0.0f;           // Non-positive inputs clamp to 0.
+        else if (f <= 0.0031308f)   f = 12.92f * f;     // Linear segment near black.
+        else if (f <= 1.0f)         f = (powf(f, 0.41666f) * 1.055f) - 0.055f; // Power segment; the exponent approximates 1/2.4.
+        else                        f = 1.0f;           // Inputs above 1 clamp to 1.
+        return f;
+    }
+
+    inline float fromSrgb(float f) { // sRGB-encoded -> linear value; NaN and negative inputs give 0, inputs above 1 give 1.
+        if (nv::isNan(f) || f < 0.0f)   f = 0.0f;   // NaN fails every comparison below, so catch it here (toSrgb also maps NaN to 0).
+        else if (f < 0.04045f)  f = f / 12.92f;     // Linear segment near black.
+        else if (f <= 1.0f)     f = powf((f + 0.055f) / 1.055f, 2.4f); // Power segment.
+        else                    f = 1.0f;           // Inputs above 1 clamp to 1.
+        return f;
+    }
+
+    inline Vector3 toSrgb(const Vector3 & v) {
+        return Vector3(toSrgb(v.x), toSrgb(v.y), toSrgb(v.z));
+    }
+
+    inline Vector3 fromSrgb(const Vector3 & v) {
+        return Vector3(fromSrgb(v.x), fromSrgb(v.y), fromSrgb(v.z));
+    }

 } // nv namespace

--- a/src/nvmath/Fitting.cpp
+++ b/src/nvmath/Fitting.cpp
@ -179,7 +179,7 @@ bool nv::Fit::isPlanar(int n, const Vector3 * points, float epsilon/*=NV_EPSILON
 {
    // compute the centroid and covariance
    float matrix[6];
-    Vector3 centroid = computeCovariance(n, points, matrix);
+    computeCovariance(n, points, matrix);

    float eigenValues[3];
    Vector3 eigenVectors[3];
--- a/src/nvmath/Fitting.h
+++ b/src/nvmath/Fitting.h
@ -9,9 +9,6 @@

 namespace nv
 {
-    class Vector3;
-    class Plane;
-
    namespace Fit
    {
        Vector3 computeCentroid(int n, const Vector3 * points);
--- a/src/nvmath/Half.cpp
+++ b/src/nvmath/Half.cpp
@ -74,14 +74,8 @@
 //

 #include "Half.h"
-
-#include "nvcore/Memory.h"
-
 #include <stdio.h>

-#if NV_CC_GNUC
-#include <xmmintrin.h>
-#endif

 // Load immediate
 static inline uint32 _uint32_li( uint32 a )
@ -495,9 +489,20 @@ nv::half_to_float( uint16 h )
 }


+#if !NV_OS_IOS //ACStodoIOS some better define to choose this?
+
+#if NV_CC_GNUC
+#if defined(__i386__) || defined(__x86_64__)
+#include <xmmintrin.h>
+#endif
+#endif
+
+#include "nvcore/Memory.h" // NV_ALIGN_16
+
 static __m128 half_to_float4_SSE2(__m128i h)
 {
 #define SSE_CONST4(name, val) static const NV_ALIGN_16 uint name[4] = { (val), (val), (val), (val) }
+    
 #define CONST(name) *(const __m128i *)&name

    SSE_CONST4(mask_nosign,         0x7fff);
@ -541,7 +546,7 @@ static __m128 half_to_float4_SSE2(__m128i h)
 }


-void nv::half_to_float_array(const uint16 * vin, float * vout, int count) {
+void nv::half_to_float_array_SSE2(const uint16 * vin, float * vout, int count) {
    nvDebugCheck((intptr_t(vin) & 15) == 0);
    nvDebugCheck((intptr_t(vout) & 15) == 0);
    nvDebugCheck((count & 7) == 0);
@ -562,7 +567,7 @@ void nv::half_to_float_array(const uint16 * vin, float * vout, int count) {
    }
 }

-
+#endif 


 // @@ These tables could be smaller.
@ -769,4 +774,4 @@ static inline uint16_t float_to_half_nobranch(uint32_t x)
    bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff];
    return bits;
 }
-#endif
+#endif
--- a/src/nvmath/Half.h
+++ b/src/nvmath/Half.h
@ -10,7 +10,8 @@ namespace nv {
    uint16 half_from_float( uint32 f );

    // vin,vout must be 16 byte aligned. count must be a multiple of 8.
-    void half_to_float_array(const uint16 * vin, float * vout, int count);
+    // Implement a non-SSE version if we need it. For now, this naming makes it clear this is only available when SSE2 is supported.
+    void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count);

    void half_init_tables();

@ -40,6 +41,51 @@ namespace nv {
        return f.f;
    }

+
+    union Half {
+        uint16 raw;
+        struct {
+        #if NV_BIG_ENDIAN
+            uint negative:1;
+            uint biasedexponent:5;
+            uint mantissa:10;
+        #else
+            uint mantissa:10;
+            uint biasedexponent:5;
+            uint negative:1;
+        #endif
+        } field;
+    };
+
+
+    inline float TestHalfPrecisionAwayFromZero(float input) // Returns (half value one raw step above to_half(input)) - input.
+    {
+        Half h;
+        h.raw = to_half(input);
+        h.raw += 1; // Step the raw 16-bit pattern up by one.
+
+        float f = to_float(h.raw);
+        
+        // Subtract the initial value to find our precision
+        float delta = f - input;
+
+        return delta;
+    }
+     
+    inline float TestHalfPrecisionTowardsZero(float input) // Returns input - (half value one raw step below to_half(input)).
+    {
+        Half h;
+        h.raw = to_half(input);
+        h.raw -= 1; // Step the raw 16-bit pattern down by one.
+
+        float f = to_float(h.raw);
+
+        // Subtract the initial value to find our precision
+        float delta = f - input;
+
+        return -delta; // Sign is flipped relative to the raw difference.
+    }
+
 } // nv namespace

 #endif // NV_MATH_HALF_H
--- a/src/nvmath/Vector.h
+++ b/src/nvmath/Vector.h
@ -18,7 +18,8 @@ namespace nv
        Vector2(float x, float y);
        Vector2(Vector2::Arg v);

-        template <typename T> operator T() const { return T(x, y); }
+        //template <typename T> explicit Vector2(const T & v) : x(v.x), y(v.y) {}
+        //template <typename T> operator T() const { return T(x, y); }

        const Vector2 & operator=(Vector2::Arg v);

@ -50,11 +51,13 @@ namespace nv

        Vector3();
        explicit Vector3(float x);
+        //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {}
        Vector3(float x, float y, float z);
        Vector3(Vector2::Arg v, float z);
        Vector3(Vector3::Arg v);

-        template <typename T> operator T() const { return T(x, y, z); }
+        //template <typename T> explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {}
+        //template <typename T> operator T() const { return T(x, y, z); }

        const Vector3 & operator=(Vector3::Arg v);

@ -96,7 +99,8 @@ namespace nv
        Vector4(Vector4::Arg v);
        //	Vector4(const Quaternion & v);

-        template <typename T> operator T() const { return T(x, y, z, w); }
+        //template <typename T> explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
+        //template <typename T> operator T() const { return T(x, y, z, w); }

        const Vector4 & operator=(Vector4::Arg v);

@ -127,4 +131,16 @@ namespace nv

 } // nv namespace

+// If we had these functions, they would be ambiguous, the compiler would not know which one to pick:
+//template <typename T> Vector2 to(const T & v) { return Vector2(v.x, v.y); }
+//template <typename T> Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); }
+//template <typename T> Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.w); }
+
+// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages.
+
+// Instead we simply have explicit casts, e.g. to<T>(v): T must have the same size as the source vector and be constructible from its components.
+template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
+template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
+template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
+
 #endif // NV_MATH_VECTOR_H
--- a/src/nvmath/Vector.inl
+++ b/src/nvmath/Vector.inl
@ -336,6 +336,11 @@ namespace nv
        return sqrtf(lengthSquared(v));
    }

+    inline float distance(Vector2::Arg a, Vector2::Arg b)
+    {
+        return length(a - b);
+    }
+
    inline float inverseLength(Vector2::Arg v)
    {
        return 1.0f / sqrtf(lengthSquared(v));
@ -784,6 +789,90 @@ namespace nv
        return sdbmFloatHash(v.component, 4, h);
    }

+
+#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float
+
+    //int:
+
+    inline Vector2 scale(Vector2::Arg v, int s)
+    {
+        return Vector2(v.x * s, v.y * s);
+    }
+
+    inline Vector2 operator*(Vector2::Arg v, int s)
+    {
+        return scale(v, s);
+    }
+
+    inline Vector2 operator*(int s, Vector2::Arg v)
+    {
+        return scale(v, s);
+    }
+
+    inline Vector2 operator/(Vector2::Arg v, int s)
+    {
+        return scale(v, 1.0f/s);
+    }
+
+    inline Vector3 scale(Vector3::Arg v, int s)
+    {
+        return Vector3(v.x * s, v.y * s, v.z * s);
+    }
+
+    inline Vector3 operator*(Vector3::Arg v, int s)
+    {
+        return scale(v, s);
+    }
+
+    inline Vector3 operator*(int s, Vector3::Arg v)
+    {
+        return scale(v, s);
+    }
+
+    inline Vector3 operator/(Vector3::Arg v, int s)
+    {
+        return scale(v, 1.0f/s);
+    }
+
+    inline Vector4 scale(Vector4::Arg v, int s)
+    {
+        return Vector4(v.x * s, v.y * s, v.z * s, v.w * s);
+    }
+
+    inline Vector4 operator*(Vector4::Arg v, int s)
+    {
+        return scale(v, s);
+    }
+
+    inline Vector4 operator*(int s, Vector4::Arg v)
+    {
+        return scale(v, s);
+    }
+
+    inline Vector4 operator/(Vector4::Arg v, int s)
+    {
+        return scale(v, 1.0f/s);
+    }
+
+    //double:
+
+    inline Vector3 operator*(Vector3::Arg v, double s)
+    {
+        return scale(v, (float)s);
+    }
+
+    inline Vector3 operator*(double s, Vector3::Arg v)
+    {
+        return scale(v, (float)s);
+    }
+
+    inline Vector3 operator/(Vector3::Arg v, double s)
+    {
+        return scale(v, 1.f/((float)s));
+    }    
+        
+#endif //NV_OS_IOS
+
 } // nv namespace

 #endif // NV_MATH_VECTOR_INL
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@ -132,7 +132,7 @@ namespace nv
    {
 #if NV_OS_WIN32 || NV_OS_XBOX
        return _finite(f) != 0;
-#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
+#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS
        return isfinite(f);
 #elif NV_OS_LINUX
        return finitef(f);
@ -147,7 +147,7 @@ namespace nv
    {
 #if NV_OS_WIN32 || NV_OS_XBOX
        return _isnan(f) != 0;
-#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD
+#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS
        return isnan(f);
 #elif NV_OS_LINUX
        return isnanf(f);
@ -242,21 +242,18 @@ namespace nv

    // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.

-    // Quantize a [0, 1] full precision float, using exact endpoints.
-    inline float quantizeFloat(float f, uint bits) {
+    // Quantize a float in the [0,1] range, using exact end points or uniform bins.
+    inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
        nvDebugCheck(bits <= 16);
-        float scale = float((1 << bits) - 1);
-        float offset = 0.0f;
-        return floor(saturate(f) * scale + offset) / scale;
-    }

-    // Quantize a [0, 1] full precision float, using uniform bins.
-    /*inline float quantizeFloat(float f, uint bits) {
-        nvDebugCheck(bits <= 16);
-        float scale = float(1 << bits);
-        float offset = 0.5f;
-        return floor(saturate(f) * scale + offset) / scale;
-    }*/
+        float range = float(1 << bits);
+        if (exactEndPoints) {
+            return floorf(x * (range-1) + 0.5f) / (range-1);
+        }
+        else {
+            return (floorf(x * range) + 0.5f) / range;
+        }
+    }

    union Float754 {
        unsigned int raw;