Merge changes from The Witness.

2011-04-06 02:41:15 +00:00 · 2011-04-06 02:41:15 +00:00 · 8a837981b6
commit 8a837981b6
parent 9ebcff93de
19 changed files with 212 additions and 45 deletions
--- a/project/vc9/nvconfig.h
+++ b/project/vc9/nvconfig.h
@ -7,15 +7,15 @@
 //#cmakedefine HAVE_EXECINFO_H
 #define HAVE_MALLOC_H

-#if !defined(_DEBUG)
+#if defined(_OPENMP)
 #define HAVE_OPENMP
 #endif

-#if !defined(_M_X64)
+/*#if !defined(_M_X64)
 //#define HAVE_FREEIMAGE
 #define HAVE_PNG
 #define HAVE_JPEG
 #define HAVE_TIFF
-#endif
+#endif*/

 #endif // NV_CONFIG
--- a/project/vc9/nvcore/nvcore.vcproj
+++ b/project/vc9/nvcore/nvcore.vcproj
@ -312,10 +312,6 @@
 			RelativePath="..\..\..\src\nvcore\FileSystem.h"
 			>
 		</File>
-		<File
-			RelativePath="..\..\..\src\nvcore\HashMap.h"
-			>
-		</File>
 		<File
 			RelativePath="..\..\..\src\nvcore\Library.cpp"
 			>
--- a/project/vc9/nvtt/nvtt.vcproj
+++ b/project/vc9/nvtt/nvtt.vcproj
@ -217,7 +217,7 @@
 				StringPooling="true"
 				RuntimeLibrary="2"
 				EnableEnhancedInstructionSet="2"
-				OpenMP="true"
+				OpenMP="false"
 				UsePrecompiledHeader="0"
 				WarningLevel="3"
 				DebugInformationFormat="3"
@ -304,7 +304,7 @@
 				PreprocessorDefinitions="WIN32;NDEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;HAVE_CUDA;__SSE2__;__SSE__;__MMX__"
 				StringPooling="true"
 				RuntimeLibrary="2"
-				OpenMP="true"
+				OpenMP="false"
 				UsePrecompiledHeader="0"
 				WarningLevel="3"
 				DebugInformationFormat="3"
--- a/src/nvcore/Array.h
+++ b/src/nvcore/Array.h
@ -46,7 +46,7 @@ namespace nv
    }

    template <typename T>
-    void construct(T * restrict ptr, uint new_size, uint old_size, const T & value) {
+    void construct(T * restrict ptr, uint new_size, uint old_size, const T & elem) {
        for (uint i = old_size; i < new_size; i++) {
            new(ptr+i) T(elem);	// placement new
        }
@ -498,7 +498,7 @@ namespace nv
            }

            for (uint i = 0; i < p.m_size; i++) {
-                s << buffer()[i];
+                s << p.buffer()[i];
            }

            return s;
--- a/src/nvcore/HashMap.h
+++ b/src/nvcore/HashMap.h
@ -416,6 +416,9 @@ namespace nv

            if (s.isLoading()) {
                map.clear();
+                if(entry_count == 0) {
+                    return s;
+                }
                map.entry_count = entry_count;
                map.size_mask = nextPowerOfTwo(entry_count) - 1;
                map.table = malloc<Entry>(map.size_mask + 1);
@ -468,6 +471,13 @@ namespace nv
            return s;
        }

+        /// Swap the contents of the two given hash maps.
+        friend void swap(HashMap<T, U, H, E> & a, HashMap<T, U, H, E> & b)
+        {
+            swap(a.entry_count, b.entry_count);
+            swap(a.size_mask, b.size_mask);
+            swap(a.table, b.table);
+        }

    private:
        static const uint TOMBSTONE_HASH = (uint) -1;
--- a/src/nvcore/StdStream.h
+++ b/src/nvcore/StdStream.h
@ -299,6 +299,8 @@ namespace nv
        }
        //@}

+        const uint8 * ptr() const { return m_ptr; }
+

    private:

--- a/src/nvcore/Utils.h
+++ b/src/nvcore/Utils.h
@ -105,6 +105,19 @@ namespace nv
        return h;
    }

+    // Hashes 'count' floats starting at f, continuing from h. -0.0f hashes like 0.0f. Does not handle NaN properly.
+    inline uint sdbmFloatHash(const float * f, uint count, uint h = 5381)
+    {
+        for (uint i = 0; i < count; i++) {
+            //nvDebugCheck(nv::isFinite(f[i]));
+            union { float f; uint32 i; } x = { f[i] };  // Read element i; '*f' re-hashed f[0] on every iteration.
+            if (x.i == 0x80000000) x.i = 0;
+            h = sdbmHash(&x, 4, h);
+        }
+        return h;
+    }
+
+
    // Some hash functors:
    template <typename Key> struct Hash 
    {
@ -120,6 +133,12 @@ namespace nv
    {
        uint operator()(uint x) const { return x; }
    };
+    template <> struct Hash<float>
+    {
+        uint operator()(float f) const {
+            return sdbmFloatHash(&f, 1);
+        }
+    };

    template <typename Key> struct Equal
    {
--- a/src/nvimage/DirectDrawSurface.cpp
+++ b/src/nvimage/DirectDrawSurface.cpp
@ -54,6 +54,16 @@ const uint nv::FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B');
 const uint nv::FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1');
 const uint nv::FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2');

+const uint nv::D3DFMT_A16B16G16R16 = 36;
+
+// Floating point formats
+const uint nv::D3DFMT_R16F = 111;
+const uint nv::D3DFMT_G16R16F = 112;
+const uint nv::D3DFMT_A16B16G16R16F = 113;
+const uint nv::D3DFMT_R32F = 114;
+const uint nv::D3DFMT_G32R32F = 115;
+const uint nv::D3DFMT_A32B32G32R32F = 116;
+

 namespace
 {
@ -82,8 +92,6 @@ namespace
    static const uint D3DFMT_G16R16 = 34;
    static const uint D3DFMT_A2R10G10B10 = 35;

-    static const uint D3DFMT_A16B16G16R16 = 36;
-
    // Palette formats.
    static const uint D3DFMT_A8P8 = 40;
    static const uint D3DFMT_P8 = 41;
@ -94,13 +102,6 @@ namespace
    static const uint D3DFMT_A4L4 = 52;
    static const uint D3DFMT_L16 = 81;

-    // Floating point formats
-    static const uint D3DFMT_R16F = 111;
-    static const uint D3DFMT_G16R16F = 112;
-    static const uint D3DFMT_A16B16G16R16F = 113;
-    static const uint D3DFMT_R32F = 114;
-    static const uint D3DFMT_G32R32F = 115;
-    static const uint D3DFMT_A32B32G32R32F = 116;

    static const uint DDSD_CAPS = 0x00000001U;
    static const uint DDSD_PIXELFORMAT = 0x00001000U;
@ -501,7 +502,7 @@ namespace
        uint amask;
    };

-    static const FormatDescriptor s_d3dFormats[] =
+    static const FormatDescriptor s_d3d9Formats[] =
    {
        { D3DFMT_R8G8B8,		24, 0xFF0000,   0xFF00,	    0xFF,       0 },
        { D3DFMT_A8R8G8B8,		32, 0xFF0000,   0xFF00,     0xFF,       0xFF000000 },  // DXGI_FORMAT_B8G8R8A8_UNORM
@ -525,21 +526,21 @@ namespace
        { D3DFMT_L16,			16, 16,         0,          0,          0 },           // DXGI_FORMAT_R16_UNORM
    };

-    static const uint s_d3dFormatCount = sizeof(s_d3dFormats) / sizeof(s_d3dFormats[0]);
+    static const uint s_d3d9FormatCount = NV_ARRAY_SIZE(s_d3d9Formats);

 } // namespace

 uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask)
 {
-    for (int i = 0; i < s_d3dFormatCount; i++)
+    for (int i = 0; i < s_d3d9FormatCount; i++)
    {
-        if (s_d3dFormats[i].bitcount == bitcount &&
-            s_d3dFormats[i].rmask == rmask &&
-            s_d3dFormats[i].gmask == gmask &&
-            s_d3dFormats[i].bmask == bmask &&
-            s_d3dFormats[i].amask == amask)
+        if (s_d3d9Formats[i].bitcount == bitcount &&
+            s_d3d9Formats[i].rmask == rmask &&
+            s_d3d9Formats[i].gmask == gmask &&
+            s_d3d9Formats[i].bmask == bmask &&
+            s_d3d9Formats[i].amask == amask)
        {
-            return s_d3dFormats[i].format;
+            return s_d3d9Formats[i].format;
        }
    }

@ -547,7 +548,6 @@ uint nv::findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint
 }


-
 DDSHeader::DDSHeader()
 {
    this->fourcc = FOURCC_DDS;
--- a/src/nvimage/DirectDrawSurface.h
+++ b/src/nvimage/DirectDrawSurface.h
@ -44,6 +44,15 @@ namespace nv
    extern const uint FOURCC_ATI1;
    extern const uint FOURCC_ATI2;

+    extern const uint D3DFMT_G16R16;
+    extern const uint D3DFMT_A16B16G16R16;
+    extern const uint D3DFMT_R16F;
+    extern const uint D3DFMT_R32F;
+    extern const uint D3DFMT_G16R16F;
+    extern const uint D3DFMT_G32R32F;
+    extern const uint D3DFMT_A16B16G16R16F;
+    extern const uint D3DFMT_A32B32G32R32F;
+
    extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);

    struct NVIMAGE_CLASS DDSPixelFormat
--- a/src/nvimage/ErrorMetric.cpp
+++ b/src/nvimage/ErrorMetric.cpp
@ -395,7 +395,7 @@ float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
        n0 = normalizeSafe(n0, Vector3(0), 0.0f);
        n1 = normalizeSafe(n1, Vector3(0), 0.0f);

-        float angle = acos(clamp(dot(n0, n1), -1.0f, 1.0f));
+        float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
        error += angle * angle;
    }

--- a/src/nvmath/Color.h
+++ b/src/nvmath/Color.h
@ -169,6 +169,17 @@ namespace nv
        return color;
    }

+    inline Color32 toColor32(Vector4::Arg v)
+    {
+        Color32 color;
+        color.r = uint8(clamp(v.x, 0.0f, 1.0f) * 255);
+        color.g = uint8(clamp(v.y, 0.0f, 1.0f) * 255);
+        color.b = uint8(clamp(v.z, 0.0f, 1.0f) * 255);
+        color.a = uint8(clamp(v.w, 0.0f, 1.0f) * 255);
+
+        return color;
+    }
+
    inline Vector4 toVector4(Color32 c)
    {
        const float scale = 1.0f / 255.0f;
--- a/src/nvmath/Half.h
+++ b/src/nvmath/Half.h
@ -12,6 +12,18 @@ namespace nv {
    // Does not handle NaN or infinity.
    uint32 fast_half_to_float( uint16 h );

+    inline uint16 to_half(float c) {
+        union { float f; uint32 u; } f;
+        f.f = c;
+        return nv::half_from_float( f.u );
+    }
+
+    inline float to_float(uint16 c) {
+        union { float f; uint32 u; } f; // Reinterprets the returned uint32 bits as a float.
+        f.u = nv::fast_half_to_float( c ); // Fast conversion: does not handle NaN or infinity.
+        return f.f;
+    }
+
 } // nv namespace

 #endif // NV_MATH_HALF_H
--- a/src/nvmath/Vector.h
+++ b/src/nvmath/Vector.h
@ -46,6 +46,9 @@ namespace nv
        };
    };

+    // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor.
+    template <typename T> T to(Vector2::Arg v) { return T(v.x, v.y); }
+

    class NVMATH_CLASS Vector3
    {
@ -85,7 +88,6 @@ namespace nv
    };

    // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor.
-    template <typename T> Vector3 from(const T & v) { return Vector3(v.x, v.y, v.z); }
    template <typename T> T to(Vector3::Arg v) { return T(v.x, v.y, v.z); }


@ -128,6 +130,10 @@ namespace nv
        };
    };

+    // Helpers to convert vector types. Assume T has x,y,z,w members and 4 argument constructor.
+    template <typename T> T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); }
+
+

    // Vector2

@ -484,6 +490,14 @@ namespace nv
        return isFinite(v.x) && isFinite(v.y);
    }

+    inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f))
+    {
+        if (!isValid(v)) return fallback;
+        Vector2 vf = v;
+        nv::floatCleanup(vf.component, 2);
+        return vf;
+    }
+

    // Vector3

@ -630,11 +644,6 @@ namespace nv
        return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max));
    }

-    inline bool isValid(Vector3::Arg v)
-    {
-        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
-    }
-
    inline Vector3 floor(Vector3::Arg v)
    {
        return Vector3(floorf(v.x), floorf(v.y), floorf(v.z));
@ -645,6 +654,21 @@ namespace nv
        return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z));
    }

+    inline bool isValid(Vector3::Arg v)
+    {
+        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z);
+    }
+
+    inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f))
+    {
+        if (!isValid(v)) return fallback;
+        Vector3 vf = v;
+        nv::floatCleanup(vf.component, 3);
+        return vf;
+    }
+
+
+
    // Vector4

    inline Vector4 add(Vector4::Arg a, Vector4::Arg b)
@ -758,6 +782,14 @@ namespace nv
        return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w);
    }

+    inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f))
+    {
+        if (!isValid(v)) return fallback;
+        Vector4 vf = v;
+        nv::floatCleanup(vf.component, 4);
+        return vf;
+    }
+
 } // nv namespace

 #endif // NV_MATH_VECTOR_H
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@ -176,7 +176,7 @@ namespace nv
    // http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
    inline int iround(float f)
    {
-        return int(f);
+        return int(floorf(f + 0.5f));
    }

    inline int ifloor(float f)
@ -200,6 +200,16 @@ namespace nv
        return float(iround(f));
    }

+    // Eliminates negative zeros from a float array of n elements; every element is expected to be finite.
+    inline void floatCleanup(float * fp, int n)
+    {
+        for (int i = 0; i < n; i++) {
+            nvDebugCheck(isFinite(fp[i])); // Check each element; also avoids reading *fp when n == 0.
+            union { float f; uint32 i; } x = { fp[i] };
+            if (x.i == 0x80000000) fp[i] = 0.0f;
+        }
+    }
+
 } // nv

 #endif // NV_MATH_H
--- a/src/nvtt/CompressionOptions.cpp
+++ b/src/nvtt/CompressionOptions.cpp
@ -218,6 +218,18 @@ unsigned int CompressionOptions::d3d9Format() const
            if (bitcount <= 32) {
                return nv::findD3D9Format(bitcount, rmask, gmask, bmask, amask);
            }
+            else {
+                //if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return D3DFMT_G16R16;
+                if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return D3DFMT_A16B16G16R16;
+            }
+        }
+        else if (m.pixelType == PixelType_Float) {
+            if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return D3DFMT_R16F;
+            if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return D3DFMT_R32F;
+            if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return D3DFMT_G16R16F;
+            if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return D3DFMT_G32R32F;
+            if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return D3DFMT_A16B16G16R16F;
+            if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return D3DFMT_A32B32G32R32F;
        }

        return 0;
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
@ -149,6 +149,11 @@ int Compressor::estimateSize(const TexImage & tex, int mipmapCount, const Compre


 // Raw API.
+bool Compressor::outputHeader(TextureType type, int w, int h, int d, int mipmapCount, bool isNormalMap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
+{
+    return m.outputHeader(type, w, h, d, mipmapCount, isNormalMap, compressionOptions.m, outputOptions.m);
+}
+
 bool Compressor::compress(int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
 {
    return m.compress(AlphaMode_None, w, h, d, face, mipmap, rgba, compressionOptions.m, outputOptions.m);
--- a/src/nvtt/TexImage.cpp
+++ b/src/nvtt/TexImage.cpp
@ -285,19 +285,40 @@ float TexImage::alphaTestCoverage(float alphaRef/*= 0.5*/) const
    return m->image->alphaTestCoverage(alphaRef, 3);
 }

-float TexImage::average(int channel) const
+float TexImage::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2.2f*/) const
 {
    if (m->image == NULL) return 0.0f;

+    const uint count = m->image->width() * m->image->height();
+
    float sum = 0.0f;
    const float * c = m->image->channel(channel);

-    const uint count = m->image->width() * m->image->height();
-    for (uint i = 0; i < count; i++) {
-        sum += c[i];
+    float denom;
+
+    if (alpha_channel == -1) {
+        for (uint i = 0; i < count; i++) {
+            sum += powf(c[i], gamma);
+        }
+
+        denom = float(count);
+    }
+    else {
+        float alpha_sum = 0.0f;
+        const float * a = m->image->channel(alpha_channel);
+        
+        for (uint i = 0; i < count; i++) {
+            sum += powf(c[i], gamma) * a[i];
+            alpha_sum += a[i];
+        }
+
+        denom = alpha_sum;
    }

-    return sum / count;
+    // Avoid division by zero.
+    if (denom == 0.0f) return 0.0f;
+
+    return sum / denom;
 }

 const float * TexImage::data() const
@ -1047,6 +1068,33 @@ void TexImage::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
    m->image->scaleAlphaToCoverage(coverage, alphaRef, 3);
 }

+/*bool TexImage::normalizeRange(float * rangeMin, float * rangeMax)
+{
+    if (m->image == NULL) return false;
+
+    range(0, rangeMin, rangeMax);
+
+    if (*rangeMin == *rangeMax) {
+        // Single color image.
+        return false;
+    }
+
+    const float scale = 1.0f / (*rangeMax - *rangeMin);
+    const float bias = *rangeMin * scale;
+
+    if (range.x == 0.0f && range.y == 1.0f) {
+        // Already normalized.
+        return true;
+    }
+
+    detach();
+
+    // Scale to range.
+    img->scaleBias(0, 4, scale, bias);
+    //img->clamp(0, 4, 0.0f, 1.0f);
+
+    return true;
+}*/

 // Ideally you should compress/quantize the RGB and M portions independently.
 // Once you have M quantized, you would compute the corresponding RGB and quantize that.
--- a/src/nvtt/TexImage.h
+++ b/src/nvtt/TexImage.h
@ -27,7 +27,6 @@

 #include "nvtt.h"

-#include "nvcore/Array.h"
 #include "nvcore/RefCounted.h"
 #include "nvcore/Ptr.h"

--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -380,6 +380,7 @@ namespace nvtt
        NVTT_API int estimateSize(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions) const;

        // Raw API.
+        NVTT_API bool outputHeader(TextureType type, int w, int h, int d, int mipmapCount, bool isNormalMap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
        NVTT_API bool compress(int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
        NVTT_API int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions & compressionOptions) const;
    };
@ -419,7 +420,7 @@ namespace nvtt
        NVTT_API bool isNormalMap() const;
        NVTT_API int countMipmaps() const;
        NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const;
-        NVTT_API float average(int channel) const;
+        NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const;
        NVTT_API const float * data() const;
        NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const;
        NVTT_API void range(int channel, float * rangeMin, float * rangeMax);
@ -454,6 +455,7 @@ namespace nvtt
        NVTT_API void setBorder(float r, float g, float b, float a);
        NVTT_API void fill(float r, float g, float b, float a);
        NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f);
+        //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax);
        NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f);
        NVTT_API void fromRGBM(float range = 1.0f);
        NVTT_API void toYCoCg();