concurrency experiments.

2010-11-22 07:34:05 +00:00
parent 0bb3011f7f
commit c51f25f38d
8 changed files with 298 additions and 188 deletions
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@ -1,41 +1,28 @@
 PROJECT(nvtt)

 ADD_SUBDIRECTORY(squish)
-ADD_SUBDIRECTORY(bc6h)
+#ADD_SUBDIRECTORY(bc6h)
 #ADD_SUBDIRECTORY(bc7)

 SET(NVTT_SRCS
-    nvtt.h
-    nvtt.cpp
-    Context.h
-    Context.cpp
-    nvtt_wrapper.h
-    nvtt_wrapper.cpp
+    nvtt.h nvtt.cpp
+    nvtt_wrapper.h nvtt_wrapper.cpp
+    ClusterFit.h ClusterFit.cpp
    Compressor.h
-    CompressorDXT.h
-    CompressorDXT.cpp
-    CompressorDX9.h
-    CompressorDX9.cpp
-    CompressorDX10.h
-    CompressorDX10.cpp
-    CompressorDX11.h
-    CompressorDX11.cpp
-    CompressorRGB.h
-    CompressorRGB.cpp
-    CompressorRGBE.h
-    CompressorRGBE.cpp
-    QuickCompressDXT.h
-    QuickCompressDXT.cpp
-    OptimalCompressDXT.h
-    OptimalCompressDXT.cpp
-    SingleColorLookup.h
-    SingleColorLookup.cpp
-    CompressionOptions.h
-    CompressionOptions.cpp
-    InputOptions.h
-    InputOptions.cpp
-    OutputOptions.h
-    OutputOptions.cpp
+    CompressorDXT.h CompressorDXT.cpp
+    CompressorDX9.h CompressorDX9.cpp
+    CompressorDX10.h CompressorDX10.cpp
+#    CompressorDX11.h CompressorDX11.cpp
+    CompressorRGB.h CompressorRGB.cpp
+    CompressorRGBE.h CompressorRGBE.cpp
+    Context.h Context.cpp
+    QuickCompressDXT.h QuickCompressDXT.cpp
+    OptimalCompressDXT.h OptimalCompressDXT.cpp
+    SingleColorLookup.h SingleColorLookup.cpp
+    CompressionOptions.h CompressionOptions.cpp
+    InputOptions.h InputOptions.cpp
+    OutputOptions.h OutputOptions.cpp
+    TaskDispatcher.h TaskDispatcher.cpp
    TexImage.h TexImage.cpp
    cuda/CudaUtils.h
    cuda/CudaUtils.cpp
@ -62,7 +49,7 @@ ELSE(NVTT_SHARED)
    ADD_LIBRARY(nvtt ${NVTT_SRCS})
 ENDIF(NVTT_SHARED)

-TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h)
+TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)

 INSTALL(TARGETS nvtt 
    RUNTIME DESTINATION bin
--- a/src/nvtt/ClusterFit.cpp
+++ b/src/nvtt/ClusterFit.cpp
@ -28,6 +28,8 @@
 #include "nvmath/Fitting.h"
 #include "nvimage/ColorBlock.h"

+#include <float.h> // FLT_MAX
+
 using namespace nv;

 ClusterFit::ClusterFit()
--- a/src/nvtt/CompressorDXT.cpp
+++ b/src/nvtt/CompressorDXT.cpp
@ -1,143 +1,208 @@
 // Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
 // Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include "CompressorDXT.h"
-#include "OutputOptions.h"
-
-#include "nvtt.h"
-
-#include "nvcore/Memory.h"
-
-#include "nvimage/Image.h"
-#include "nvimage/ColorBlock.h"
-#include "nvimage/BlockDXT.h"
-
-#include <new> // placement new
-
-
-// OpenMP
-#if defined(HAVE_OPENMP)
-#include <omp.h>
-#endif
-
-using namespace nv;
-using namespace nvtt;
-
-
-void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-    const uint bs = blockSize();
-    const uint bw = (w + 3) / 4;
-    const uint bh = (h + 3) / 4;
-
-#if defined(HAVE_OPENMP)
-    bool singleThreaded = false;
-#else
-    bool singleThreaded = true;
-#endif
-
-    // Use a single thread to compress small textures.
-    if (bw * bh < 16) singleThreaded = true;
-
-    if (singleThreaded)
-    {
-        nvDebugCheck(bs <= 16);
-        uint8 mem[16]; // @@ Output one row at a time!
-
-        for (int y = 0; y < int(h); y += 4) {
-            for (uint x = 0; x < w; x += 4) {
-
-                ColorBlock rgba;
-                rgba.init(w, h, data, x, y);
-
-                compressBlock(rgba, alphaMode, compressionOptions, mem);
-
-                if (outputOptions.outputHandler != NULL) {
-                    outputOptions.outputHandler->writeData(mem, bs);
-                }
-            }
-        }
-    }
-#if defined(HAVE_OPENMP)
-    else
-    {
-        const uint size = bs * bw * bh;
-        uint8 * mem = new uint8[size];
-
-        #pragma omp parallel
-        {
-            #pragma omp for
-            for (int i = 0; i < int(bw*bh); i++)
-            {
-                const uint x = i % bw;
-                const uint y = i / bw;
-
-		ColorBlock rgba;
-		rgba.init(w, h, data, 4*x, 4*y);
-
-		uint8 * ptr = mem + (y * bw + x) * bs;
-		compressBlock(rgba, alphaMode, compressionOptions, ptr);
-	    } // omp for
-	} // omp parallel
-
-	if (outputOptions.outputHandler != NULL) {
-	    outputOptions.outputHandler->writeData(mem, size);
-	}
-
-        delete [] mem;
-    }
-#endif
-}
-
-
-//#include "bc6h/tile.h"
-
-void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
-{
-    const uint bs = blockSize();
-    const uint bw = (w + 3) / 4;
-    const uint bh = (h + 3) / 4;
-
-    //bool singleThreaded = true;
-    //if (singleThreaded)
-    {
-        uint8 * mem = malloc<uint8>(bs * bw);
-        uint8 * ptr = mem;
-
-        ColorSet set;
-
-        for (uint y = 0; y < h; y += 4) {
-            for (uint x = 0; x < w; x += 4, ptr += bs) {
-                set.setColors(data, w, h, x, y);
-                compressBlock(set, alphaMode, compressionOptions, ptr);
-            }
-
-            if (outputOptions.outputHandler != NULL) {
-                outputOptions.outputHandler->writeData(mem, bs * bw);
-            }
-        }
-
-        free(mem);
-    }
-}
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#include "CompressorDXT.h"
+#include "OutputOptions.h"
+
+#include "nvtt.h"
+#include "TaskDispatcher.h"
+
+#include "nvcore/Memory.h"
+
+#include "nvimage/Image.h"
+#include "nvimage/ColorBlock.h"
+#include "nvimage/BlockDXT.h"
+
+#include <new> // placement new
+
+
+using namespace nv;
+using namespace nvtt;
+
+/*
+// OpenMP
+#if defined(HAVE_OPENMP)
+#include <omp.h>
+#endif
+
+void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+{
+    const uint bs = blockSize();
+    const uint bw = (w + 3) / 4;
+    const uint bh = (h + 3) / 4;
+
+#if defined(HAVE_OPENMP)
+    bool singleThreaded = false;
+#else
+    bool singleThreaded = true;
+#endif
+
+    // Use a single thread to compress small textures.
+    if (bw * bh < 16) singleThreaded = true;
+
+    if (singleThreaded)
+    {
+        nvDebugCheck(bs <= 16);
+        uint8 mem[16]; // @@ Output one row at a time!
+
+        for (int y = 0; y < int(h); y += 4) {
+            for (uint x = 0; x < w; x += 4) {
+
+                ColorBlock rgba;
+                rgba.init(w, h, data, x, y);
+
+                compressBlock(rgba, alphaMode, compressionOptions, mem);
+
+                if (outputOptions.outputHandler != NULL) {
+                    outputOptions.outputHandler->writeData(mem, bs);
+                }
+            }
+        }
+    }
+#if defined(HAVE_OPENMP)
+    else
+    {
+        const uint size = bs * bw * bh;
+        uint8 * mem = new uint8[size];
+
+        #pragma omp parallel
+        {
+            #pragma omp for
+            for (int i = 0; i < int(bw*bh); i++)
+            {
+                const uint x = i % bw;
+                const uint y = i / bw;
+
+		ColorBlock rgba;
+		rgba.init(w, h, data, 4*x, 4*y);
+
+		uint8 * ptr = mem + (y * bw + x) * bs;
+		compressBlock(rgba, alphaMode, compressionOptions, ptr);
+	    } // omp for
+	} // omp parallel
+
+	if (outputOptions.outputHandler != NULL) {
+	    outputOptions.outputHandler->writeData(mem, size);
+	}
+
+        delete [] mem;
+    }
+#endif
+}
+*/
+
+
+// Parameters shared by all the CompressorTask invocations of a single
+// FixedBlockCompressor::compress() call. Filled in by compress().
+struct CompressorContext
+{
+    // Inputs, forwarded unchanged from the compress() arguments.
+    nvtt::AlphaMode alphaMode;
+    uint w;
+    uint h;
+    const float * data;
+    const nvtt::CompressionOptions::Private * compressionOptions;
+
+    uint bw;    // Number of blocks per row: (w + 3) / 4.
+    uint bh;    // Number of block rows: (h + 3) / 4.
+    uint bs;    // Size in bytes of one compressed block.
+
+    uint8 * mem;                        // Output buffer, bw * bh * bs bytes.
+    FixedBlockCompressor * compressor;  // Compressor whose compressBlock() is invoked.
+};
+
+// Task entry point. Each invocation compresses one row of blocks: 'data'
+// points to the shared CompressorContext and 'y' is the block row index.
+void CompressorTask(void * data, size_t y)
+{
+    CompressorContext * ctx = static_cast<CompressorContext *>(data);
+
+    const uint blocksPerRow = ctx->bw;
+
+    uint x = 0;
+    while (x < blocksPerRow)
+    {
+        // Gather the source colors of the block at block coordinates (x, y).
+        ColorBlock block;
+        block.init(ctx->w, ctx->h, ctx->data, 4*x, 4*y);
+
+        // Compressed blocks are laid out in row-major order, 'bs' bytes each.
+        uint8 * dst = ctx->mem + (y * blocksPerRow + x) * ctx->bs;
+        ctx->compressor->compressBlock(block, ctx->alphaMode, *ctx->compressionOptions, dst);
+
+        x++;
+    }
+}
+
+void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+{
+    CompressorContext context;
+    context.alphaMode = alphaMode;
+    context.w = w;
+    context.h = h;
+    context.data = data;
+    context.compressionOptions = &compressionOptions;
+
+    context.bs = blockSize();
+    context.bw = (w + 3) / 4;
+    context.bh = (h + 3) / 4;
+
+    context.compressor = this;
+
+    static SequentialTaskDispatcher sequential;
+    static AppleTaskDispatcher concurrent;
+
+    //TaskDispatcher * dispatcher = &sequential;
+    TaskDispatcher * dispatcher = &concurrent;
+
+    // Use a single thread to compress small textures.
+    if (context.bh < 4) dispatcher = &sequential;
+
+    const uint count = context.bw * context.bh;
+    const uint size = context.bs * count;
+    context.mem = new uint8[size];
+
+    dispatcher->dispatch(CompressorTask, &context, context.bh);
+
+    outputOptions.writeData(context.mem, size);
+
+    delete [] context.mem;
+}
+
+
+
+
+// Compresses a w x h image one row of blocks at a time. Each block row is
+// compressed into a scratch buffer that holds exactly one row (bs * bw bytes)
+// and is then passed to the output handler, if one is set.
+void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+{
+    const uint bs = blockSize();
+    const uint bw = (w + 3) / 4;
+    const uint rowSize = bs * bw;
+
+    uint8 * mem = malloc<uint8>(rowSize);
+
+    ColorSet set;
+
+    for (uint y = 0; y < h; y += 4) {
+        // Rewind to the start of the scratch buffer for every row. The buffer
+        // only has room for one row, so carrying the write pointer over from
+        // the previous row would write past its end.
+        uint8 * ptr = mem;
+
+        for (uint x = 0; x < w; x += 4, ptr += bs) {
+            set.setColors(data, w, h, x, y);
+            compressBlock(set, alphaMode, compressionOptions, ptr);
+        }
+
+        if (outputOptions.outputHandler != NULL) {
+            outputOptions.outputHandler->writeData(mem, rowSize);
+        }
+    }
+
+    free(mem);
+}
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
@ -56,7 +56,7 @@
 using namespace nv;
 using namespace nvtt;

-
+// Grand Central Dispatch is only available on Apple platforms, so the header
+// must not be included unconditionally.
+// NOTE(review): assumes NV_OS_DARWIN is defined by a header included above -- confirm.
+#if NV_OS_DARWIN
+#include <dispatch/dispatch.h>
+#endif

 Compressor::Compressor() : m(*new Compressor::Private())
 {
@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
 Compressor::~Compressor()
 {
    delete &m;
-    cuda::exit();
 }


@ -84,23 +83,13 @@ void Compressor::enableCudaAcceleration(bool enable)

    if (m.cudaEnabled && m.cuda == NULL)
    {
-        // Select fastest CUDA device. @@ This is done automatically on current CUDA versions.
-        int device = cuda::getFastestDevice();
-        if (!cuda::setDevice(device))
+        m.cuda = new CudaContext();
+
+        if (!m.cuda->isValid())
        {
            m.cudaEnabled = false;
            m.cuda = NULL;
        }
-        else
-        {
-            m.cuda = new CudaContext();
-
-            if (!m.cuda->isValid())
-            {
-                m.cudaEnabled = false;
-                m.cuda = NULL;
-            }
-        }
    }
 }

--- a/src/nvtt/TaskDispatcher.cpp
+++ b/src/nvtt/TaskDispatcher.cpp
--- a/src/nvtt/TaskDispatcher.h
+++ b/src/nvtt/TaskDispatcher.h
@ -0,0 +1,56 @@
+
+#include "nvtt.h"
+
+// OpenMP
+#if defined(HAVE_OPENMP)
+#include <omp.h>
+#endif
+
+#if NV_OS_DARWIN
+//#if defined(HAVE_DISPATCH)
+#include <dispatch/dispatch.h>
+#endif
+
+namespace nvtt {
+
+    // Task dispatcher that runs every task on the calling thread, in
+    // increasing id order.
+    struct SequentialTaskDispatcher : public TaskDispatcher
+    {
+        // Invokes 'task' once for each id in [0, count), passing 'context' through.
+        virtual void dispatch(Task * task, void * context, size_t count) {
+            size_t id = 0;
+            while (id < count) {
+                (*task)(context, id);
+                id++;
+            }
+        }
+    };
+
+#if NV_OS_DARWIN
+
+    // Task dispatcher built on Apple's Grand Central Dispatch.
+    struct AppleTaskDispatcher : public TaskDispatcher
+    {
+        // Submits 'count' invocations of 'task' to the high priority global
+        // queue; each invocation receives 'context' and its iteration index.
+        virtual void dispatch(Task * task, void * context, size_t count) {
+            dispatch_apply_f(count, dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0), context, task);
+        }
+    };
+
+#endif
+
+#if defined(HAVE_OPENMP)
+
+    // Task dispatcher that distributes the tasks over an OpenMP thread team.
+    struct OpenMPTaskDispatcher : public TaskDispatcher
+    {
+        // Invokes 'task' once for each id in [0, count), passing 'context' through.
+        virtual void dispatch(Task * task, void * context, size_t count) {
+            // OpenMP 2.x compilers only accept a signed integer index in an
+            // 'omp for' loop, so iterate with a signed counter and convert
+            // back to size_t for the task.
+            const ptrdiff_t n = ptrdiff_t(count);
+
+            #pragma omp parallel
+            {
+                #pragma omp for
+                for (ptrdiff_t i = 0; i < n; i++) {
+                    task(context, size_t(i));
+                }
+            }
+        }
+    };
+
+#endif
+
+
+} // namespace nvtt
--- a/src/nvtt/nvtt.cpp
+++ b/src/nvtt/nvtt.cpp
@ -56,3 +56,5 @@ unsigned int nvtt::version()
    return NVTT_VERSION;
 }

+
+
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -26,6 +26,8 @@
 #ifndef NVTT_H
 #define NVTT_H

+#include <stddef.h> // size_t @@ Use our own define?
+
 // Function linkage
 #if NVTT_SHARED

@ -331,6 +333,12 @@ namespace nvtt
        NVTT_API void setUserVersion(int version);
    };

+    /// Signature of a task: receives the user supplied context pointer and
+    /// the index of the task being executed.
+    typedef void Task(void * context, size_t id);
+
+    /// Interface used to run a batch of tasks.
+    struct TaskDispatcher
+    {
+        /// Virtual destructor, so that implementations can be safely
+        /// destroyed through a pointer to this interface.
+        virtual ~TaskDispatcher() {}
+
+        /// Invoke @a task @a count times, passing @a context and the
+        /// task index to each invocation.
+        virtual void dispatch(Task * task, void * context, size_t count) = 0;
+    };

    /// Context.
    struct Compressor
@ -344,6 +352,7 @@ namespace nvtt
        // Context settings.
        NVTT_API void enableCudaAcceleration(bool enable);
        NVTT_API bool isCudaAccelerationEnabled() const;
+        NVTT_API void setTaskDispatcher(TaskDispatcher * disp);

        // InputOptions API.
        NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;