concurrency experiments.

2010-11-22 07:34:05 +00:00
parent 0bb3011f7f
commit c51f25f38d
8 changed files with 298 additions and 188 deletions
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@ -1,41 +1,28 @@
 PROJECT(nvtt)
 ADD_SUBDIRECTORY(squish)
-ADD_SUBDIRECTORY(bc6h)
+#ADD_SUBDIRECTORY(bc6h)
 #ADD_SUBDIRECTORY(bc7)
 SET(NVTT_SRCS
-    nvtt.h
+    nvtt.h nvtt.cpp
-    nvtt.cpp
+    nvtt_wrapper.h nvtt_wrapper.cpp
-    Context.h
+    ClusterFit.h ClusterFit.cpp
    Context.cpp
    nvtt_wrapper.h
    nvtt_wrapper.cpp
    Compressor.h
-    CompressorDXT.h
+    CompressorDXT.h CompressorDXT.cpp
-    CompressorDXT.cpp
+    CompressorDX9.h CompressorDX9.cpp
-    CompressorDX9.h
+    CompressorDX10.h CompressorDX10.cpp
-    CompressorDX9.cpp
+#    CompressorDX11.h CompressorDX11.cpp
-    CompressorDX10.h
+    CompressorRGB.h CompressorRGB.cpp
-    CompressorDX10.cpp
+    CompressorRGBE.h CompressorRGBE.cpp
-    CompressorDX11.h
+    Context.h Context.cpp
-    CompressorDX11.cpp
+    QuickCompressDXT.h QuickCompressDXT.cpp
-    CompressorRGB.h
+    OptimalCompressDXT.h OptimalCompressDXT.cpp
-    CompressorRGB.cpp
+    SingleColorLookup.h SingleColorLookup.cpp
-    CompressorRGBE.h
+    CompressionOptions.h CompressionOptions.cpp
-    CompressorRGBE.cpp
+    InputOptions.h InputOptions.cpp
-    QuickCompressDXT.h
+    OutputOptions.h OutputOptions.cpp
-    QuickCompressDXT.cpp
+    TaskDispatcher.h TaskDispatcher.cpp
    OptimalCompressDXT.h
    OptimalCompressDXT.cpp
    SingleColorLookup.h
    SingleColorLookup.cpp
    CompressionOptions.h
    CompressionOptions.cpp
    InputOptions.h
    InputOptions.cpp
    OutputOptions.h
    OutputOptions.cpp
    TexImage.h TexImage.cpp
    cuda/CudaUtils.h
    cuda/CudaUtils.cpp
@ -62,7 +49,7 @@ ELSE(NVTT_SHARED)
    ADD_LIBRARY(nvtt ${NVTT_SRCS})
 ENDIF(NVTT_SHARED)
-TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h)
+TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)
 INSTALL(TARGETS nvtt 
    RUNTIME DESTINATION bin
--- a/src/nvtt/ClusterFit.cpp
+++ b/src/nvtt/ClusterFit.cpp
@ -28,6 +28,8 @@
 #include "nvmath/Fitting.h"
 #include "nvimage/ColorBlock.h"
 #include <float.h> // FLT_MAX
 using namespace nv;
 ClusterFit::ClusterFit()
--- a/src/nvtt/CompressorDXT.cpp
+++ b/src/nvtt/CompressorDXT.cpp
@ -1,143 +1,208 @@
 // Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
 // Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
-// 
+// 
-// Permission is hereby granted, free of charge, to any person
+// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
+// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
+// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
+// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
+// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
+// Software is furnished to do so, subject to the following
-// conditions:
+// conditions:
-// 
+// 
-// The above copyright notice and this permission notice shall be
+// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
+// included in all copies or substantial portions of the Software.
-// 
+// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
+// OTHER DEALINGS IN THE SOFTWARE.
-
+
-#include "CompressorDXT.h"
+#include "CompressorDXT.h"
-#include "OutputOptions.h"
+#include "OutputOptions.h"
-
+
-#include "nvtt.h"
+#include "nvtt.h"
-
+#include "TaskDispatcher.h"
-#include "nvcore/Memory.h"
+
-
+#include "nvcore/Memory.h"
-#include "nvimage/Image.h"
+
-#include "nvimage/ColorBlock.h"
+#include "nvimage/Image.h"
-#include "nvimage/BlockDXT.h"
+#include "nvimage/ColorBlock.h"
-
+#include "nvimage/BlockDXT.h"
-#include <new> // placement new
+
-
+#include <new> // placement new
-
+
-// OpenMP
+
-#if defined(HAVE_OPENMP)
+using namespace nv;
-#include <omp.h>
+using namespace nvtt;
-#endif
+
-
+/*
-using namespace nv;
+// OpenMP
-using namespace nvtt;
+#if defined(HAVE_OPENMP)
-
+#include <omp.h>
-
+#endif
-void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+
-{
+void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-    const uint bs = blockSize();
+{
-    const uint bw = (w + 3) / 4;
+    const uint bs = blockSize();
-    const uint bh = (h + 3) / 4;
+    const uint bw = (w + 3) / 4;
-
+    const uint bh = (h + 3) / 4;
-#if defined(HAVE_OPENMP)
+
-    bool singleThreaded = false;
+#if defined(HAVE_OPENMP)
-#else
+    bool singleThreaded = false;
-    bool singleThreaded = true;
+#else
-#endif
+    bool singleThreaded = true;
-
+#endif
-    // Use a single thread to compress small textures.
+
-    if (bw * bh < 16) singleThreaded = true;
+    // Use a single thread to compress small textures.
-
+    if (bw * bh < 16) singleThreaded = true;
-    if (singleThreaded)
+
-    {
+    if (singleThreaded)
-        nvDebugCheck(bs <= 16);
+    {
-        uint8 mem[16]; // @@ Output one row at a time!
+        nvDebugCheck(bs <= 16);
-
+        uint8 mem[16]; // @@ Output one row at a time!
-        for (int y = 0; y < int(h); y += 4) {
+
-            for (uint x = 0; x < w; x += 4) {
+        for (int y = 0; y < int(h); y += 4) {
-
+            for (uint x = 0; x < w; x += 4) {
-                ColorBlock rgba;
+
-                rgba.init(w, h, data, x, y);
+                ColorBlock rgba;
-
+                rgba.init(w, h, data, x, y);
-                compressBlock(rgba, alphaMode, compressionOptions, mem);
+
-
+                compressBlock(rgba, alphaMode, compressionOptions, mem);
-                if (outputOptions.outputHandler != NULL) {
+
-                    outputOptions.outputHandler->writeData(mem, bs);
+                if (outputOptions.outputHandler != NULL) {
-                }
+                    outputOptions.outputHandler->writeData(mem, bs);
-            }
+                }
-        }
+            }
-    }
+        }
-#if defined(HAVE_OPENMP)
+    }
-    else
+#if defined(HAVE_OPENMP)
-    {
+    else
-        const uint size = bs * bw * bh;
+    {
-        uint8 * mem = new uint8[size];
+        const uint size = bs * bw * bh;
-
+        uint8 * mem = new uint8[size];
-        #pragma omp parallel
+
-        {
+        #pragma omp parallel
-            #pragma omp for
+        {
-            for (int i = 0; i < int(bw*bh); i++)
+            #pragma omp for
-            {
+            for (int i = 0; i < int(bw*bh); i++)
-                const uint x = i % bw;
+            {
-                const uint y = i / bw;
+                const uint x = i % bw;
-
+                const uint y = i / bw;
-		ColorBlock rgba;
+
-		rgba.init(w, h, data, 4*x, 4*y);
+		ColorBlock rgba;
-
+		rgba.init(w, h, data, 4*x, 4*y);
-		uint8 * ptr = mem + (y * bw + x) * bs;
+
-		compressBlock(rgba, alphaMode, compressionOptions, ptr);
+		uint8 * ptr = mem + (y * bw + x) * bs;
-	    } // omp for
+		compressBlock(rgba, alphaMode, compressionOptions, ptr);
-	} // omp parallel
+	    } // omp for
-
+	} // omp parallel
-	if (outputOptions.outputHandler != NULL) {
+
-	    outputOptions.outputHandler->writeData(mem, size);
+	if (outputOptions.outputHandler != NULL) {
-	}
+	    outputOptions.outputHandler->writeData(mem, size);
-
+	}
-        delete [] mem;
+
-    }
+        delete [] mem;
-#endif
+    }
-}
+#endif
-
+}
-
+*/
-//#include "bc6h/tile.h"
+
-
+
-void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+struct CompressorContext
-{
+{
-    const uint bs = blockSize();
+    nvtt::AlphaMode alphaMode;
-    const uint bw = (w + 3) / 4;
+    uint w, h;
-    const uint bh = (h + 3) / 4;
+    const float * data;
-
+    const nvtt::CompressionOptions::Private * compressionOptions;
-    //bool singleThreaded = true;
+
-    //if (singleThreaded)
+    uint bw, bh, bs;
-    {
+    uint8 * mem;
-        uint8 * mem = malloc<uint8>(bs * bw);
+    FixedBlockCompressor * compressor;
-        uint8 * ptr = mem;
+};
-
+
-        ColorSet set;
+// Each task compresses one row.
-
+void CompressorTask(void * data, size_t y)
-        for (uint y = 0; y < h; y += 4) {
+{
-            for (uint x = 0; x < w; x += 4, ptr += bs) {
+    CompressorContext * d = (CompressorContext *) data;
-                set.setColors(data, w, h, x, y);
+
-                compressBlock(set, alphaMode, compressionOptions, ptr);
+    for (uint x = 0; x < d->bw; x++)
-            }
+    {
-
+        ColorBlock rgba;
-            if (outputOptions.outputHandler != NULL) {
+        rgba.init(d->w, d->h, d->data, 4*x, 4*y);
-                outputOptions.outputHandler->writeData(mem, bs * bw);
+
-            }
+        uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
-        }
+        d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr);
-
+    }
-        free(mem);
+}
-    }
+
-}
+void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
    CompressorContext context;
    context.alphaMode = alphaMode;
    context.w = w;
    context.h = h;
    context.data = data;
    context.compressionOptions = &compressionOptions;
    context.bs = blockSize();
    context.bw = (w + 3) / 4;
    context.bh = (h + 3) / 4;
    context.compressor = this;
    static SequentialTaskDispatcher sequential;
    static AppleTaskDispatcher concurrent;
    //TaskDispatcher * dispatcher = &sequential;
    TaskDispatcher * dispatcher = &concurrent;
    // Use a single thread to compress small textures.
    if (context.bh < 4) dispatcher = &sequential;
    const uint count = context.bw * context.bh;
    const uint size = context.bs * count;
    context.mem = new uint8[size];
    dispatcher->dispatch(CompressorTask, &context, context.bh);
    outputOptions.writeData(context.mem, size);
    delete [] context.mem;
 }
 // Compresses a w x h image one row of 4x4 blocks at a time.
 //
 // Each row of blocks is compressed into a scratch buffer of bs * bw bytes
 // (bs = blockSize(), bw = number of block columns, rounded up) and, when an
 // output handler is installed, that row is written out before the next one
 // is compressed.
 void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
 {
    const uint bs = blockSize();
    const uint bw = (w + 3) / 4;

    // Scratch buffer that holds exactly one row of compressed blocks.
    uint8 * mem = malloc<uint8>(bs * bw);

    ColorSet set;

    for (uint y = 0; y < h; y += 4) {
        // Rewind to the start of the row buffer for every row. Without this the
        // write pointer keeps advancing, so rows after the first would be written
        // past the end of the allocation while the first row is emitted again.
        uint8 * ptr = mem;

        for (uint x = 0; x < w; x += 4, ptr += bs) {
            set.setColors(data, w, h, x, y);
            compressBlock(set, alphaMode, compressionOptions, ptr);
        }

        if (outputOptions.outputHandler != NULL) {
            outputOptions.outputHandler->writeData(mem, bs * bw);
        }
    }

    free(mem);
 }
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
@ -56,7 +56,7 @@
 using namespace nv;
 using namespace nvtt;
-
+#include <dispatch/dispatch.h>
 Compressor::Compressor() : m(*new Compressor::Private())
 {
@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
 // Destroys the compressor: releases the private state object referenced by 'm',
 // then calls cuda::exit().
 Compressor::~Compressor()
 {
    delete &m; // 'm' is a reference, so take its address to delete the underlying object.
    cuda::exit();
 }
@ -84,23 +83,13 @@ void Compressor::enableCudaAcceleration(bool enable)
    if (m.cudaEnabled && m.cuda == NULL)
    {
-        // Select fastest CUDA device. @@ This is done automatically on current CUDA versions.
+        m.cuda = new CudaContext();
-        int device = cuda::getFastestDevice();
+
-        if (!cuda::setDevice(device))
+        if (!m.cuda->isValid())
        {
            m.cudaEnabled = false;
            m.cuda = NULL;
        }
        else
        {
            m.cuda = new CudaContext();
            if (!m.cuda->isValid())
            {
                m.cudaEnabled = false;
                m.cuda = NULL;
            }
        }
    }
 }
--- a/src/nvtt/TaskDispatcher.cpp
+++ b/src/nvtt/TaskDispatcher.cpp
--- a/src/nvtt/TaskDispatcher.h
+++ b/src/nvtt/TaskDispatcher.h
@ -0,0 +1,56 @@
 #include "nvtt.h"
 // OpenMP
 #if defined(HAVE_OPENMP)
 #include <omp.h>
 #endif
 #if NV_OS_DARWIN
 //#if defined(HAVE_DISPATCH)
 #include <dispatch/dispatch.h>
 #endif
 namespace nvtt {
    // Dispatcher that runs every task inline on the calling thread, in
    // increasing index order.
    struct SequentialTaskDispatcher : public TaskDispatcher
    {
        virtual void dispatch(Task * task, void * context, size_t count) {
            size_t index = 0;
            while (index != count) {
                task(context, index);
                ++index;
            }
        }
    };
 #if NV_OS_DARWIN
    // Task dispatcher using Apple's Grand Central Dispatch.
    struct AppleTaskDispatcher : public TaskDispatcher
    {
        virtual void dispatch(Task * task, void * context, size_t count) {
            dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
            dispatch_apply_f(count, q, context, task);
        }
    };
 #endif
 #if defined(HAVE_OPENMP)
    // Dispatcher that shares the task indices among the threads of an OpenMP
    // parallel region.
    struct OpenMPTaskDispatcher : public TaskDispatcher
    {
        virtual void dispatch(Task * task, void * context, size_t count) {
            // OpenMP 2.x compilers only accept a signed integer induction variable
            // in a work-sharing 'for', so iterate with a signed index and convert
            // back to size_t for the task. ptrdiff_t comes from <stddef.h>, which
            // nvtt.h already includes.
            const ptrdiff_t taskCount = ptrdiff_t(count);

            #pragma omp parallel
            {
                #pragma omp for
                for (ptrdiff_t i = 0; i < taskCount; i++) {
                    task(context, size_t(i));
                }
            }
        }
    };
 #endif
 } // namespace nvtt
--- a/src/nvtt/nvtt.cpp
+++ b/src/nvtt/nvtt.cpp
@ -56,3 +56,5 @@ unsigned int nvtt::version()
    return NVTT_VERSION;
 }
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -26,6 +26,8 @@
 #ifndef NVTT_H
 #define NVTT_H
 #include <stddef.h> // size_t @@ Use our own define?
 // Function linkage
 #if NVTT_SHARED
@ -331,6 +333,12 @@ namespace nvtt
        NVTT_API void setUserVersion(int version);
    };
    /// Unit of work: 'context' is the opaque pointer passed to
    /// TaskDispatcher::dispatch and 'id' is the index of this invocation.
    typedef void Task(void * context, size_t id);

    /// Interface for running a batch of tasks.
    struct TaskDispatcher
    {
        // Virtual so that a dispatcher can be destroyed safely through a
        // TaskDispatcher pointer.
        virtual ~TaskDispatcher() {}

        // Invokes task(context, i) for every i in [0, count). How the invocations
        // are scheduled is up to the implementation.
        virtual void dispatch(Task * task, void * context, size_t count) = 0;
    };
    /// Context.
    struct Compressor
@ -344,6 +352,7 @@ namespace nvtt
        // Context settings.
        NVTT_API void enableCudaAcceleration(bool enable);
        NVTT_API bool isCudaAccelerationEnabled() const;
        NVTT_API void setTaskDispatcher(TaskDispatcher * disp);
        // InputOptions API.
        NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
`@ -56,3 +56,5 @@ unsigned int nvtt::version()`
	`return NVTT_VERSION;`	`return NVTT_VERSION;`
	`}`	`}`