diff --git a/src/nvtt/Compressor.h b/src/nvtt/Compressor.h index ed10160..6ad84c1 100644 --- a/src/nvtt/Compressor.h +++ b/src/nvtt/Compressor.h @@ -30,11 +30,11 @@ namespace nv { - struct CompressorInterface - { - virtual ~CompressorInterface() {} - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0; - }; + struct CompressorInterface + { + virtual ~CompressorInterface() {} + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0; + }; } // nv namespace diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/CompressorDXT.cpp index 10c6550..3697464 100644 --- a/src/nvtt/CompressorDXT.cpp +++ b/src/nvtt/CompressorDXT.cpp @@ -126,7 +126,7 @@ struct CompressorContext }; // Each task compresses one row. 
-void CompressorTask(void * data, size_t i) +void CompressorTask(void * data, int i) { CompressorContext * d = (CompressorContext *) data; @@ -143,7 +143,7 @@ void CompressorTask(void * data, size_t i) } } -void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) { CompressorContext context; context.alphaMode = alphaMode; @@ -158,12 +158,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c context.compressor = this; - static SequentialTaskDispatcher sequential; - //static AppleTaskDispatcher concurrent; - static OpenMPTaskDispatcher concurrent; - - //TaskDispatcher * dispatcher = &sequential; - TaskDispatcher * dispatcher = &concurrent; + SequentialTaskDispatcher sequential; // Use a single thread to compress small textures. 
if (context.bh < 4) dispatcher = &sequential; @@ -182,7 +177,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c -void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) { const uint bs = blockSize(); const uint bw = (w + 3) / 4; diff --git a/src/nvtt/CompressorDXT.h b/src/nvtt/CompressorDXT.h index 8a5f1bb..b3b5b55 100644 --- a/src/nvtt/CompressorDXT.h +++ b/src/nvtt/CompressorDXT.h @@ -35,7 +35,7 @@ namespace nv struct FixedBlockCompressor : public CompressorInterface { - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; virtual uint blockSize() const = 0; @@ -43,7 +43,7 @@ namespace nv struct ColorSetCompressor : public CompressorInterface { - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const 
nvtt::OutputOptions::Private & outputOptions); virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; virtual uint blockSize() const = 0; diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp index 990a33a..a5bb4f0 100644 --- a/src/nvtt/CompressorRGB.cpp +++ b/src/nvtt/CompressorRGB.cpp @@ -122,7 +122,7 @@ namespace -void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) { nvDebugCheck (compressionOptions.format == nvtt::Format_RGBA); diff --git a/src/nvtt/CompressorRGB.h b/src/nvtt/CompressorRGB.h index 0ae5a1d..bdfea0e 100644 --- a/src/nvtt/CompressorRGB.h +++ b/src/nvtt/CompressorRGB.h @@ -31,7 +31,7 @@ namespace nv { struct PixelFormatConverter : public CompressorInterface { - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); }; } // nv namespace diff --git a/src/nvtt/CompressorRGBE.cpp b/src/nvtt/CompressorRGBE.cpp index 4fc4354..232a939 100644 --- a/src/nvtt/CompressorRGBE.cpp +++ b/src/nvtt/CompressorRGBE.cpp @@ -56,7 +56,7 @@ static Color32 toRgbe8(float r, float g, float b) } -void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, const 
float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) { nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE); diff --git a/src/nvtt/CompressorRGBE.h b/src/nvtt/CompressorRGBE.h index d7c18b7..bbb625a 100644 --- a/src/nvtt/CompressorRGBE.h +++ b/src/nvtt/CompressorRGBE.h @@ -30,7 +30,7 @@ namespace nv { struct CompressorRGBE : public CompressorInterface { - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); }; } // nv namespace diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index 5e5a794..6950187 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -64,6 +64,8 @@ Compressor::Compressor() : m(*new Compressor::Private()) m.cuda = NULL; enableCudaAcceleration(m.cudaSupported); + + m.dispatcher = &m.defaultDispatcher; } Compressor::~Compressor() @@ -96,6 +98,16 @@ bool Compressor::isCudaAccelerationEnabled() const return m.cudaEnabled; } +void Compressor::setTaskDispatcher(TaskDispatcher * disp) +{ + if (disp == NULL) { + m.dispatcher = &m.defaultDispatcher; + } + else { + m.dispatcher = disp; + } +} + // Input Options API. 
bool Compressor::process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const @@ -318,7 +330,7 @@ bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int } else { - compressor->compress(alphaMode, w, h, rgba, compressionOptions, outputOptions); + compressor->compress(alphaMode, w, h, rgba, dispatcher, compressionOptions, outputOptions); } return true; diff --git a/src/nvtt/Context.h b/src/nvtt/Context.h index 523ad02..1ee7352 100644 --- a/src/nvtt/Context.h +++ b/src/nvtt/Context.h @@ -30,6 +30,7 @@ #include "nvtt/Compressor.h" #include "nvtt/cuda/CudaCompressorDXT.h" #include "nvtt.h" +#include "TaskDispatcher.h" namespace nv { @@ -51,18 +52,19 @@ namespace nvtt void quantize(TexImage & tex, const CompressionOptions::Private & compressionOptions) const; bool outputHeader(nvtt::TextureType textureType, int w, int h, int d, int mipmapCount, bool isNormalMap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - //bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; - nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; - nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; + nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; + nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; + bool cudaSupported; + bool cudaEnabled; - bool cudaSupported; - bool cudaEnabled; - - nv::AutoPtr cuda; + nv::AutoPtr cuda; + TaskDispatcher * dispatcher; + //SequentialTaskDispatcher defaultDispatcher; + ConcurrentTaskDispatcher defaultDispatcher; }; } // nvtt namespace diff --git 
a/src/nvtt/TaskDispatcher.h b/src/nvtt/TaskDispatcher.h index b489511..8f7a90c 100644 --- a/src/nvtt/TaskDispatcher.h +++ b/src/nvtt/TaskDispatcher.h @@ -10,16 +10,14 @@ // Gran Central Dispatch (GCD/libdispatch) // http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) +#define HAVE_GCD 1 #include #endif -#if NV_OS_WIN32 && _MSC_VER >= 1600 -#define HAVE_PPL 1 -#endif - // Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: // http://msdn.microsoft.com/en-us/library/dd504870.aspx -#if defined(HAVE_PPL) +#if NV_OS_WIN32 && _MSC_VER >= 1600 +#define HAVE_PPL 1 #include //#include #endif @@ -35,8 +33,8 @@ namespace nvtt { struct SequentialTaskDispatcher : public TaskDispatcher { - virtual void dispatch(Task * task, void * context, size_t count) { - for (size_t i = 0; i < count; i++) { + virtual void dispatch(Task * task, void * context, int count) { + for (int i = 0; i < count; i++) { task(context, i); } } @@ -46,9 +44,9 @@ namespace nvtt { struct OpenMPTaskDispatcher : public TaskDispatcher { - virtual void dispatch(Task * task, void * context, size_t count) { + virtual void dispatch(Task * task, void * context, int count) { #pragma omp parallel for - for (int i = 0; i < int(count); i++) { + for (int i = 0; i < count; i++) { task(context, i); } } @@ -61,9 +59,21 @@ namespace nvtt { // Task dispatcher using Apple's Grand Central Dispatch. struct AppleTaskDispatcher : public TaskDispatcher { - virtual void dispatch(Task * task, void * context, size_t count) { - dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0); - dispatch_apply_f(count, q, context, task); + // @@ This is really lame, but I refuse to use size_t in the public API. 
+ struct BlockContext { + Task * task; + void * context; + }; + + static void block(void * context, size_t id) { + BlockContext * ctx = (BlockContext *)context; + ctx->task(ctx->context, int(id)); + } + + virtual void dispatch(Task * task, void * context, int count) { + dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0); + BlockContext blockCtx = { task, context }; + dispatch_apply_f(count, q, &blockCtx, block); } }; @@ -71,42 +81,41 @@ namespace nvtt { #if defined(HAVE_PPL) - class CountingIterator - { - public: - CountingIterator() : i(0) {} - CountingIterator(const CountingIterator & rhs) : i(0) {} - explicit CountingIterator(int x) : i(x) {} - - //const int & base() const; - const int & operator*() const { return i; } - CountingIterator & operator++() { i++; return *this; } - CountingIterator & operator--() { i--; return *this; } - - private: - int i; + class CountingIterator + { + public: + CountingIterator() : i(0) {} + CountingIterator(const CountingIterator & rhs) : i(0) {} + explicit CountingIterator(int x) : i(x) {} + + const int & operator*() const { return i; } + CountingIterator & operator++() { i++; return *this; } + CountingIterator & operator--() { i--; return *this; } + + private: + int i; }; - struct TaskFunctor { - TaskFunctor(Task * task, void * context) : task(task), context(context) {} - void operator()(int & n) const { - task(context, n); - } - Task * task; - void * context; - }; + struct TaskFunctor { + TaskFunctor(Task * task, void * context) : task(task), context(context) {} + void operator()(int & n) const { + task(context, n); + } + Task * task; + void * context; + }; - // Using Microsoft's concurrency runtime. + // Task dispatcher using Microsoft's concurrency runtime. 
struct MicrosoftTaskDispatcher : public TaskDispatcher { - virtual void dispatch(Task * task, void * context, size_t count) + virtual void dispatch(Task * task, void * context, int count) { CountingIterator begin(0); CountingIterator end((int)count); TaskFunctor func(task, context); std::for_each(begin, end, func); - //std::parallel_for_each(begin, end, func); + //parallel_for_each(begin, end, func); } }; @@ -114,22 +123,35 @@ namespace nvtt { #if defined(HAVE_TBB) - struct TaskFunctor { - TaskFunctor(Task * task, void * context) : task(task), context(context) {} - void operator()(int & n) const { - task(context, n); - } - Task * task; - void * context; + struct TaskFunctor { + TaskFunctor(Task * task, void * context) : task(task), context(context) {} + void operator()(int & n) const { + task(context, n); + } + Task * task; + void * context; }; + // Task dispatcher using Intel's Thread Building Blocks. struct IntelTaskDispatcher : public TaskDispatcher { - virtual void dispatch(Task * task, void * context, size_t count) { - parallel_for(blocked_range(0, count, 1), TaskFunctor(task, context)); + virtual void dispatch(Task * task, void * context, int count) { + parallel_for(blocked_range(0, count, 1), TaskFunctor(task, context)); } }; #endif +#if defined(HAVE_OPENMP) + typedef OpenMPTaskDispatcher ConcurrentTaskDispatcher; +#elif defined(HAVE_TBB) + typedef IntelTaskDispatcher ConcurrentTaskDispatcher; +#elif defined(HAVE_PPL) + typedef MicrosoftTaskDispatcher ConcurrentTaskDispatcher; +#elif defined(HAVE_GCD) + typedef AppleTaskDispatcher ConcurrentTaskDispatcher; +#else + typedef SequentialTaskDispatcher ConcurrentTaskDispatcher; +#endif + } // namespace nvtt diff --git a/src/nvtt/cuda/CudaCompressorDXT.cpp b/src/nvtt/cuda/CudaCompressorDXT.cpp index b27f515..50c5143 100644 --- a/src/nvtt/cuda/CudaCompressorDXT.cpp +++ b/src/nvtt/cuda/CudaCompressorDXT.cpp @@ -125,7 +125,7 @@ CudaCompressor::CudaCompressor(CudaContext & ctx) : m_ctx(ctx) } -void 
CudaCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void CudaCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) { nvDebugCheck(cuda::isHardwarePresent()); diff --git a/src/nvtt/cuda/CudaCompressorDXT.h b/src/nvtt/cuda/CudaCompressorDXT.h index c6d1314..cc08759 100644 --- a/src/nvtt/cuda/CudaCompressorDXT.h +++ b/src/nvtt/cuda/CudaCompressorDXT.h @@ -54,7 +54,7 @@ namespace nv { CudaCompressor(CudaContext & ctx); - virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) = 0; virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index a70116a..2df502a 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -26,8 +26,6 @@ #ifndef NVTT_H #define NVTT_H -#include // size_t @@ Use or own define? 
- // Function linkage #if NVTT_SHARED @@ -333,11 +331,11 @@ namespace nvtt NVTT_API void setUserVersion(int version); }; - typedef void Task(void * context, size_t id); + typedef void Task(void * context, int id); struct TaskDispatcher { - virtual void dispatch(Task * task, void * context, size_t count) = 0; + virtual void dispatch(Task * task, void * context, int count) = 0; }; /// Context.