diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/CompressorDXT.cpp index d6c22ea..10c6550 100644 --- a/src/nvtt/CompressorDXT.cpp +++ b/src/nvtt/CompressorDXT.cpp @@ -126,11 +126,14 @@ struct CompressorContext }; // Each task compresses one row. -void CompressorTask(void * data, size_t y) +void CompressorTask(void * data, size_t i) { CompressorContext * d = (CompressorContext *) data; - for (uint x = 0; x < d->bw; x++) + uint x = i % d->bw; + uint y = i / d->bw; + + //for (uint x = 0; x < d->bw; x++) { ColorBlock rgba; rgba.init(d->w, d->h, d->data, 4*x, 4*y); @@ -156,10 +159,11 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c context.compressor = this; static SequentialTaskDispatcher sequential; - //#static AppleTaskDispatcher concurrent; + //static AppleTaskDispatcher concurrent; + static OpenMPTaskDispatcher concurrent; - TaskDispatcher * dispatcher = &sequential; - //TaskDispatcher * dispatcher = &concurrent; + //TaskDispatcher * dispatcher = &sequential; + TaskDispatcher * dispatcher = &concurrent; // Use a single thread to compress small textures. 
if (context.bh < 4) dispatcher = &sequential; @@ -168,7 +172,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c const uint size = context.bs * count; context.mem = new uint8[size]; - dispatcher->dispatch(CompressorTask, &context, context.bh); + dispatcher->dispatch(CompressorTask, &context, count); outputOptions.writeData(context.mem, size); diff --git a/src/nvtt/TaskDispatcher.h b/src/nvtt/TaskDispatcher.h index 0995dc9..b489511 100644 --- a/src/nvtt/TaskDispatcher.h +++ b/src/nvtt/TaskDispatcher.h @@ -2,10 +2,13 @@ #include "nvtt.h" // OpenMP +// http://en.wikipedia.org/wiki/OpenMP #if defined(HAVE_OPENMP) #include #endif +// Grand Central Dispatch (GCD/libdispatch) +// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) #include #endif @@ -14,11 +17,15 @@ #define HAVE_PPL 1 #endif +// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: +// http://msdn.microsoft.com/en-us/library/dd504870.aspx #if defined(HAVE_PPL) #include //#include #endif +// Intel Threading Building Blocks (TBB). +// http://www.threadingbuildingblocks.org/ #if defined(HAVE_TBB) #include #endif @@ -35,6 +42,20 @@ namespace nvtt { } }; +#if defined(HAVE_OPENMP) + + struct OpenMPTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, size_t count) { + #pragma omp parallel for + for (int i = 0; i < int(count); i++) { + task(context, i); + } + } + }; + +#endif + #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) // Task dispatcher using Apple's Grand Central Dispatch. 
@@ -48,20 +69,6 @@ namespace nvtt { #endif -#if defined(HAVE_OPENMP) - - struct OpenMPTaskDispatcher : public TaskDispatcher - { - virtual void dispatch(Task * task, void * context, size_t count) { - #pragma omp parallel for - for (size_t i = 0; i < count; i++) { - task(context, i); - } - } - }; - -#endif - #if defined(HAVE_PPL) class CountingIterator @@ -125,5 +132,4 @@ namespace nvtt { #endif - } // namespace nvtt