diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/CompressorDXT.cpp
index d6c22ea..10c6550 100644
--- a/src/nvtt/CompressorDXT.cpp
+++ b/src/nvtt/CompressorDXT.cpp
@@ -126,11 +126,14 @@ struct CompressorContext
 };
 
 // Each task compresses one row.
-void CompressorTask(void * data, size_t y)
+void CompressorTask(void * data, size_t i)
 {
     CompressorContext * d = (CompressorContext *) data;
 
-    for (uint x = 0; x < d->bw; x++)
+    uint x = i % d->bw;
+    uint y = i / d->bw;
+
+    //for (uint x = 0; x < d->bw; x++)
     {
         ColorBlock rgba;
         rgba.init(d->w, d->h, d->data, 4*x, 4*y);
@@ -156,10 +159,11 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
     context.compressor = this;
 
     static SequentialTaskDispatcher sequential;
-	//#static AppleTaskDispatcher concurrent;
+	//static AppleTaskDispatcher concurrent;
+    static OpenMPTaskDispatcher concurrent;
 
-    TaskDispatcher * dispatcher = &sequential;
-    //TaskDispatcher * dispatcher = &concurrent;
+    //TaskDispatcher * dispatcher = &sequential;
+    TaskDispatcher * dispatcher = &concurrent;
 
     // Use a single thread to compress small textures.
     if (context.bh < 4) dispatcher = &sequential;
@@ -168,7 +172,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
     const uint size = context.bs * count;
     context.mem = new uint8[size];
 
-    dispatcher->dispatch(CompressorTask, &context, context.bh);
+    dispatcher->dispatch(CompressorTask, &context, count);
 
     outputOptions.writeData(context.mem, size);
 
diff --git a/src/nvtt/TaskDispatcher.h b/src/nvtt/TaskDispatcher.h
index 0995dc9..b489511 100644
--- a/src/nvtt/TaskDispatcher.h
+++ b/src/nvtt/TaskDispatcher.h
@@ -2,10 +2,13 @@
 #include "nvtt.h"
 
 // OpenMP
+// http://en.wikipedia.org/wiki/OpenMP
 #if defined(HAVE_OPENMP)
 #include <omp.h>
 #endif
 
+// Grand Central Dispatch (GCD/libdispatch)
+// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
 #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
 #include <dispatch/dispatch.h>
 #endif
@@ -14,11 +17,15 @@
 #define HAVE_PPL 1
 #endif
 
+// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: 
+// http://msdn.microsoft.com/en-us/library/dd504870.aspx
 #if defined(HAVE_PPL)
 #include <array>
 //#include <ppl.h>
 #endif
 
+// Intel Threading Building Blocks (TBB).
+// http://www.threadingbuildingblocks.org/
 #if defined(HAVE_TBB)
 #include <tbb/parallel_for.h>
 #endif
@@ -35,6 +42,20 @@ namespace nvtt {
         }
     };
 
+#if defined(HAVE_OPENMP)
+
+    struct OpenMPTaskDispatcher : public TaskDispatcher // Task dispatcher that runs the tasks from an OpenMP parallel-for loop.
+    {
+        virtual void dispatch(Task * task, void * context, size_t count) { // Invokes task(context, i) once for every i in [0, count).
+            #pragma omp parallel for
+            for (int i = 0; i < int(count); i++) { // count is narrowed to int; NOTE(review): presumably because older OpenMP versions require a signed loop index -- confirm. Counts above INT_MAX are not handled.
+                task(context, i);
+            }
+        }
+    };
+
+#endif
+
 #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
 
     // Task dispatcher using Apple's Grand Central Dispatch.
@@ -48,20 +69,6 @@ namespace nvtt {
 
 #endif
 
-#if defined(HAVE_OPENMP)
-
-    struct OpenMPTaskDispatcher : public TaskDispatcher
-    {
-        virtual void dispatch(Task * task, void * context, size_t count) {
-            #pragma omp parallel for
-            for (size_t i = 0; i < count; i++) {
-                task(context, i);
-            }
-        }
-    };
-
-#endif
-
 #if defined(HAVE_PPL)
 
     class CountingIterator
@@ -125,5 +132,4 @@ namespace nvtt {
 
 #endif
 
-
 } // namespace nvtt