From c51f25f38dd2066133f6fb7f4c8989f893322b77 Mon Sep 17 00:00:00 2001 From: castano Date: Mon, 22 Nov 2010 07:34:05 +0000 Subject: [PATCH] concurrency experiments. --- src/nvtt/CMakeLists.txt | 51 ++---- src/nvtt/ClusterFit.cpp | 2 + src/nvtt/CompressorDXT.cpp | 347 +++++++++++++++++++++--------------- src/nvtt/Context.cpp | 19 +- src/nvtt/TaskDispatcher.cpp | 0 src/nvtt/TaskDispatcher.h | 56 ++++++ src/nvtt/nvtt.cpp | 2 + src/nvtt/nvtt.h | 9 + 8 files changed, 298 insertions(+), 188 deletions(-) create mode 100644 src/nvtt/TaskDispatcher.cpp create mode 100644 src/nvtt/TaskDispatcher.h diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt index d870834..793e0e5 100644 --- a/src/nvtt/CMakeLists.txt +++ b/src/nvtt/CMakeLists.txt @@ -1,41 +1,28 @@ PROJECT(nvtt) ADD_SUBDIRECTORY(squish) -ADD_SUBDIRECTORY(bc6h) +#ADD_SUBDIRECTORY(bc6h) #ADD_SUBDIRECTORY(bc7) SET(NVTT_SRCS - nvtt.h - nvtt.cpp - Context.h - Context.cpp - nvtt_wrapper.h - nvtt_wrapper.cpp + nvtt.h nvtt.cpp + nvtt_wrapper.h nvtt_wrapper.cpp + ClusterFit.h ClusterFit.cpp Compressor.h - CompressorDXT.h - CompressorDXT.cpp - CompressorDX9.h - CompressorDX9.cpp - CompressorDX10.h - CompressorDX10.cpp - CompressorDX11.h - CompressorDX11.cpp - CompressorRGB.h - CompressorRGB.cpp - CompressorRGBE.h - CompressorRGBE.cpp - QuickCompressDXT.h - QuickCompressDXT.cpp - OptimalCompressDXT.h - OptimalCompressDXT.cpp - SingleColorLookup.h - SingleColorLookup.cpp - CompressionOptions.h - CompressionOptions.cpp - InputOptions.h - InputOptions.cpp - OutputOptions.h - OutputOptions.cpp + CompressorDXT.h CompressorDXT.cpp + CompressorDX9.h CompressorDX9.cpp + CompressorDX10.h CompressorDX10.cpp +# CompressorDX11.h CompressorDX11.cpp + CompressorRGB.h CompressorRGB.cpp + CompressorRGBE.h CompressorRGBE.cpp + Context.h Context.cpp + QuickCompressDXT.h QuickCompressDXT.cpp + OptimalCompressDXT.h OptimalCompressDXT.cpp + SingleColorLookup.h SingleColorLookup.cpp + CompressionOptions.h CompressionOptions.cpp + InputOptions.h InputOptions.cpp + OutputOptions.h OutputOptions.cpp + TaskDispatcher.h TaskDispatcher.cpp TexImage.h TexImage.cpp cuda/CudaUtils.h cuda/CudaUtils.cpp @@ -62,7 +49,7 @@ ELSE(NVTT_SHARED) ADD_LIBRARY(nvtt ${NVTT_SRCS}) ENDIF(NVTT_SHARED) -TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h) +TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish) INSTALL(TARGETS nvtt RUNTIME DESTINATION bin diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index 7f21813..20c59ca 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -28,6 +28,8 @@ #include "nvmath/Fitting.h" #include "nvimage/ColorBlock.h" +#include // FLT_MAX + using namespace nv; ClusterFit::ClusterFit() diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/CompressorDXT.cpp index 4398f5c..76f4ed6 100644 --- a/src/nvtt/CompressorDXT.cpp +++ b/src/nvtt/CompressorDXT.cpp @@ -1,143 +1,208 @@ // Copyright (c) 2009-2011 Ignacio Castano // Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "CompressorDXT.h" -#include "OutputOptions.h" - -#include "nvtt.h" - -#include "nvcore/Memory.h" - -#include "nvimage/Image.h" -#include "nvimage/ColorBlock.h" -#include "nvimage/BlockDXT.h" - -#include // placement new - - -// OpenMP -#if defined(HAVE_OPENMP) -#include -#endif - -using namespace nv; -using namespace nvtt; - - -void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - const uint bs = blockSize(); - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - -#if defined(HAVE_OPENMP) - bool singleThreaded = false; -#else - bool singleThreaded = true; -#endif - - // Use a single thread to compress small textures. - if (bw * bh < 16) singleThreaded = true; - - if (singleThreaded) - { - nvDebugCheck(bs <= 16); - uint8 mem[16]; // @@ Output one row at a time! - - for (int y = 0; y < int(h); y += 4) { - for (uint x = 0; x < w; x += 4) { - - ColorBlock rgba; - rgba.init(w, h, data, x, y); - - compressBlock(rgba, alphaMode, compressionOptions, mem); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, bs); - } - } - } - } -#if defined(HAVE_OPENMP) - else - { - const uint size = bs * bw * bh; - uint8 * mem = new uint8[size]; - - #pragma omp parallel - { - #pragma omp for - for (int i = 0; i < int(bw*bh); i++) - { - const uint x = i % bw; - const uint y = i / bw; - - ColorBlock rgba; - rgba.init(w, h, data, 4*x, 4*y); - - uint8 * ptr = mem + (y * bw + x) * bs; - compressBlock(rgba, alphaMode, compressionOptions, ptr); - } // omp for - } // omp parallel - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, size); - } - - delete [] mem; - } -#endif -} - - -//#include "bc6h/tile.h" - -void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - const uint bs = blockSize(); - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - - //bool singleThreaded = true; - //if (singleThreaded) - { - uint8 * mem = malloc(bs * bw); - uint8 * ptr = mem; - - ColorSet set; - - for (uint y = 0; y < h; y += 4) { - for (uint x = 0; x < w; x += 4, ptr += bs) { - set.setColors(data, w, h, x, y); - compressBlock(set, alphaMode, compressionOptions, ptr); - } - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, bs * bw); - } - } - - free(mem); - } -} +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "CompressorDXT.h" +#include "OutputOptions.h" + +#include "nvtt.h" +#include "TaskDispatcher.h" + +#include "nvcore/Memory.h" + +#include "nvimage/Image.h" +#include "nvimage/ColorBlock.h" +#include "nvimage/BlockDXT.h" + +#include // placement new + + +using namespace nv; +using namespace nvtt; + +/* +// OpenMP +#if defined(HAVE_OPENMP) +#include +#endif + +void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + const uint bs = blockSize(); + const uint bw = (w + 3) / 4; + const uint bh = (h + 3) / 4; + +#if defined(HAVE_OPENMP) + bool singleThreaded = false; +#else + bool singleThreaded = true; +#endif + + // Use a single thread to compress small textures. + if (bw * bh < 16) singleThreaded = true; + + if (singleThreaded) + { + nvDebugCheck(bs <= 16); + uint8 mem[16]; // @@ Output one row at a time! + + for (int y = 0; y < int(h); y += 4) { + for (uint x = 0; x < w; x += 4) { + + ColorBlock rgba; + rgba.init(w, h, data, x, y); + + compressBlock(rgba, alphaMode, compressionOptions, mem); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(mem, bs); + } + } + } + } +#if defined(HAVE_OPENMP) + else + { + const uint size = bs * bw * bh; + uint8 * mem = new uint8[size]; + + #pragma omp parallel + { + #pragma omp for + for (int i = 0; i < int(bw*bh); i++) + { + const uint x = i % bw; + const uint y = i / bw; + + ColorBlock rgba; + rgba.init(w, h, data, 4*x, 4*y); + + uint8 * ptr = mem + (y * bw + x) * bs; + compressBlock(rgba, alphaMode, compressionOptions, ptr); + } // omp for + } // omp parallel + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(mem, size); + } + + delete [] mem; + } +#endif +} +*/ + + +struct CompressorContext +{ + nvtt::AlphaMode alphaMode; + uint w, h; + const float * data; + const nvtt::CompressionOptions::Private * compressionOptions; + + uint bw, bh, bs; + uint8 * mem; + FixedBlockCompressor * compressor; +}; + +// Each task compresses one row. +void CompressorTask(void * data, size_t y) +{ + CompressorContext * d = (CompressorContext *) data; + + for (uint x = 0; x < d->bw; x++) + { + ColorBlock rgba; + rgba.init(d->w, d->h, d->data, 4*x, 4*y); + + uint8 * ptr = d->mem + (y * d->bw + x) * d->bs; + d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr); + } +} + +void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +{ + CompressorContext context; + context.alphaMode = alphaMode; + context.w = w; + context.h = h; + context.data = data; + context.compressionOptions = &compressionOptions; + + context.bs = blockSize(); + context.bw = (w + 3) / 4; + context.bh = (h + 3) / 4; + + context.compressor = this; + + static SequentialTaskDispatcher sequential; + static AppleTaskDispatcher concurrent; + + //TaskDispatcher * dispatcher = &sequential; + TaskDispatcher * dispatcher = &concurrent; + + // Use a single thread to compress small textures. + if (context.bh < 4) dispatcher = &sequential; + + const uint count = context.bw * context.bh; + const uint size = context.bs * count; + context.mem = new uint8[size]; + + dispatcher->dispatch(CompressorTask, &context, context.bh); + + outputOptions.writeData(context.mem, size); + + delete [] context.mem; +} + + + + +void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + const uint bs = blockSize(); + const uint bw = (w + 3) / 4; + const uint bh = (h + 3) / 4; + + //bool singleThreaded = true; + //if (singleThreaded) + { + uint8 * mem = malloc(bs * bw); + uint8 * ptr = mem; + + ColorSet set; + + for (uint y = 0; y < h; y += 4) { + for (uint x = 0; x < w; x += 4, ptr += bs) { + set.setColors(data, w, h, x, y); + compressBlock(set, alphaMode, compressionOptions, ptr); + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(mem, bs * bw); + } + } + + free(mem); + } +} diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index 4ecaeb5..27a7837 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -56,7 +56,7 @@ using namespace nv; using namespace nvtt; - +#include Compressor::Compressor() : m(*new Compressor::Private()) { @@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private()) Compressor::~Compressor() { delete &m; - cuda::exit(); } @@ -84,23 +83,13 @@ void Compressor::enableCudaAcceleration(bool enable) if (m.cudaEnabled && m.cuda == NULL) { - // Select fastest CUDA device. @@ This is done automatically on current CUDA versions. - int device = cuda::getFastestDevice(); - if (!cuda::setDevice(device)) + m.cuda = new CudaContext(); + + if (!m.cuda->isValid()) { m.cudaEnabled = false; m.cuda = NULL; } - else - { - m.cuda = new CudaContext(); - - if (!m.cuda->isValid()) - { - m.cudaEnabled = false; - m.cuda = NULL; - } - } } } diff --git a/src/nvtt/TaskDispatcher.cpp b/src/nvtt/TaskDispatcher.cpp new file mode 100644 index 0000000..e69de29 diff --git a/src/nvtt/TaskDispatcher.h b/src/nvtt/TaskDispatcher.h new file mode 100644 index 0000000..f11bd83 --- /dev/null +++ b/src/nvtt/TaskDispatcher.h @@ -0,0 +1,56 @@ + +#include "nvtt.h" + +// OpenMP +#if defined(HAVE_OPENMP) +#include +#endif + +#if NV_OS_DARWIN +//#if defined(HAVE_DISPATCH) +#include +#endif + +namespace nvtt { + + struct SequentialTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, size_t count) { + for (size_t i = 0; i < count; i++) { + task(context, i); + } + } + }; + +#if NV_OS_DARWIN + + // Task dispatcher using Apple's Grand Central Dispatch. + struct AppleTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, size_t count) { + dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0); + dispatch_apply_f(count, q, context, task); + } + }; + +#endif + +#if defined(HAVE_OPENMP) + + struct OpenMPTaskDispatcher : public TaskDispatcher + { + virtual void dispatch(Task * task, void * context, size_t count) { + #pragma omp parallel + { + #pragma omp for + for (size_t i = 0; i < count; i++) { + task(context, i); + } + } + } + }; + +#endif + + +} // namespace nvtt diff --git a/src/nvtt/nvtt.cpp b/src/nvtt/nvtt.cpp index 5698b27..83b9aac 100644 --- a/src/nvtt/nvtt.cpp +++ b/src/nvtt/nvtt.cpp @@ -56,3 +56,5 @@ unsigned int nvtt::version() return NVTT_VERSION; } + + diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index c17c359..a70116a 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -26,6 +26,8 @@ #ifndef NVTT_H #define NVTT_H +#include // size_t @@ Use or own define? + // Function linkage #if NVTT_SHARED @@ -331,6 +333,12 @@ namespace nvtt NVTT_API void setUserVersion(int version); }; + typedef void Task(void * context, size_t id); + + struct TaskDispatcher + { + virtual void dispatch(Task * task, void * context, size_t count) = 0; + }; /// Context. struct Compressor @@ -344,6 +352,7 @@ namespace nvtt // Context settings. NVTT_API void enableCudaAcceleration(bool enable); NVTT_API bool isCudaAccelerationEnabled() const; + NVTT_API void setTaskDispatcher(TaskDispatcher * disp); // InputOptions API. NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;