concurrency experiments.

This commit is contained in:
castano 2010-11-22 07:34:05 +00:00
parent 0bb3011f7f
commit c51f25f38d
8 changed files with 298 additions and 188 deletions

View File

@ -1,41 +1,28 @@
PROJECT(nvtt) PROJECT(nvtt)
ADD_SUBDIRECTORY(squish) ADD_SUBDIRECTORY(squish)
ADD_SUBDIRECTORY(bc6h) #ADD_SUBDIRECTORY(bc6h)
#ADD_SUBDIRECTORY(bc7) #ADD_SUBDIRECTORY(bc7)
SET(NVTT_SRCS SET(NVTT_SRCS
nvtt.h nvtt.h nvtt.cpp
nvtt.cpp nvtt_wrapper.h nvtt_wrapper.cpp
Context.h ClusterFit.h ClusterFit.cpp
Context.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
Compressor.h Compressor.h
CompressorDXT.h CompressorDXT.h CompressorDXT.cpp
CompressorDXT.cpp CompressorDX9.h CompressorDX9.cpp
CompressorDX9.h CompressorDX10.h CompressorDX10.cpp
CompressorDX9.cpp # CompressorDX11.h CompressorDX11.cpp
CompressorDX10.h CompressorRGB.h CompressorRGB.cpp
CompressorDX10.cpp CompressorRGBE.h CompressorRGBE.cpp
CompressorDX11.h Context.h Context.cpp
CompressorDX11.cpp QuickCompressDXT.h QuickCompressDXT.cpp
CompressorRGB.h OptimalCompressDXT.h OptimalCompressDXT.cpp
CompressorRGB.cpp SingleColorLookup.h SingleColorLookup.cpp
CompressorRGBE.h CompressionOptions.h CompressionOptions.cpp
CompressorRGBE.cpp InputOptions.h InputOptions.cpp
QuickCompressDXT.h OutputOptions.h OutputOptions.cpp
QuickCompressDXT.cpp TaskDispatcher.h TaskDispatcher.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
SingleColorLookup.cpp
CompressionOptions.h
CompressionOptions.cpp
InputOptions.h
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
TexImage.h TexImage.cpp TexImage.h TexImage.cpp
cuda/CudaUtils.h cuda/CudaUtils.h
cuda/CudaUtils.cpp cuda/CudaUtils.cpp
@ -62,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h) TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)
INSTALL(TARGETS nvtt INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin RUNTIME DESTINATION bin

View File

@ -28,6 +28,8 @@
#include "nvmath/Fitting.h" #include "nvmath/Fitting.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include <float.h> // FLT_MAX
using namespace nv; using namespace nv;
ClusterFit::ClusterFit() ClusterFit::ClusterFit()

View File

@ -26,6 +26,7 @@
#include "OutputOptions.h" #include "OutputOptions.h"
#include "nvtt.h" #include "nvtt.h"
#include "TaskDispatcher.h"
#include "nvcore/Memory.h" #include "nvcore/Memory.h"
@ -36,15 +37,15 @@
#include <new> // placement new #include <new> // placement new
using namespace nv;
using namespace nvtt;
/*
// OpenMP // OpenMP
#if defined(HAVE_OPENMP) #if defined(HAVE_OPENMP)
#include <omp.h> #include <omp.h>
#endif #endif
using namespace nv;
using namespace nvtt;
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint bs = blockSize(); const uint bs = blockSize();
@ -109,9 +110,73 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
} }
#endif #endif
} }
*/
// Shared state for one FixedBlockCompressor::compress() call. A pointer to this
// struct is handed to the task dispatcher as the opaque context argument and is
// read by every CompressorTask invocation.
struct CompressorContext
{
// Alpha mode forwarded unchanged to compressBlock().
nvtt::AlphaMode alphaMode;
// Source image dimensions, passed to ColorBlock::init().
uint w, h;
// Source image data, passed to ColorBlock::init(); not owned by the context.
const float * data;
// Compression settings forwarded to compressBlock(); not owned by the context.
const nvtt::CompressionOptions::Private * compressionOptions;
// bw, bh: number of 4x4 blocks per row / per column, i.e. (w + 3) / 4 and (h + 3) / 4.
// bs: size in bytes of one compressed block, as reported by blockSize().
uint bw, bh, bs;
// Output buffer of bs * bw * bh bytes; block (x, y) is written at offset (y * bw + x) * bs.
uint8 * mem;
// Compressor whose compressBlock() encodes each block.
FixedBlockCompressor * compressor;
};
// Each task compresses one row.
void CompressorTask(void * data, size_t y)
{
CompressorContext * d = (CompressorContext *) data;
for (uint x = 0; x < d->bw; x++)
{
ColorBlock rgba;
rgba.init(d->w, d->h, d->data, 4*x, 4*y);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr);
}
}
// Compresses a w x h image into fixed-size blocks covering 4x4 pixels each and
// hands the result to outputOptions.writeData() in a single call.
//
//   alphaMode          - forwarded to compressBlock() for every block.
//   w, h               - image dimensions in pixels; partial blocks at the
//                        right/bottom edges are counted by rounding up.
//   data               - source pixels, read through ColorBlock::init().
//   compressionOptions - forwarded to compressBlock() for every block.
//   outputOptions      - receives the compressed data.
//
// Work is split into one task per block row (see CompressorTask). Rows are
// dispatched concurrently when a concurrent dispatcher is available on this
// platform and the image has at least 4 block rows; otherwise they run
// sequentially on the calling thread.
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    CompressorContext context;
    context.alphaMode = alphaMode;
    context.w = w;
    context.h = h;
    context.data = data;
    context.compressionOptions = &compressionOptions;

    context.bs = blockSize();
    context.bw = (w + 3) / 4;
    context.bh = (h + 3) / 4;

    context.compressor = this;

    static SequentialTaskDispatcher sequential;
    TaskDispatcher * dispatcher = &sequential;

    // The concurrent dispatchers only exist under the same preprocessor
    // conditions that guard them in TaskDispatcher.h, so the selection has to
    // be guarded identically or the file fails to compile on other platforms.
#if NV_OS_DARWIN
    static AppleTaskDispatcher concurrent;
#elif defined(HAVE_OPENMP)
    static OpenMPTaskDispatcher concurrent;
#endif

#if NV_OS_DARWIN || defined(HAVE_OPENMP)
    // Use a single thread to compress small textures.
    if (context.bh >= 4) dispatcher = &concurrent;
#endif

    const uint count = context.bw * context.bh;
    const uint size = context.bs * count;
    context.mem = new uint8[size];

    // One task per block row; each task writes a disjoint range of context.mem.
    dispatcher->dispatch(CompressorTask, &context, context.bh);

    outputOptions.writeData(context.mem, size);

    delete [] context.mem;
}
//#include "bc6h/tile.h"
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {

View File

@ -56,7 +56,7 @@
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
#include <dispatch/dispatch.h>
Compressor::Compressor() : m(*new Compressor::Private()) Compressor::Compressor() : m(*new Compressor::Private())
{ {
@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
Compressor::~Compressor() Compressor::~Compressor()
{ {
delete &m; delete &m;
cuda::exit();
} }
@ -84,23 +83,13 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL) if (m.cudaEnabled && m.cuda == NULL)
{ {
// Select fastest CUDA device. @@ This is done automatically on current CUDA versions. m.cuda = new CudaContext();
int device = cuda::getFastestDevice();
if (!cuda::setDevice(device)) if (!m.cuda->isValid())
{ {
m.cudaEnabled = false; m.cudaEnabled = false;
m.cuda = NULL; m.cuda = NULL;
} }
else
{
m.cuda = new CudaContext();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
} }
} }

View File

56
src/nvtt/TaskDispatcher.h Normal file
View File

@ -0,0 +1,56 @@
// Built-in TaskDispatcher implementations.
//
// Every dispatcher runs task(context, i) once for each i in [0, count). The
// sequential one does so in order on the calling thread; the others may run
// the invocations concurrently, so tasks must not depend on execution order.

#ifndef NVTT_TASKDISPATCHER_H
#define NVTT_TASKDISPATCHER_H

#include "nvtt.h"

#include <stddef.h> // size_t, ptrdiff_t

// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif

#if NV_OS_DARWIN
//#if defined(HAVE_DISPATCH)
#include <dispatch/dispatch.h>
#endif

namespace nvtt {

    // Runs all tasks one after another on the calling thread.
    struct SequentialTaskDispatcher : public TaskDispatcher
    {
        virtual void dispatch(Task * task, void * context, size_t count) {
            for (size_t i = 0; i < count; i++) {
                task(context, i);
            }
        }
    };

#if NV_OS_DARWIN

    // Task dispatcher using Apple's Grand Central Dispatch.
    struct AppleTaskDispatcher : public TaskDispatcher
    {
        virtual void dispatch(Task * task, void * context, size_t count) {
            dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
            // dispatch_apply_f submits 'count' invocations of task(context, i)
            // to the queue and waits for all of them before returning.
            dispatch_apply_f(count, q, context, task);
        }
    };

#endif

#if defined(HAVE_OPENMP)

    // Task dispatcher that spreads the iterations over an OpenMP thread team.
    struct OpenMPTaskDispatcher : public TaskDispatcher
    {
        virtual void dispatch(Task * task, void * context, size_t count) {
            // OpenMP 2.x compilers (e.g. MSVC) reject an unsigned index in an
            // 'omp for' loop, so iterate with a signed index and convert back.
            const ptrdiff_t n = static_cast<ptrdiff_t>(count);
            #pragma omp parallel
            {
                #pragma omp for
                for (ptrdiff_t i = 0; i < n; i++) {
                    task(context, static_cast<size_t>(i));
                }
            }
        }
    };

#endif

} // namespace nvtt

#endif // NVTT_TASKDISPATCHER_H

View File

@ -56,3 +56,5 @@ unsigned int nvtt::version()
return NVTT_VERSION; return NVTT_VERSION;
} }

View File

@ -26,6 +26,8 @@
#ifndef NVTT_H #ifndef NVTT_H
#define NVTT_H #define NVTT_H
#include <stddef.h> // size_t @@ Use our own define?
// Function linkage // Function linkage
#if NVTT_SHARED #if NVTT_SHARED
@ -331,6 +333,12 @@ namespace nvtt
NVTT_API void setUserVersion(int version); NVTT_API void setUserVersion(int version);
}; };
/// Unit of work run by a TaskDispatcher.
/// @param context  Opaque pointer passed through unchanged from dispatch().
/// @param id       Index of this invocation, in [0, count).
typedef void Task(void * context, size_t id);

/// Interface for running a batch of independent tasks.
/// Implementations are expected to call task(context, i) once for every i in
/// [0, count); whether the calls run sequentially or concurrently is up to
/// the implementation.
struct TaskDispatcher
{
    /// Virtual so that implementations can be destroyed through a
    /// TaskDispatcher pointer.
    virtual ~TaskDispatcher() {}

    virtual void dispatch(Task * task, void * context, size_t count) = 0;
};
/// Context. /// Context.
struct Compressor struct Compressor
@ -344,6 +352,7 @@ namespace nvtt
// Context settings. // Context settings.
NVTT_API void enableCudaAcceleration(bool enable); NVTT_API void enableCudaAcceleration(bool enable);
NVTT_API bool isCudaAccelerationEnabled() const; NVTT_API bool isCudaAccelerationEnabled() const;
NVTT_API void setTaskDispatcher(TaskDispatcher * disp);
// InputOptions API. // InputOptions API.
NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;