concurrency experiments.

This commit is contained in:
castano 2010-11-22 07:34:05 +00:00
parent 0bb3011f7f
commit c51f25f38d
8 changed files with 298 additions and 188 deletions

View File

@ -1,41 +1,28 @@
PROJECT(nvtt)
ADD_SUBDIRECTORY(squish)
ADD_SUBDIRECTORY(bc6h)
#ADD_SUBDIRECTORY(bc6h)
#ADD_SUBDIRECTORY(bc7)
SET(NVTT_SRCS
nvtt.h
nvtt.cpp
Context.h
Context.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
nvtt.h nvtt.cpp
nvtt_wrapper.h nvtt_wrapper.cpp
ClusterFit.h ClusterFit.cpp
Compressor.h
CompressorDXT.h
CompressorDXT.cpp
CompressorDX9.h
CompressorDX9.cpp
CompressorDX10.h
CompressorDX10.cpp
CompressorDX11.h
CompressorDX11.cpp
CompressorRGB.h
CompressorRGB.cpp
CompressorRGBE.h
CompressorRGBE.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
SingleColorLookup.cpp
CompressionOptions.h
CompressionOptions.cpp
InputOptions.h
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
CompressorDXT.h CompressorDXT.cpp
CompressorDX9.h CompressorDX9.cpp
CompressorDX10.h CompressorDX10.cpp
# CompressorDX11.h CompressorDX11.cpp
CompressorRGB.h CompressorRGB.cpp
CompressorRGBE.h CompressorRGBE.cpp
Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp
OptimalCompressDXT.h OptimalCompressDXT.cpp
SingleColorLookup.h SingleColorLookup.cpp
CompressionOptions.h CompressionOptions.cpp
InputOptions.h InputOptions.cpp
OutputOptions.h OutputOptions.cpp
TaskDispatcher.h TaskDispatcher.cpp
TexImage.h TexImage.cpp
cuda/CudaUtils.h
cuda/CudaUtils.cpp
@ -62,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)
INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin

View File

@ -28,6 +28,8 @@
#include "nvmath/Fitting.h"
#include "nvimage/ColorBlock.h"
#include <float.h> // FLT_MAX
using namespace nv;
ClusterFit::ClusterFit()

View File

@ -26,6 +26,7 @@
#include "OutputOptions.h"
#include "nvtt.h"
#include "TaskDispatcher.h"
#include "nvcore/Memory.h"
@ -36,15 +37,15 @@
#include <new> // placement new
using namespace nv;
using namespace nvtt;
/*
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
using namespace nv;
using namespace nvtt;
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
@ -109,9 +110,73 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
}
#endif
}
*/
// State shared by all the row tasks of a single FixedBlockCompressor::compress call.
struct CompressorContext
{
    // Forwarded unchanged to compressBlock for every block.
    nvtt::AlphaMode alphaMode;

    // Input image dimensions and pixel data, as handed to ColorBlock::init.
    uint w;
    uint h;
    const float * data;

    // Forwarded unchanged to compressBlock for every block.
    const nvtt::CompressionOptions::Private * compressionOptions;

    // Image size in 4x4 blocks (bw x bh) and size in bytes of one compressed block (bs).
    uint bw;
    uint bh;
    uint bs;

    // Destination buffer; block (x, y) is written at offset (y * bw + x) * bs.
    uint8 * mem;

    // Compressor whose compressBlock is invoked for each block.
    FixedBlockCompressor * compressor;
};
// Each task compresses one row.
void CompressorTask(void * data, size_t y)
{
CompressorContext * d = (CompressorContext *) data;
for (uint x = 0; x < d->bw; x++)
{
ColorBlock rgba;
rgba.init(d->w, d->h, d->data, 4*x, 4*y);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr);
}
}
// Compresses a w x h image as a grid of 4x4 blocks and emits the result with a
// single outputOptions.writeData call.
//
// Every row of blocks is one task (see CompressorTask). The tasks write into
// disjoint regions of a temporary buffer that is released before returning.
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    CompressorContext context;
    context.alphaMode = alphaMode;
    context.w = w;
    context.h = h;
    context.data = data;
    context.compressionOptions = &compressionOptions;
    context.bs = blockSize();
    // Round up so that partial blocks on the right/bottom edges are included.
    context.bw = (w + 3) / 4;
    context.bh = (h + 3) / 4;
    context.compressor = this;

    static SequentialTaskDispatcher sequential;

    // AppleTaskDispatcher and OpenMPTaskDispatcher are only declared under their
    // own guards in TaskDispatcher.h, so pick the parallel dispatcher with the
    // same conditions and fall back to sequential execution when neither exists.
#if NV_OS_DARWIN
    static AppleTaskDispatcher concurrent;
#elif defined(HAVE_OPENMP)
    static OpenMPTaskDispatcher concurrent;
#else
    static SequentialTaskDispatcher concurrent;
#endif

    TaskDispatcher * dispatcher = &concurrent;

    // Use a single thread to compress small textures.
    if (context.bh < 4) dispatcher = &sequential;

    const uint count = context.bw * context.bh;
    const uint size = context.bs * count;
    context.mem = new uint8[size];

    // One task per row of blocks.
    dispatcher->dispatch(CompressorTask, &context, context.bh);

    outputOptions.writeData(context.mem, size);

    delete [] context.mem;
}
//#include "bc6h/tile.h"
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{

View File

@ -56,7 +56,7 @@
using namespace nv;
using namespace nvtt;
#include <dispatch/dispatch.h>
Compressor::Compressor() : m(*new Compressor::Private())
{
@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
// Destructor: releases the private implementation object bound to 'm', then calls cuda::exit().
Compressor::~Compressor()
{
// 'm' is a reference to a heap allocated Private object, so it is freed through its address.
delete &m;
// NOTE(review): presumably tears down the CUDA state used by the library - confirm against cuda/CudaUtils.
cuda::exit();
}
@ -83,15 +82,6 @@ void Compressor::enableCudaAcceleration(bool enable)
}
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device. @@ This is done automatically on current CUDA versions.
int device = cuda::getFastestDevice();
if (!cuda::setDevice(device))
{
m.cudaEnabled = false;
m.cuda = NULL;
}
else
{
m.cuda = new CudaContext();
@ -102,7 +92,6 @@ void Compressor::enableCudaAcceleration(bool enable)
}
}
}
}
bool Compressor::isCudaAccelerationEnabled() const
{

View File

56
src/nvtt/TaskDispatcher.h Normal file
View File

@ -0,0 +1,56 @@
#include "nvtt.h"
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
#if NV_OS_DARWIN
//#if defined(HAVE_DISPATCH)
#include <dispatch/dispatch.h>
#endif
namespace nvtt {
// Dispatcher that runs all the tasks on the calling thread, in increasing index order.
struct SequentialTaskDispatcher : public TaskDispatcher
{
    // Calls task(context, 0), task(context, 1), ..., task(context, count - 1).
    virtual void dispatch(Task * task, void * context, size_t count) {
        size_t index = 0;
        while (index < count) {
            task(context, index);
            ++index;
        }
    }
};
#if NV_OS_DARWIN
// Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher
{
virtual void dispatch(Task * task, void * context, size_t count) {
dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
dispatch_apply_f(count, q, context, task);
}
};
#endif
#if defined(HAVE_OPENMP)
// Task dispatcher that shares the iterations among the threads of an OpenMP team.
struct OpenMPTaskDispatcher : public TaskDispatcher
{
    // Calls task(context, i) once for every i in [0, count).
    virtual void dispatch(Task * task, void * context, size_t count) {
        // OpenMP 2.x compilers only accept a signed index in a work-sharing
        // 'for' loop (unsigned indices were added in OpenMP 3.0), so iterate
        // with a signed counter and convert back when invoking the task.
        const ptrdiff_t total = static_cast<ptrdiff_t>(count);

        #pragma omp parallel
        {
            #pragma omp for
            for (ptrdiff_t i = 0; i < total; i++) {
                task(context, static_cast<size_t>(i));
            }
        }
    }
};
#endif
} // namespace nvtt

View File

@ -56,3 +56,5 @@ unsigned int nvtt::version()
return NVTT_VERSION;
}

View File

@ -26,6 +26,8 @@
#ifndef NVTT_H
#define NVTT_H
#include <stddef.h> // size_t @@ Use our own define?
// Function linkage
#if NVTT_SHARED
@ -331,6 +333,12 @@ namespace nvtt
NVTT_API void setUserVersion(int version);
};
/// Unit of work run by a TaskDispatcher: receives the shared context and the
/// index of the work item to process.
typedef void Task(void * context, size_t id);

/// Interface used to run a task over a range of indices.
struct TaskDispatcher
{
    /// Virtual so that implementations can be safely destroyed through a
    /// TaskDispatcher pointer.
    virtual ~TaskDispatcher() {}

    /// Invoke task(context, i) for each i in [0, count). Whether the calls run
    /// sequentially or concurrently is up to the implementation.
    virtual void dispatch(Task * task, void * context, size_t count) = 0;
};
/// Context.
struct Compressor
@ -344,6 +352,7 @@ namespace nvtt
// Context settings.
NVTT_API void enableCudaAcceleration(bool enable);
NVTT_API bool isCudaAccelerationEnabled() const;
NVTT_API void setTaskDispatcher(TaskDispatcher * disp);
// InputOptions API.
NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;