concurrency experiments.

This commit is contained in:
castano 2010-11-22 07:34:05 +00:00
parent 0bb3011f7f
commit c51f25f38d
8 changed files with 298 additions and 188 deletions

View File

@ -1,41 +1,28 @@
PROJECT(nvtt) PROJECT(nvtt)
ADD_SUBDIRECTORY(squish) ADD_SUBDIRECTORY(squish)
ADD_SUBDIRECTORY(bc6h) #ADD_SUBDIRECTORY(bc6h)
#ADD_SUBDIRECTORY(bc7) #ADD_SUBDIRECTORY(bc7)
SET(NVTT_SRCS SET(NVTT_SRCS
nvtt.h nvtt.h nvtt.cpp
nvtt.cpp nvtt_wrapper.h nvtt_wrapper.cpp
Context.h ClusterFit.h ClusterFit.cpp
Context.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
Compressor.h Compressor.h
CompressorDXT.h CompressorDXT.h CompressorDXT.cpp
CompressorDXT.cpp CompressorDX9.h CompressorDX9.cpp
CompressorDX9.h CompressorDX10.h CompressorDX10.cpp
CompressorDX9.cpp # CompressorDX11.h CompressorDX11.cpp
CompressorDX10.h CompressorRGB.h CompressorRGB.cpp
CompressorDX10.cpp CompressorRGBE.h CompressorRGBE.cpp
CompressorDX11.h Context.h Context.cpp
CompressorDX11.cpp QuickCompressDXT.h QuickCompressDXT.cpp
CompressorRGB.h OptimalCompressDXT.h OptimalCompressDXT.cpp
CompressorRGB.cpp SingleColorLookup.h SingleColorLookup.cpp
CompressorRGBE.h CompressionOptions.h CompressionOptions.cpp
CompressorRGBE.cpp InputOptions.h InputOptions.cpp
QuickCompressDXT.h OutputOptions.h OutputOptions.cpp
QuickCompressDXT.cpp TaskDispatcher.h TaskDispatcher.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
SingleColorLookup.cpp
CompressionOptions.h
CompressionOptions.cpp
InputOptions.h
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
TexImage.h TexImage.cpp TexImage.h TexImage.cpp
cuda/CudaUtils.h cuda/CudaUtils.h
cuda/CudaUtils.cpp cuda/CudaUtils.cpp
@ -62,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h) TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)
INSTALL(TARGETS nvtt INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin RUNTIME DESTINATION bin

View File

@ -28,6 +28,8 @@
#include "nvmath/Fitting.h" #include "nvmath/Fitting.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include <float.h> // FLT_MAX
using namespace nv; using namespace nv;
ClusterFit::ClusterFit() ClusterFit::ClusterFit()

View File

@ -1,143 +1,208 @@
// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com> // Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com> // Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
// //
// Permission is hereby granted, free of charge, to any person // Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation // obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without // files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, // restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell // copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the // copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following // Software is furnished to do so, subject to the following
// conditions: // conditions:
// //
// The above copyright notice and this permission notice shall be // The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software. // included in all copies or substantial portions of the Software.
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorDXT.h" #include "CompressorDXT.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "nvtt.h" #include "nvtt.h"
#include "TaskDispatcher.h"
#include "nvcore/Memory.h"
#include "nvcore/Memory.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h" #include "nvimage/Image.h"
#include "nvimage/BlockDXT.h" #include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include <new> // placement new
#include <new> // placement new
// OpenMP
#if defined(HAVE_OPENMP) using namespace nv;
#include <omp.h> using namespace nvtt;
#endif
/*
using namespace nv; // OpenMP
using namespace nvtt; #if defined(HAVE_OPENMP)
#include <omp.h>
#endif
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
const uint bs = blockSize(); {
const uint bw = (w + 3) / 4; const uint bs = blockSize();
const uint bh = (h + 3) / 4; const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false; #if defined(HAVE_OPENMP)
#else bool singleThreaded = false;
bool singleThreaded = true; #else
#endif bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true; // Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{ if (singleThreaded)
nvDebugCheck(bs <= 16); {
uint8 mem[16]; // @@ Output one row at a time! nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) { for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y); ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs); if (outputOptions.outputHandler != NULL) {
} outputOptions.outputHandler->writeData(mem, bs);
} }
} }
} }
#if defined(HAVE_OPENMP) }
else #if defined(HAVE_OPENMP)
{ else
const uint size = bs * bw * bh; {
uint8 * mem = new uint8[size]; const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{ #pragma omp parallel
#pragma omp for {
for (int i = 0; i < int(bw*bh); i++) #pragma omp for
{ for (int i = 0; i < int(bw*bh); i++)
const uint x = i % bw; {
const uint y = i / bw; const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y); ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr); uint8 * ptr = mem + (y * bw + x) * bs;
} // omp for compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp parallel } // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size); if (outputOptions.outputHandler != NULL) {
} outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
} delete [] mem;
#endif }
} #endif
}
*/
//#include "bc6h/tile.h"
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) struct CompressorContext
{ {
const uint bs = blockSize(); nvtt::AlphaMode alphaMode;
const uint bw = (w + 3) / 4; uint w, h;
const uint bh = (h + 3) / 4; const float * data;
const nvtt::CompressionOptions::Private * compressionOptions;
//bool singleThreaded = true;
//if (singleThreaded) uint bw, bh, bs;
{ uint8 * mem;
uint8 * mem = malloc<uint8>(bs * bw); FixedBlockCompressor * compressor;
uint8 * ptr = mem; };
ColorSet set; // Each task compresses one row.
void CompressorTask(void * data, size_t y)
for (uint y = 0; y < h; y += 4) { {
for (uint x = 0; x < w; x += 4, ptr += bs) { CompressorContext * d = (CompressorContext *) data;
set.setColors(data, w, h, x, y);
compressBlock(set, alphaMode, compressionOptions, ptr); for (uint x = 0; x < d->bw; x++)
} {
ColorBlock rgba;
if (outputOptions.outputHandler != NULL) { rgba.init(d->w, d->h, d->data, 4*x, 4*y);
outputOptions.outputHandler->writeData(mem, bs * bw);
} uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
} d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr);
}
free(mem); }
}
} void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    // Compresses a w x h image into blocks of blockSize() bytes, one block per
    // 4x4 tile, and hands the complete compressed buffer to outputOptions in a
    // single write. Each row of blocks is compressed by one CompressorTask
    // invocation, so rows may be processed concurrently by the dispatcher.

    // Everything a task needs is passed through this shared context.
    CompressorContext context;
    context.alphaMode = alphaMode;
    context.w = w;
    context.h = h;
    context.data = data;
    context.compressionOptions = &compressionOptions;

    context.bs = blockSize();
    context.bw = (w + 3) / 4;   // Number of blocks per row, rounded up.
    context.bh = (h + 3) / 4;   // Number of block rows, rounded up.

    context.compressor = this;

    static SequentialTaskDispatcher sequential;

    // Pick the concurrent dispatcher that exists in this build. The GCD and
    // OpenMP dispatchers are only defined when their platform macro is set, so
    // naming one unconditionally breaks the build everywhere else.
#if NV_OS_DARWIN
    static AppleTaskDispatcher concurrent;
#elif defined(HAVE_OPENMP)
    static OpenMPTaskDispatcher concurrent;
#else
    SequentialTaskDispatcher & concurrent = sequential;
#endif

    //TaskDispatcher * dispatcher = &sequential;
    TaskDispatcher * dispatcher = &concurrent;

    // Use a single thread to compress small textures.
    if (context.bh < 4) dispatcher = &sequential;

    const uint count = context.bw * context.bh;
    const uint size = context.bs * count;
    context.mem = new uint8[size];

    // One task per row of blocks; the task index is the block row.
    dispatcher->dispatch(CompressorTask, &context, context.bh);

    outputOptions.writeData(context.mem, size);

    delete [] context.mem;
}
// Compresses a w x h image one row of 4x4 blocks at a time. Each row is
// compressed into a scratch buffer that holds exactly one row of blocks
// (blockSize() bytes per block) and is then passed to the output handler,
// if one is set.
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint bs = blockSize();
    const uint bw = (w + 3) / 4;    // Number of blocks per row, rounded up.
    const uint rowSize = bs * bw;

    //bool singleThreaded = true;
    //if (singleThreaded)
    {
        uint8 * mem = malloc<uint8>(rowSize);

        ColorSet set;

        for (uint y = 0; y < h; y += 4) {
            // Rewind to the start of the scratch buffer for every row. The
            // buffer only has room for one row, so carrying the write pointer
            // over from the previous row would write past the allocation.
            uint8 * ptr = mem;

            for (uint x = 0; x < w; x += 4, ptr += bs) {
                set.setColors(data, w, h, x, y);
                compressBlock(set, alphaMode, compressionOptions, ptr);
            }

            if (outputOptions.outputHandler != NULL) {
                outputOptions.outputHandler->writeData(mem, rowSize);
            }
        }

        free(mem);
    }
}

View File

@ -56,7 +56,7 @@
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
#include <dispatch/dispatch.h>
Compressor::Compressor() : m(*new Compressor::Private()) Compressor::Compressor() : m(*new Compressor::Private())
{ {
@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
Compressor::~Compressor() Compressor::~Compressor()
{ {
delete &m; delete &m;
cuda::exit();
} }
@ -84,23 +83,13 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL) if (m.cudaEnabled && m.cuda == NULL)
{ {
// Select fastest CUDA device. @@ This is done automatically on current CUDA versions. m.cuda = new CudaContext();
int device = cuda::getFastestDevice();
if (!cuda::setDevice(device)) if (!m.cuda->isValid())
{ {
m.cudaEnabled = false; m.cudaEnabled = false;
m.cuda = NULL; m.cuda = NULL;
} }
else
{
m.cuda = new CudaContext();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
} }
} }

View File

56
src/nvtt/TaskDispatcher.h Normal file
View File

@ -0,0 +1,56 @@
#include "nvtt.h"
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
#if NV_OS_DARWIN
//#if defined(HAVE_DISPATCH)
#include <dispatch/dispatch.h>
#endif
namespace nvtt {
// Task dispatcher that runs every task on the calling thread, in index order.
struct SequentialTaskDispatcher : public TaskDispatcher
{
    // Invokes task(context, index) for index = 0 .. count-1, one after another.
    virtual void dispatch(Task * task, void * context, size_t count) {
        size_t index = 0;
        while (index < count) {
            task(context, index);
            ++index;
        }
    }
};
#if NV_OS_DARWIN
// Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher
{
virtual void dispatch(Task * task, void * context, size_t count) {
dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
dispatch_apply_f(count, q, context, task);
}
};
#endif
#if defined(HAVE_OPENMP)
// Task dispatcher that distributes the tasks over an OpenMP parallel region.
struct OpenMPTaskDispatcher : public TaskDispatcher
{
    // Invokes task(context, index) for index = 0 .. count-1, with the
    // iterations shared among the threads of the parallel region.
    virtual void dispatch(Task * task, void * context, size_t count) {
        // OpenMP 2.x compilers require the index of an 'omp for' loop to have
        // a signed integral type, so iterate with a signed counter and convert
        // back to size_t when calling the task.
        const ptrdiff_t total = ptrdiff_t(count);

        #pragma omp parallel
        {
            #pragma omp for
            for (ptrdiff_t i = 0; i < total; i++) {
                task(context, size_t(i));
            }
        }
    }
};
#endif
} // namespace nvtt

View File

@ -56,3 +56,5 @@ unsigned int nvtt::version()
return NVTT_VERSION; return NVTT_VERSION;
} }

View File

@ -26,6 +26,8 @@
#ifndef NVTT_H #ifndef NVTT_H
#define NVTT_H #define NVTT_H
#include <stddef.h> // size_t @@ Use or own define?
// Function linkage // Function linkage
#if NVTT_SHARED #if NVTT_SHARED
@ -331,6 +333,12 @@ namespace nvtt
NVTT_API void setUserVersion(int version); NVTT_API void setUserVersion(int version);
}; };
/// Unit of work run by a dispatcher: receives the shared context pointer and the index of the task.
typedef void Task(void * context, size_t id);

/// Interface for objects that run a batch of tasks.
struct TaskDispatcher
{
    /// Virtual so that implementations can be destroyed through a TaskDispatcher pointer.
    virtual ~TaskDispatcher() {}

    /// Invoke @a task @a count times, passing @a context and the task index to each invocation.
    virtual void dispatch(Task * task, void * context, size_t count) = 0;
};
/// Context. /// Context.
struct Compressor struct Compressor
@ -344,6 +352,7 @@ namespace nvtt
// Context settings. // Context settings.
NVTT_API void enableCudaAcceleration(bool enable); NVTT_API void enableCudaAcceleration(bool enable);
NVTT_API bool isCudaAccelerationEnabled() const; NVTT_API bool isCudaAccelerationEnabled() const;
NVTT_API void setTaskDispatcher(TaskDispatcher * disp);
// InputOptions API. // InputOptions API.
NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const; NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;