concurrency experiments.

This commit is contained in:
castano 2010-11-22 07:34:05 +00:00
parent 0bb3011f7f
commit c51f25f38d
8 changed files with 298 additions and 188 deletions

View File

@ -1,41 +1,28 @@
PROJECT(nvtt)
ADD_SUBDIRECTORY(squish)
ADD_SUBDIRECTORY(bc6h)
#ADD_SUBDIRECTORY(bc6h)
#ADD_SUBDIRECTORY(bc7)
SET(NVTT_SRCS
nvtt.h
nvtt.cpp
Context.h
Context.cpp
nvtt_wrapper.h
nvtt_wrapper.cpp
nvtt.h nvtt.cpp
nvtt_wrapper.h nvtt_wrapper.cpp
ClusterFit.h ClusterFit.cpp
Compressor.h
CompressorDXT.h
CompressorDXT.cpp
CompressorDX9.h
CompressorDX9.cpp
CompressorDX10.h
CompressorDX10.cpp
CompressorDX11.h
CompressorDX11.cpp
CompressorRGB.h
CompressorRGB.cpp
CompressorRGBE.h
CompressorRGBE.cpp
QuickCompressDXT.h
QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h
SingleColorLookup.cpp
CompressionOptions.h
CompressionOptions.cpp
InputOptions.h
InputOptions.cpp
OutputOptions.h
OutputOptions.cpp
CompressorDXT.h CompressorDXT.cpp
CompressorDX9.h CompressorDX9.cpp
CompressorDX10.h CompressorDX10.cpp
# CompressorDX11.h CompressorDX11.cpp
CompressorRGB.h CompressorRGB.cpp
CompressorRGBE.h CompressorRGBE.cpp
Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp
OptimalCompressDXT.h OptimalCompressDXT.cpp
SingleColorLookup.h SingleColorLookup.cpp
CompressionOptions.h CompressionOptions.cpp
InputOptions.h InputOptions.cpp
OutputOptions.h OutputOptions.cpp
TaskDispatcher.h TaskDispatcher.cpp
TexImage.h TexImage.cpp
cuda/CudaUtils.h
cuda/CudaUtils.cpp
@ -62,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish bc6h)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvmath nvimage squish)
INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin

View File

@ -28,6 +28,8 @@
#include "nvmath/Fitting.h"
#include "nvimage/ColorBlock.h"
#include <float.h> // FLT_MAX
using namespace nv;
ClusterFit::ClusterFit()

View File

@ -1,143 +1,208 @@
// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorDXT.h"
#include "OutputOptions.h"
#include "nvtt.h"
#include "nvcore/Memory.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include <new> // placement new
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
using namespace nv;
using namespace nvtt;
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false;
#else
bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{
nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs);
}
}
}
}
#if defined(HAVE_OPENMP)
else
{
const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < int(bw*bh); i++)
{
const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
}
#endif
}
//#include "bc6h/tile.h"
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
//bool singleThreaded = true;
//if (singleThreaded)
{
uint8 * mem = malloc<uint8>(bs * bw);
uint8 * ptr = mem;
ColorSet set;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4, ptr += bs) {
set.setColors(data, w, h, x, y);
compressBlock(set, alphaMode, compressionOptions, ptr);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs * bw);
}
}
free(mem);
}
}
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "CompressorDXT.h"
#include "OutputOptions.h"
#include "nvtt.h"
#include "TaskDispatcher.h"
#include "nvcore/Memory.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include <new> // placement new
using namespace nv;
using namespace nvtt;
/*
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false;
#else
bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{
nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs);
}
}
}
}
#if defined(HAVE_OPENMP)
else
{
const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < int(bw*bh); i++)
{
const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
}
#endif
}
*/
struct CompressorContext
{
nvtt::AlphaMode alphaMode;
uint w, h;
const float * data;
const nvtt::CompressionOptions::Private * compressionOptions;
uint bw, bh, bs;
uint8 * mem;
FixedBlockCompressor * compressor;
};
// Each task compresses one row.
void CompressorTask(void * data, size_t y)
{
CompressorContext * d = (CompressorContext *) data;
for (uint x = 0; x < d->bw; x++)
{
ColorBlock rgba;
rgba.init(d->w, d->h, d->data, 4*x, 4*y);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr);
}
}
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
CompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
context.compressor = this;
static SequentialTaskDispatcher sequential;
static AppleTaskDispatcher concurrent;
//TaskDispatcher * dispatcher = &sequential;
TaskDispatcher * dispatcher = &concurrent;
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(CompressorTask, &context, context.bh);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
}
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
//bool singleThreaded = true;
//if (singleThreaded)
{
uint8 * mem = malloc<uint8>(bs * bw);
uint8 * ptr = mem;
ColorSet set;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4, ptr += bs) {
set.setColors(data, w, h, x, y);
compressBlock(set, alphaMode, compressionOptions, ptr);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs * bw);
}
}
free(mem);
}
}

View File

@ -56,7 +56,7 @@
using namespace nv;
using namespace nvtt;
#include <dispatch/dispatch.h>
Compressor::Compressor() : m(*new Compressor::Private())
{
@ -71,7 +71,6 @@ Compressor::Compressor() : m(*new Compressor::Private())
Compressor::~Compressor()
{
delete &m;
cuda::exit();
}
@ -84,23 +83,13 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device. @@ This is done automatically on current CUDA versions.
int device = cuda::getFastestDevice();
if (!cuda::setDevice(device))
m.cuda = new CudaContext();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
else
{
m.cuda = new CudaContext();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
}

View File

56
src/nvtt/TaskDispatcher.h Normal file
View File

@ -0,0 +1,56 @@
#include "nvtt.h"
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
#if NV_OS_DARWIN
//#if defined(HAVE_DISPATCH)
#include <dispatch/dispatch.h>
#endif
namespace nvtt {
struct SequentialTaskDispatcher : public TaskDispatcher
{
virtual void dispatch(Task * task, void * context, size_t count) {
for (size_t i = 0; i < count; i++) {
task(context, i);
}
}
};
#if NV_OS_DARWIN
// Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher
{
virtual void dispatch(Task * task, void * context, size_t count) {
dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
dispatch_apply_f(count, q, context, task);
}
};
#endif
#if defined(HAVE_OPENMP)
struct OpenMPTaskDispatcher : public TaskDispatcher
{
virtual void dispatch(Task * task, void * context, size_t count) {
#pragma omp parallel
{
#pragma omp for
for (size_t i = 0; i < count; i++) {
task(context, i);
}
}
}
};
#endif
} // namespace nvtt

View File

@ -56,3 +56,5 @@ unsigned int nvtt::version()
return NVTT_VERSION;
}

View File

@ -26,6 +26,8 @@
#ifndef NVTT_H
#define NVTT_H
#include <stddef.h> // size_t @@ Use or own define?
// Function linkage
#if NVTT_SHARED
@ -331,6 +333,12 @@ namespace nvtt
NVTT_API void setUserVersion(int version);
};
typedef void Task(void * context, size_t id);
struct TaskDispatcher
{
virtual void dispatch(Task * task, void * context, size_t count) = 0;
};
/// Context.
struct Compressor
@ -344,6 +352,7 @@ namespace nvtt
// Context settings.
NVTT_API void enableCudaAcceleration(bool enable);
NVTT_API bool isCudaAccelerationEnabled() const;
NVTT_API void setTaskDispatcher(TaskDispatcher * disp);
// InputOptions API.
NVTT_API bool process(const InputOptions & inputOptions, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;