concurrency experiments.
parent
0bb3011f7f
commit
c51f25f38d
@ -1,143 +1,208 @@
|
|||||||
// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
|
// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
|
||||||
// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
|
// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
|
||||||
//
|
//
|
||||||
// Permission is hereby granted, free of charge, to any person
|
// Permission is hereby granted, free of charge, to any person
|
||||||
// obtaining a copy of this software and associated documentation
|
// obtaining a copy of this software and associated documentation
|
||||||
// files (the "Software"), to deal in the Software without
|
// files (the "Software"), to deal in the Software without
|
||||||
// restriction, including without limitation the rights to use,
|
// restriction, including without limitation the rights to use,
|
||||||
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
// copies of the Software, and to permit persons to whom the
|
// copies of the Software, and to permit persons to whom the
|
||||||
// Software is furnished to do so, subject to the following
|
// Software is furnished to do so, subject to the following
|
||||||
// conditions:
|
// conditions:
|
||||||
//
|
//
|
||||||
// The above copyright notice and this permission notice shall be
|
// The above copyright notice and this permission notice shall be
|
||||||
// included in all copies or substantial portions of the Software.
|
// included in all copies or substantial portions of the Software.
|
||||||
//
|
//
|
||||||
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
// OTHER DEALINGS IN THE SOFTWARE.
|
// OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
#include "CompressorDXT.h"
|
#include "CompressorDXT.h"
|
||||||
#include "OutputOptions.h"
|
#include "OutputOptions.h"
|
||||||
|
|
||||||
#include "nvtt.h"
|
#include "nvtt.h"
|
||||||
|
#include "TaskDispatcher.h"
|
||||||
#include "nvcore/Memory.h"
|
|
||||||
|
#include "nvcore/Memory.h"
|
||||||
#include "nvimage/Image.h"
|
|
||||||
#include "nvimage/ColorBlock.h"
|
#include "nvimage/Image.h"
|
||||||
#include "nvimage/BlockDXT.h"
|
#include "nvimage/ColorBlock.h"
|
||||||
|
#include "nvimage/BlockDXT.h"
|
||||||
#include <new> // placement new
|
|
||||||
|
#include <new> // placement new
|
||||||
|
|
||||||
// OpenMP
|
|
||||||
#if defined(HAVE_OPENMP)
|
using namespace nv;
|
||||||
#include <omp.h>
|
using namespace nvtt;
|
||||||
#endif
|
|
||||||
|
/*
|
||||||
using namespace nv;
|
// OpenMP
|
||||||
using namespace nvtt;
|
#if defined(HAVE_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
|
||||||
{
|
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||||
const uint bs = blockSize();
|
{
|
||||||
const uint bw = (w + 3) / 4;
|
const uint bs = blockSize();
|
||||||
const uint bh = (h + 3) / 4;
|
const uint bw = (w + 3) / 4;
|
||||||
|
const uint bh = (h + 3) / 4;
|
||||||
#if defined(HAVE_OPENMP)
|
|
||||||
bool singleThreaded = false;
|
#if defined(HAVE_OPENMP)
|
||||||
#else
|
bool singleThreaded = false;
|
||||||
bool singleThreaded = true;
|
#else
|
||||||
#endif
|
bool singleThreaded = true;
|
||||||
|
#endif
|
||||||
// Use a single thread to compress small textures.
|
|
||||||
if (bw * bh < 16) singleThreaded = true;
|
// Use a single thread to compress small textures.
|
||||||
|
if (bw * bh < 16) singleThreaded = true;
|
||||||
if (singleThreaded)
|
|
||||||
{
|
if (singleThreaded)
|
||||||
nvDebugCheck(bs <= 16);
|
{
|
||||||
uint8 mem[16]; // @@ Output one row at a time!
|
nvDebugCheck(bs <= 16);
|
||||||
|
uint8 mem[16]; // @@ Output one row at a time!
|
||||||
for (int y = 0; y < int(h); y += 4) {
|
|
||||||
for (uint x = 0; x < w; x += 4) {
|
for (int y = 0; y < int(h); y += 4) {
|
||||||
|
for (uint x = 0; x < w; x += 4) {
|
||||||
ColorBlock rgba;
|
|
||||||
rgba.init(w, h, data, x, y);
|
ColorBlock rgba;
|
||||||
|
rgba.init(w, h, data, x, y);
|
||||||
compressBlock(rgba, alphaMode, compressionOptions, mem);
|
|
||||||
|
compressBlock(rgba, alphaMode, compressionOptions, mem);
|
||||||
if (outputOptions.outputHandler != NULL) {
|
|
||||||
outputOptions.outputHandler->writeData(mem, bs);
|
if (outputOptions.outputHandler != NULL) {
|
||||||
}
|
outputOptions.outputHandler->writeData(mem, bs);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#if defined(HAVE_OPENMP)
|
}
|
||||||
else
|
#if defined(HAVE_OPENMP)
|
||||||
{
|
else
|
||||||
const uint size = bs * bw * bh;
|
{
|
||||||
uint8 * mem = new uint8[size];
|
const uint size = bs * bw * bh;
|
||||||
|
uint8 * mem = new uint8[size];
|
||||||
#pragma omp parallel
|
|
||||||
{
|
#pragma omp parallel
|
||||||
#pragma omp for
|
{
|
||||||
for (int i = 0; i < int(bw*bh); i++)
|
#pragma omp for
|
||||||
{
|
for (int i = 0; i < int(bw*bh); i++)
|
||||||
const uint x = i % bw;
|
{
|
||||||
const uint y = i / bw;
|
const uint x = i % bw;
|
||||||
|
const uint y = i / bw;
|
||||||
ColorBlock rgba;
|
|
||||||
rgba.init(w, h, data, 4*x, 4*y);
|
ColorBlock rgba;
|
||||||
|
rgba.init(w, h, data, 4*x, 4*y);
|
||||||
uint8 * ptr = mem + (y * bw + x) * bs;
|
|
||||||
compressBlock(rgba, alphaMode, compressionOptions, ptr);
|
uint8 * ptr = mem + (y * bw + x) * bs;
|
||||||
} // omp for
|
compressBlock(rgba, alphaMode, compressionOptions, ptr);
|
||||||
} // omp parallel
|
} // omp for
|
||||||
|
} // omp parallel
|
||||||
if (outputOptions.outputHandler != NULL) {
|
|
||||||
outputOptions.outputHandler->writeData(mem, size);
|
if (outputOptions.outputHandler != NULL) {
|
||||||
}
|
outputOptions.outputHandler->writeData(mem, size);
|
||||||
|
}
|
||||||
delete [] mem;
|
|
||||||
}
|
delete [] mem;
|
||||||
#endif
|
}
|
||||||
}
|
#endif
|
||||||
|
}
|
||||||
|
*/
|
||||||
//#include "bc6h/tile.h"
|
|
||||||
|
|
||||||
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
struct CompressorContext
|
||||||
{
|
{
|
||||||
const uint bs = blockSize();
|
nvtt::AlphaMode alphaMode;
|
||||||
const uint bw = (w + 3) / 4;
|
uint w, h;
|
||||||
const uint bh = (h + 3) / 4;
|
const float * data;
|
||||||
|
const nvtt::CompressionOptions::Private * compressionOptions;
|
||||||
//bool singleThreaded = true;
|
|
||||||
//if (singleThreaded)
|
uint bw, bh, bs;
|
||||||
{
|
uint8 * mem;
|
||||||
uint8 * mem = malloc<uint8>(bs * bw);
|
FixedBlockCompressor * compressor;
|
||||||
uint8 * ptr = mem;
|
};
|
||||||
|
|
||||||
ColorSet set;
|
// Each task compresses one row.
|
||||||
|
void CompressorTask(void * data, size_t y)
|
||||||
for (uint y = 0; y < h; y += 4) {
|
{
|
||||||
for (uint x = 0; x < w; x += 4, ptr += bs) {
|
CompressorContext * d = (CompressorContext *) data;
|
||||||
set.setColors(data, w, h, x, y);
|
|
||||||
compressBlock(set, alphaMode, compressionOptions, ptr);
|
for (uint x = 0; x < d->bw; x++)
|
||||||
}
|
{
|
||||||
|
ColorBlock rgba;
|
||||||
if (outputOptions.outputHandler != NULL) {
|
rgba.init(d->w, d->h, d->data, 4*x, 4*y);
|
||||||
outputOptions.outputHandler->writeData(mem, bs * bw);
|
|
||||||
}
|
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
|
||||||
}
|
d->compressor->compressBlock(rgba, d->alphaMode, *d->compressionOptions, ptr);
|
||||||
|
}
|
||||||
free(mem);
|
}
|
||||||
}
|
|
||||||
}
|
// Compresses a w x h image into fixed-size blocks and passes the result to outputOptions.writeData.
//
// The image is covered by bw x bh tiles of 4x4 pixels (sizes are rounded up). Each dispatched task
// runs CompressorTask for one row of tiles, and every task writes into its own slice of a single
// shared buffer. The buffer is written out in one call once dispatch() has returned.
void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    CompressorContext context;
    context.alphaMode = alphaMode;
    context.w = w;
    context.h = h;
    context.data = data;
    context.compressionOptions = &compressionOptions;

    context.bs = blockSize();
    context.bw = (w + 3) / 4;
    context.bh = (h + 3) / 4;

    context.compressor = this;

    static SequentialTaskDispatcher sequential;

    // Only instantiate a concurrent dispatcher that exists in this build: AppleTaskDispatcher is
    // declared under NV_OS_DARWIN and OpenMPTaskDispatcher under HAVE_OPENMP. Without either,
    // fall back to running the rows sequentially.
#if NV_OS_DARWIN
    static AppleTaskDispatcher concurrent;
#elif defined(HAVE_OPENMP)
    static OpenMPTaskDispatcher concurrent;
#else
    static SequentialTaskDispatcher concurrent;
#endif

    TaskDispatcher * dispatcher = &concurrent;

    // Use a single thread to compress small textures.
    if (context.bh < 4) dispatcher = &sequential;

    const uint count = context.bw * context.bh;
    const uint size = context.bs * count;
    context.mem = new uint8[size];

    // One task per row of blocks.
    dispatcher->dispatch(CompressorTask, &context, context.bh);

    outputOptions.writeData(context.mem, size);

    delete [] context.mem;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Compresses the image one row of 4x4 blocks at a time.
//
// A scratch buffer sized for exactly one row of compressed blocks (bs * bw bytes) is filled by
// compressBlock and then passed to the output handler, if one is set, before the next row starts.
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    const uint bs = blockSize();
    const uint bw = (w + 3) / 4;

    // Holds a single row of blocks; reused for every row.
    const uint rowSize = bs * bw;
    uint8 * mem = malloc<uint8>(rowSize);

    ColorSet set;

    for (uint y = 0; y < h; y += 4) {
        // Rewind to the start of the row buffer. Carrying the write pointer over from the
        // previous row would run past the end of the one-row allocation.
        uint8 * ptr = mem;

        for (uint x = 0; x < w; x += 4, ptr += bs) {
            set.setColors(data, w, h, x, y);
            compressBlock(set, alphaMode, compressionOptions, ptr);
        }

        if (outputOptions.outputHandler != NULL) {
            outputOptions.outputHandler->writeData(mem, rowSize);
        }
    }

    free(mem);
}
|
||||||
|
@ -0,0 +1,56 @@
|
|||||||
|
|
||||||
|
#include "nvtt.h"
|
||||||
|
|
||||||
|
// OpenMP
|
||||||
|
#if defined(HAVE_OPENMP)
|
||||||
|
#include <omp.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if NV_OS_DARWIN
|
||||||
|
//#if defined(HAVE_DISPATCH)
|
||||||
|
#include <dispatch/dispatch.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
namespace nvtt {
|
||||||
|
|
||||||
|
struct SequentialTaskDispatcher : public TaskDispatcher
|
||||||
|
{
|
||||||
|
virtual void dispatch(Task * task, void * context, size_t count) {
|
||||||
|
for (size_t i = 0; i < count; i++) {
|
||||||
|
task(context, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#if NV_OS_DARWIN
|
||||||
|
|
||||||
|
// Task dispatcher using Apple's Grand Central Dispatch.
|
||||||
|
struct AppleTaskDispatcher : public TaskDispatcher
|
||||||
|
{
|
||||||
|
virtual void dispatch(Task * task, void * context, size_t count) {
|
||||||
|
dispatch_queue_t q = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_HIGH, 0);
|
||||||
|
dispatch_apply_f(count, q, context, task);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#if defined(HAVE_OPENMP)
|
||||||
|
|
||||||
|
struct OpenMPTaskDispatcher : public TaskDispatcher
|
||||||
|
{
|
||||||
|
virtual void dispatch(Task * task, void * context, size_t count) {
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
#pragma omp for
|
||||||
|
for (size_t i = 0; i < count; i++) {
|
||||||
|
task(context, i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
} // namespace nvtt
|
Loading…
Reference in New Issue