Large refactoring of the compressor code:
- Define a compressor interface.
- Implement the compressor interface for the different compressors.
- Add a parallel compressor using OpenMP (experimental).
- Add a generic GPU compressor; so far only DXT1 is enabled.
This commit is contained in:
parent
18a3abf794
commit
8820c43175
@ -71,453 +71,326 @@ typedef ULONG_PTR DWORD_PTR;
|
||||
#include "stb/stb_dxt.h"
|
||||
#endif
|
||||
|
||||
#pragma message(NV_FILE_LINE "FIXME: Define HAVE_OPENMP from cmake.")
|
||||
#define HAVE_OPENMP
|
||||
#include <omp.h>
|
||||
|
||||
using namespace nv;
|
||||
using namespace nvtt;
|
||||
|
||||
|
||||
nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
|
||||
void FixedBlockCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
}
|
||||
const uint bs = blockSize();
|
||||
const uint bw = (w + 3) / 4;
|
||||
const uint bh = (h + 3) / 4;
|
||||
const uint size = bs * bw * bh;
|
||||
|
||||
nv::FastCompressor::~FastCompressor()
|
||||
{
|
||||
}
|
||||
#if defined(HAVE_OPENMP)
|
||||
bool singleThreaded = false;
|
||||
#else
|
||||
bool singleThreaded = true;
|
||||
#endif
|
||||
|
||||
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||
{
|
||||
m_image = image;
|
||||
m_alphaMode = alphaMode;
|
||||
}
|
||||
// Use a single thread to compress small textures.
|
||||
if (bw * bh < 16) singleThreaded = true;
|
||||
|
||||
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
QuickCompress::compressDXT1(rgba, &block);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
QuickCompress::compressDXT1a(rgba, &block);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT3 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
QuickCompress::compressDXT3(rgba, &block);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
QuickCompress::compressDXT5(rgba, &block, 0);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
rgba.swizzleDXT5n();
|
||||
|
||||
QuickCompress::compressDXT5(rgba, &block, 0);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
|
||||
{
|
||||
}
|
||||
|
||||
nv::SlowCompressor::~SlowCompressor()
|
||||
{
|
||||
}
|
||||
|
||||
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||
{
|
||||
m_image = image;
|
||||
m_alphaMode = alphaMode;
|
||||
}
|
||||
|
||||
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
//nvsquish::ClusterFit fit;
|
||||
//nvsquish::FastClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
|
||||
fit.SetColourSet(&colours, nvsquish::kDxt1);
|
||||
fit.Compress(&block);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT1 block;
|
||||
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
bool anyAlpha = false;
|
||||
bool allAlpha = true;
|
||||
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
if (rgba.color(i).a < 128) anyAlpha = true;
|
||||
else allAlpha = false;
|
||||
}
|
||||
|
||||
if ((!anyAlpha && rgba.isSingleColor() || allAlpha))
|
||||
{
|
||||
OptimalCompress::compressDXT1a(rgba.color(0), &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), nvsquish::kDxt1|nvsquish::kWeightColourByAlpha);
|
||||
fit.SetColourSet(&colours, nvsquish::kDxt1);
|
||||
fit.Compress(&block);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT3 block;
|
||||
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
int flags = 0;
|
||||
if (m_alphaMode == AlphaMode_Transparency)
|
||||
if (singleThreaded)
|
||||
{
|
||||
flags = nvsquish::kWeightColourByAlpha;
|
||||
}
|
||||
nvDebugCheck(bs <= 16);
|
||||
uint8 mem[16];
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
for (int y = 0; y < int(h); y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(m_image, x, y);
|
||||
ColorBlock rgba;
|
||||
if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
|
||||
rgba.init(w, h, (uint *)data, x, y);
|
||||
}
|
||||
else {
|
||||
nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
|
||||
rgba.init(w, h, (float *)data, x, y);
|
||||
}
|
||||
|
||||
// Compress explicit alpha.
|
||||
OptimalCompress::compressDXT3A(rgba, &block.alpha);
|
||||
compressBlock(rgba, alphaMode, compressionOptions, mem);
|
||||
|
||||
// Compress color.
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(mem, bs);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#if defined(HAVE_OPENMP)
|
||||
else
|
||||
{
|
||||
uint8 * mem = new uint8[size];
|
||||
|
||||
#pragma omp parallel
|
||||
{
|
||||
#pragma omp for
|
||||
for (int i = 0; i < int(bw*bh); i++)
|
||||
{
|
||||
const uint x = i % bw;
|
||||
const uint y = i / bw;
|
||||
|
||||
ColorBlock rgba;
|
||||
if (inputFormat == nvtt::InputFormat_BGRA_8UB) {
|
||||
rgba.init(w, h, (uint *)data, 4*x, 4*y);
|
||||
}
|
||||
else {
|
||||
nvDebugCheck(inputFormat == nvtt::InputFormat_RGBA_32F);
|
||||
rgba.init(w, h, (float *)data, 4*x, 4*y);
|
||||
}
|
||||
|
||||
uint8 * ptr = mem + (y * bw + x) * bs;
|
||||
compressBlock(rgba, alphaMode, compressionOptions, ptr);
|
||||
} // omp for
|
||||
} // omp parallel
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(mem, size);
|
||||
}
|
||||
|
||||
delete [] mem;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
|
||||
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
BlockDXT1 * block = new(output) BlockDXT1;
|
||||
QuickCompress::compressDXT1(rgba, block);
|
||||
}
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
// Fast DXT1a encoder for a single color block.
//
// rgba   - source block handed to QuickCompress::compressDXT1a.
// output - destination storage; a BlockDXT1 is constructed in place there, so
//          it must be able to hold one BlockDXT1.
//
// alphaMode and compressionOptions are part of the common compressBlock
// signature but are not consulted by this implementation.
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockDXT1 * const dst = new(output) BlockDXT1;

	QuickCompress::compressDXT1a(rgba, dst);
}
|
||||
|
||||
// Fast DXT3 encoder for a single color block.
//
// rgba   - source block handed to QuickCompress::compressDXT3.
// output - destination storage; a BlockDXT3 is constructed in place there, so
//          it must be able to hold one BlockDXT3.
//
// alphaMode and compressionOptions are part of the common compressBlock
// signature but are not consulted by this implementation.
void FastCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockDXT3 * const dst = new(output) BlockDXT3;

	QuickCompress::compressDXT3(rgba, dst);
}
|
||||
|
||||
// Fast DXT5 encoder for a single color block.
//
// rgba   - source block handed to QuickCompress::compressDXT5.
// output - destination storage; a BlockDXT5 is constructed in place there, so
//          it must be able to hold one BlockDXT5.
//
// alphaMode and compressionOptions are part of the common compressBlock
// signature but are not consulted by this implementation.
void FastCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockDXT5 * const dst = new(output) BlockDXT5;

	QuickCompress::compressDXT5(rgba, dst);
}
|
||||
|
||||
// Fast DXT5n encoder for a single color block.
//
// The block is swizzled in place first (the caller's rgba is modified) and is
// then encoded with the regular fast DXT5 routine.
//
// rgba   - source block; rearranged by this call.
// output - destination storage; a BlockDXT5 is constructed in place there.
//
// alphaMode and compressionOptions are not consulted by this implementation.
void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	// Resulting channels: 0xFF, G, 0, R
	rgba.swizzle(4, 1, 5, 0);

	BlockDXT5 * const dst = new(output) BlockDXT5;

	QuickCompress::compressDXT5(rgba, dst);
}
|
||||
|
||||
// Fast BC4 encoder for a single color block.
//
// A BlockATI1 is constructed in place in `output`; the red channel is copied
// into alpha (modifying the caller's rgba) and that channel is encoded into
// the block's alpha member with QuickCompress::compressDXT5A.
//
// alphaMode and compressionOptions are not consulted by this implementation.
void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockATI1 * const dst = new(output) BlockATI1;

	// Copy red to alpha so the alpha encoder sees the red channel.
	rgba.swizzle(0, 1, 2, 0);

	QuickCompress::compressDXT5A(rgba, &dst->alpha);
}
|
||||
|
||||
// Fast BC5 encoder for a single color block.
//
// A BlockATI2 is constructed in place in `output`. The two halves are encoded
// one after the other with QuickCompress::compressDXT5A: first red is copied
// into alpha and written to `x`, then green is copied into alpha and written
// to `y`. The caller's rgba is modified by both swizzles.
//
// alphaMode and compressionOptions are not consulted by this implementation.
void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockATI2 * const dst = new(output) BlockATI2;

	// First half: copy red to alpha.
	rgba.swizzle(0, 1, 2, 0);
	QuickCompress::compressDXT5A(rgba, &dst->x);

	// Second half: copy green to alpha.
	rgba.swizzle(0, 1, 2, 1);
	QuickCompress::compressDXT5A(rgba, &dst->y);
}
|
||||
|
||||
|
||||
void NormalCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
int flags = 0;
|
||||
if (m_alphaMode == AlphaMode_Transparency)
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
flags = nvsquish::kWeightColourByAlpha;
|
||||
BlockDXT1 * block = new(output) BlockDXT1;
|
||||
OptimalCompress::compressDXT1(rgba.color(0), block);
|
||||
}
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
|
||||
rgba.init(m_image, x, y);
|
||||
|
||||
// Compress alpha.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
|
||||
// Compress color.
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
||||
fit.SetColourSet(&colours, nvsquish::kDxt1);
|
||||
fit.Compress(output);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
void NormalCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
bool anyAlpha = false;
|
||||
bool allAlpha = true;
|
||||
|
||||
ColorBlock rgba;
|
||||
BlockDXT5 block;
|
||||
for (uint i = 0; i < 16; i++)
|
||||
{
|
||||
if (rgba.color(i).a < 128) anyAlpha = true;
|
||||
else allAlpha = false;
|
||||
}
|
||||
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(0, 1, 0);
|
||||
const bool isSingleColor = rgba.isSingleColor();
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
if ((!anyAlpha && isSingleColor || allAlpha))
|
||||
{
|
||||
BlockDXT1 * block = new(output) BlockDXT1;
|
||||
OptimalCompress::compressDXT1a(rgba.color(0), block);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
rgba.init(m_image, x, y);
|
||||
int flags = nvsquish::kDxt1;
|
||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||
|
||||
rgba.swizzleDXT5n();
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, nvsquish::kDxt1);
|
||||
|
||||
// Compress X.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(rgba, &block.alpha);
|
||||
}
|
||||
|
||||
// Compress Y.
|
||||
//OptimalCompress::compressDXT1G(rgba, &block.color);
|
||||
|
||||
/*if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1G(rgba.color(0), &block.color);
|
||||
}
|
||||
else*/
|
||||
{
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block.color);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
fit.Compress(output);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void NormalCompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
BlockDXT3 * block = new(output) BlockDXT3;
|
||||
|
||||
ColorBlock rgba;
|
||||
AlphaBlockDXT5 block;
|
||||
// Compress explicit alpha.
|
||||
OptimalCompress::compressDXT3A(rgba, &block->alpha);
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
// Compress color.
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
rgba.init(m_image, x, y);
|
||||
int flags = 0;
|
||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT5A(rgba, &block);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(rgba, &block);
|
||||
}
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
}
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block->color);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void NormalCompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
const uint w = m_image->width();
|
||||
const uint h = m_image->height();
|
||||
BlockDXT5 * block = new(output) BlockDXT5;
|
||||
|
||||
ColorBlock xcolor;
|
||||
ColorBlock ycolor;
|
||||
// Compress alpha.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT5A(rgba, &block->alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(rgba, &block->alpha);
|
||||
}
|
||||
|
||||
BlockATI2 block;
|
||||
// Compress color.
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
int flags = 0;
|
||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||
|
||||
xcolor.init(m_image, x, y);
|
||||
xcolor.splatX();
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block->color);
|
||||
}
|
||||
}
|
||||
|
||||
ycolor.init(m_image, x, y);
|
||||
ycolor.splatY();
|
||||
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT5A(xcolor, &block.x);
|
||||
OptimalCompress::compressDXT5A(ycolor, &block.y);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(xcolor, &block.x);
|
||||
QuickCompress::compressDXT5A(ycolor, &block.y);
|
||||
}
|
||||
void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
rgba.swizzle(4, 1, 5, 0); // 0xFF, G, 0, R
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(&block, sizeof(block));
|
||||
}
|
||||
BlockDXT5 * block = new(output) BlockDXT5;
|
||||
|
||||
// Compress X.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT5A(rgba, &block->alpha);
|
||||
}
|
||||
else
|
||||
{
|
||||
QuickCompress::compressDXT5A(rgba, &block->alpha);
|
||||
}
|
||||
|
||||
// Compress Y.
|
||||
if (compressionOptions.quality == Quality_Highest)
|
||||
{
|
||||
OptimalCompress::compressDXT1G(rgba, &block->color);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (rgba.isSingleColor())
|
||||
{
|
||||
OptimalCompress::compressDXT1G(rgba.color(0), &block->color);
|
||||
}
|
||||
else
|
||||
{
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(0, 1, 0);
|
||||
|
||||
int flags = 0;
|
||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block->color);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Production-quality BC4 encoder for a single color block.
//
// Same flow as the fast BC4 path, but the channel is encoded with
// OptimalCompress::compressDXT5A. A BlockATI1 is constructed in place in
// `output`, and red is copied into alpha (modifying the caller's rgba) before
// encoding into the block's alpha member.
//
// alphaMode and compressionOptions are not consulted by this implementation.
void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockATI1 * const dst = new(output) BlockATI1;

	// Copy red to alpha so the alpha encoder sees the red channel.
	rgba.swizzle(0, 1, 2, 0);

	OptimalCompress::compressDXT5A(rgba, &dst->alpha);
}
|
||||
|
||||
// Production-quality BC5 encoder for a single color block.
//
// A BlockATI2 is constructed in place in `output`. The two halves are encoded
// one after the other with OptimalCompress::compressDXT5A: first red is copied
// into alpha and written to `x`, then green is copied into alpha and written
// to `y`. The caller's rgba is modified by both swizzles.
//
// alphaMode and compressionOptions are not consulted by this implementation.
void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
	BlockATI2 * const dst = new(output) BlockATI2;

	// First half: copy red to alpha.
	rgba.swizzle(0, 1, 2, 0);
	OptimalCompress::compressDXT5A(rgba, &dst->x);

	// Second half: copy green to alpha.
	rgba.swizzle(0, 1, 2, 1);
	OptimalCompress::compressDXT5A(rgba, &dst->y);
}
|
||||
|
||||
|
||||
|
||||
#if defined(HAVE_S3QUANT)
|
||||
|
||||
void nv::s3CompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
const uint w = image->width();
|
||||
const uint h = image->height();
|
||||
|
||||
float error = 0.0f;
|
||||
|
||||
BlockDXT1 dxtBlock3;
|
||||
@ -526,7 +399,7 @@ void nv::s3CompressDXT1(const Image * image, const OutputOptions::Private & outp
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
for (uint x = 0; x < w; x += 4) {
|
||||
block.init(image, x, y);
|
||||
block.init(inputFormat, w, h, data, x, y);
|
||||
|
||||
// Init rgb block.
|
||||
RGBBlock rgbBlock;
|
||||
@ -606,30 +479,47 @@ void nv::s3CompressDXT1(const Image * image, const OutputOptions::Private & outp
|
||||
|
||||
#if defined(HAVE_ATITC)
|
||||
|
||||
void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
// Init source texture
|
||||
ATI_TC_Texture srcTexture;
|
||||
srcTexture.dwSize = sizeof(srcTexture);
|
||||
srcTexture.dwWidth = image->width();
|
||||
srcTexture.dwHeight = image->height();
|
||||
srcTexture.dwPitch = image->width() * 4;
|
||||
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
|
||||
srcTexture.dwWidth = w;
|
||||
srcTexture.dwHeight = h;
|
||||
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
|
||||
{
|
||||
srcTexture.dwPitch = w * 4;
|
||||
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
|
||||
}
|
||||
else
|
||||
{
|
||||
srcTexture.dwPitch = w * 16;
|
||||
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
|
||||
}
|
||||
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
|
||||
srcTexture.pData = (ATI_TC_BYTE*) image->pixels();
|
||||
srcTexture.pData = (ATI_TC_BYTE*) data;
|
||||
|
||||
// Init dest texture
|
||||
ATI_TC_Texture destTexture;
|
||||
destTexture.dwSize = sizeof(destTexture);
|
||||
destTexture.dwWidth = image->width();
|
||||
destTexture.dwHeight = image->height();
|
||||
destTexture.dwWidth = w;
|
||||
destTexture.dwHeight = h;
|
||||
destTexture.dwPitch = 0;
|
||||
destTexture.format = ATI_TC_FORMAT_DXT1;
|
||||
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
|
||||
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
|
||||
|
||||
ATI_TC_CompressOptions options;
|
||||
options.dwSize = sizeof(options);
|
||||
options.bUseChannelWeighting = false;
|
||||
options.bUseAdaptiveWeighting = false;
|
||||
options.bDXT1UseAlpha = false;
|
||||
options.nCompressionSpeed = ATI_TC_Speed_Normal;
|
||||
options.bDisableMultiThreading = false;
|
||||
//options.bDisableMultiThreading = true;
|
||||
|
||||
// Compress
|
||||
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
|
||||
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
|
||||
@ -638,23 +528,31 @@ void nv::atiCompressDXT1(const Image * image, const OutputOptions::Private & out
|
||||
mem::free(destTexture.pData);
|
||||
}
|
||||
|
||||
void nv::atiCompressDXT5(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
// Init source texture
|
||||
ATI_TC_Texture srcTexture;
|
||||
srcTexture.dwSize = sizeof(srcTexture);
|
||||
srcTexture.dwWidth = image->width();
|
||||
srcTexture.dwHeight = image->height();
|
||||
srcTexture.dwPitch = image->width() * 4;
|
||||
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
|
||||
srcTexture.dwWidth = w;
|
||||
srcTexture.dwHeight = h;
|
||||
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
|
||||
{
|
||||
srcTexture.dwPitch = w * 4;
|
||||
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
|
||||
}
|
||||
else
|
||||
{
|
||||
srcTexture.dwPitch = w * 16;
|
||||
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
|
||||
}
|
||||
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
|
||||
srcTexture.pData = (ATI_TC_BYTE*) image->pixels();
|
||||
srcTexture.pData = (ATI_TC_BYTE*) data;
|
||||
|
||||
// Init dest texture
|
||||
ATI_TC_Texture destTexture;
|
||||
destTexture.dwSize = sizeof(destTexture);
|
||||
destTexture.dwWidth = image->width();
|
||||
destTexture.dwHeight = image->height();
|
||||
destTexture.dwWidth = w;
|
||||
destTexture.dwHeight = h;
|
||||
destTexture.dwPitch = 0;
|
||||
destTexture.format = ATI_TC_FORMAT_DXT5;
|
||||
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
|
||||
@ -674,8 +572,10 @@ void nv::atiCompressDXT5(const Image * image, const OutputOptions::Private & out
|
||||
|
||||
#if defined(HAVE_SQUISH)
|
||||
|
||||
void nv::squishCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
|
||||
/*
|
||||
Image img(*image);
|
||||
int count = img.width() * img.height();
|
||||
for (int i = 0; i < count; i++)
|
||||
@ -694,6 +594,7 @@ void nv::squishCompressDXT1(const Image * image, const OutputOptions::Private &
|
||||
}
|
||||
|
||||
mem::free(blocks);
|
||||
*/
|
||||
}
|
||||
|
||||
#endif // defined(HAVE_SQUISH)
|
||||
@ -701,7 +602,7 @@ void nv::squishCompressDXT1(const Image * image, const OutputOptions::Private &
|
||||
|
||||
#if defined(HAVE_D3DX)
|
||||
|
||||
void nv::d3dxCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
|
||||
|
||||
@ -719,7 +620,7 @@ void nv::d3dxCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
|
||||
|
||||
IDirect3DTexture9 * texture = NULL;
|
||||
err = D3DXCreateTexture(device, image->width(), image->height(), 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
|
||||
err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
|
||||
|
||||
IDirect3DSurface9 * surface = NULL;
|
||||
err = texture->GetSurfaceLevel(0, &surface);
|
||||
@ -727,10 +628,17 @@ void nv::d3dxCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
RECT rect;
|
||||
rect.left = 0;
|
||||
rect.top = 0;
|
||||
rect.bottom = image->height();
|
||||
rect.right = image->width();
|
||||
rect.bottom = h;
|
||||
rect.right = w;
|
||||
|
||||
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, image->pixels(), D3DFMT_A8R8G8B8, image->width() * sizeof(Color32), NULL, &rect, D3DX_DEFAULT, 0);
|
||||
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
|
||||
{
|
||||
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
|
||||
}
|
||||
else
|
||||
{
|
||||
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
|
||||
}
|
||||
|
||||
if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
|
||||
{
|
||||
@ -740,7 +648,7 @@ void nv::d3dxCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
int size = rect.Pitch * ((image->height() + 3) / 4);
|
||||
int size = rect.Pitch * ((h + 3) / 4);
|
||||
outputOptions.outputHandler->writeData(rect.pBits, size);
|
||||
}
|
||||
|
||||
@ -757,28 +665,11 @@ void nv::d3dxCompressDXT1(const Image * image, const OutputOptions::Private & ou
|
||||
|
||||
#if defined(HAVE_STB)
|
||||
|
||||
void nv::stbCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions)
|
||||
// Compresses a single color block with stb_dxt and writes the encoded block
// to `output`.
//
// rgba               - block to encode; modified in place (channels are reordered).
// alphaMode          - not used by this compressor.
// compressionOptions - not used by this compressor.
// output             - destination for the encoded block.
//
// The earlier whole-image loop (which read `image` and `outputOptions`, neither
// of which is a parameter here) does not belong in the per-block entry point;
// the caller is responsible for iterating over blocks and emitting the output.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    rgba.swizzle(2, 1, 0, 3); // Swap R and B

    // Last two arguments are stb_dxt's `alpha` and `mode`, both passed as 0 here.
    stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
}
|
||||
|
||||
|
||||
#endif // defined(HAVE_STB)
|
||||
|
@ -30,68 +30,153 @@
|
||||
namespace nv
|
||||
{
|
||||
class Image;
|
||||
class FloatImage;
|
||||
struct ColorBlock;
|
||||
|
||||
class FastCompressor
|
||||
struct CompressorInterface
|
||||
{
|
||||
public:
|
||||
FastCompressor();
|
||||
~FastCompressor();
|
||||
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
virtual ~CompressorInterface() {}
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0;
|
||||
};
|
||||
|
||||
class SlowCompressor
|
||||
struct FixedBlockCompressor : public CompressorInterface
|
||||
{
|
||||
public:
|
||||
SlowCompressor();
|
||||
~SlowCompressor();
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
|
||||
virtual uint blockSize() const = 0;
|
||||
};
|
||||
|
||||
|
||||
// Fast CPU compressors.
|
||||
// Fast DXT1 block compressor; chooseCpuCompressor() returns it for
// Format_DXT1 at Quality_Fastest.
struct FastCompressorDXT1 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// Fast DXT1a block compressor; chooseCpuCompressor() returns it for
// Format_DXT1a at Quality_Fastest.
struct FastCompressorDXT1a : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// Fast DXT3 block compressor; chooseCpuCompressor() returns it for
// Format_DXT3 at Quality_Fastest.
struct FastCompressorDXT3 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// Fast DXT5 block compressor; chooseCpuCompressor() returns it for
// Format_DXT5 at Quality_Fastest.
struct FastCompressorDXT5 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// Fast DXT5n block compressor; chooseCpuCompressor() returns it for
// Format_DXT5n at Quality_Fastest.
struct FastCompressorDXT5n : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// Fast BC4 block compressor; chooseCpuCompressor() returns it for
// Format_BC4 at Quality_Fastest and Quality_Normal.
struct FastCompressorBC4 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// Fast BC5 block compressor; chooseCpuCompressor() returns it for
// Format_BC5 at Quality_Fastest and Quality_Normal.
struct FastCompressorBC5 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
|
||||
// Normal CPU compressors.
|
||||
// DXT1 block compressor that chooseCpuCompressor() falls back to for
// Format_DXT1 when the quality is not Quality_Fastest and no external
// compressor was selected.
struct NormalCompressorDXT1 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// DXT1a block compressor; chooseCpuCompressor() returns it for Format_DXT1a
// when the quality is not Quality_Fastest.
struct NormalCompressorDXT1a : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// DXT3 block compressor; chooseCpuCompressor() returns it for Format_DXT3
// when the quality is not Quality_Fastest.
struct NormalCompressorDXT3 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// DXT5 block compressor; chooseCpuCompressor() returns it for Format_DXT5
// when the quality is not Quality_Fastest and the "ati" external compressor
// was not selected.
struct NormalCompressorDXT5 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// DXT5n block compressor; chooseCpuCompressor() returns it for Format_DXT5n
// when the quality is not Quality_Fastest.
struct NormalCompressorDXT5n : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
|
||||
// Production CPU compressors.
|
||||
// BC4 block compressor; chooseCpuCompressor() returns it for Format_BC4 when
// the quality is neither Quality_Fastest nor Quality_Normal.
struct ProductionCompressorBC4 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
// BC5 block compressor; chooseCpuCompressor() returns it for Format_BC5 when
// the quality is neither Quality_Fastest nor Quality_Normal.
struct ProductionCompressorBC5 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; } // encoded size of one 4x4 block
|
||||
};
|
||||
|
||||
|
||||
// External compressors.
|
||||
#if defined(HAVE_S3QUANT)
|
||||
void s3CompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
// Whole-image DXT1 compressor, only declared when HAVE_S3QUANT is defined.
// chooseCpuCompressor() returns it when externalCompressor == "s3".
struct S3CompressorDXT1 : public CompressorInterface
|
||||
{
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ATITC)
|
||||
void atiCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void atiCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
// Whole-image DXT1 compressor, only declared when HAVE_ATITC is defined.
// chooseCpuCompressor() returns it for Format_DXT1 when
// externalCompressor == "ati".
struct AtiCompressorDXT1 : public CompressorInterface
|
||||
{
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
};
|
||||
|
||||
// Whole-image DXT5 compressor, only declared when HAVE_ATITC is defined.
// chooseCpuCompressor() returns it for Format_DXT5 when
// externalCompressor == "ati".
struct AtiCompressorDXT5 : public CompressorInterface
|
||||
{
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_SQUISH)
|
||||
void squishCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
// Whole-image DXT1 compressor, only declared when HAVE_SQUISH is defined.
// chooseCpuCompressor() returns it when externalCompressor == "squish".
struct SquishCompressorDXT1 : public CompressorInterface
|
||||
{
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_D3DX)
|
||||
void d3dxCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
// Whole-image DXT1 compressor, only declared when HAVE_D3DX is defined.
// chooseCpuCompressor() returns it when externalCompressor == "d3dx".
struct D3DXCompressorDXT1 : public CompressorInterface
|
||||
{
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_D3DX)
|
||||
void stbCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
|
||||
#if defined(HAVE_STB)
|
||||
// Per-block DXT1 compressor whose compressBlock() calls
// stb_compress_dxt_block(). chooseCpuCompressor() returns it when
// externalCompressor == "stb".
struct StbCompressorDXT1 : public FixedBlockCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; } // encoded size of one 4x4 block
|
||||
};
|
||||
#endif
|
||||
|
||||
} // nv namespace
|
||||
|
@ -222,6 +222,7 @@ Compressor::Compressor() : m(*new Compressor::Private())
|
||||
|
||||
if (m.cudaEnabled)
|
||||
{
|
||||
#pragma message(NV_FILE_LINE "FIXME: This code is duplicated below.")
|
||||
// Select fastest CUDA device.
|
||||
int device = cuda::getFastestDevice();
|
||||
if (!cuda::setDevice(device))
|
||||
@ -231,7 +232,7 @@ Compressor::Compressor() : m(*new Compressor::Private())
|
||||
}
|
||||
else
|
||||
{
|
||||
m.cuda = new CudaCompressor();
|
||||
m.cuda = new CudaContext();
|
||||
|
||||
if (!m.cuda->isValid())
|
||||
{
|
||||
@ -268,7 +269,7 @@ void Compressor::enableCudaAcceleration(bool enable)
|
||||
}
|
||||
else
|
||||
{
|
||||
m.cuda = new CudaCompressor();
|
||||
m.cuda = new CudaContext();
|
||||
|
||||
if (!m.cuda->isValid())
|
||||
{
|
||||
@ -292,17 +293,18 @@ bool Compressor::process(const InputOptions & inputOptions, const CompressionOpt
|
||||
return m.compress(inputOptions.m, compressionOptions.m, outputOptions.m);
|
||||
}
|
||||
|
||||
|
||||
/// Estimate the size of compressing the input with the given options.
/// Thin public wrapper: unwraps the private parts of both option objects and
/// returns whatever Compressor::Private::estimateSize() computes for them.
|
||||
int Compressor::estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const
|
||||
{
|
||||
return m.estimateSize(inputOptions.m, compressionOptions.m);
|
||||
}
|
||||
|
||||
|
||||
// RAW api.
|
||||
// Raw-data compression entry point. Not implemented yet: every argument is
// ignored and the function always returns false.
bool Compressor::compress2D(InputFormat format, int w, int h, void * data, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
|
||||
{
|
||||
// @@ Make sure type of input format matches compression format.
|
||||
// Emits a build-time reminder only; it has no runtime effect.
#pragma message(NV_FILE_LINE "TODO: Implement raw compress api")
|
||||
return false;
|
||||
}
|
||||
|
||||
int Compressor::estimateSize(int w, int h, int d, const CompressionOptions & compressionOptions) const
|
||||
@ -324,16 +326,21 @@ TexImage Compressor::createTexImage() const
|
||||
return *new TexImage();
|
||||
}
|
||||
|
||||
|
||||
bool Compressor::outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
|
||||
{
|
||||
m.outputHeader(tex, mipmapCount, compressionOptions.m, outputOptions.m);
|
||||
return m.outputHeader(tex, mipmapCount, compressionOptions.m, outputOptions.m);
|
||||
}
|
||||
|
||||
// TexImage compression entry point. Not implemented yet: every argument is
// ignored and the function always returns false.
bool Compressor::compress(const TexImage & tex, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
|
||||
{
|
||||
// Emits a build-time reminder only; it has no runtime effect.
#pragma message(NV_FILE_LINE "TODO: Implement TexImage compress api")
|
||||
|
||||
// @@ Convert to fixed point and call compress2D for each face.
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Estimate the size of compressing the given texture.
|
||||
int Compressor::estimateSize(const TexImage & tex, const CompressionOptions & compressionOptions) const
|
||||
{
|
||||
const uint w = tex.width();
|
||||
@ -345,6 +352,8 @@ int Compressor::estimateSize(const TexImage & tex, const CompressionOptions & co
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
bool Compressor::Private::compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
|
||||
{
|
||||
// Make sure enums match.
|
||||
@ -359,8 +368,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
||||
return false;
|
||||
}
|
||||
|
||||
#pragma message(NV_FILE_LINE "TODO: If DefaultOutputHandler, then seek begining of the file.")
|
||||
|
||||
inputOptions.computeTargetExtents();
|
||||
|
||||
// Output DDS header.
|
||||
@ -625,7 +632,10 @@ bool Compressor::Private::outputHeader(const TexImage & tex, int mipmapCount, co
|
||||
{
|
||||
if (tex.width() <= 0 || tex.height() <= 0 || tex.depth() <= 0 || mipmapCount <= 0)
|
||||
{
|
||||
#pragma message(NV_FILE_LINE "TODO: Set invalid argument error.")
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_InvalidInput);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@ -1252,6 +1262,182 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
|
||||
}
|
||||
|
||||
|
||||
// Creates the CPU compressor matching compressionOptions.format.
//
// For DXT1 (and DXT5 with "ati") an external compressor named by
// compressionOptions.externalCompressor takes precedence when the matching
// HAVE_* macro is defined; otherwise the choice depends on
// compressionOptions.quality.
//
// Returns a heap-allocated compressor that the caller must delete, or NULL
// when the format has no CPU implementation.
CompressorInterface * Compressor::Private::chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const
{
    if (compressionOptions.format == Format_DXT1)
    {
        // Every external branch returns, so no `else` chaining across the
        // preprocessor blocks is needed.
#if defined(HAVE_S3QUANT)
        if (compressionOptions.externalCompressor == "s3") return new S3CompressorDXT1;
#endif

#if defined(HAVE_ATITC)
        if (compressionOptions.externalCompressor == "ati") return new AtiCompressorDXT1;
#endif

#if defined(HAVE_SQUISH)
        if (compressionOptions.externalCompressor == "squish") return new SquishCompressorDXT1;
#endif

#if defined(HAVE_D3DX)
        if (compressionOptions.externalCompressor == "d3dx") return new D3DXCompressorDXT1;
#endif

        // StbCompressorDXT1 is declared and implemented under HAVE_STB, so the
        // selection must use the same guard (not HAVE_D3DX).
#if defined(HAVE_STB)
        if (compressionOptions.externalCompressor == "stb") return new StbCompressorDXT1;
#endif

        if (compressionOptions.quality == Quality_Fastest)
        {
            return new FastCompressorDXT1;
        }

        return new NormalCompressorDXT1;
    }
    else if (compressionOptions.format == Format_DXT1a)
    {
        if (compressionOptions.quality == Quality_Fastest)
        {
            return new FastCompressorDXT1a;
        }

        return new NormalCompressorDXT1a;
    }
    else if (compressionOptions.format == Format_DXT1n)
    {
        // Not supported.
    }
    else if (compressionOptions.format == Format_DXT3)
    {
        if (compressionOptions.quality == Quality_Fastest)
        {
            return new FastCompressorDXT3;
        }

        return new NormalCompressorDXT3;
    }
    else if (compressionOptions.format == Format_DXT5)
    {
#if defined(HAVE_ATITC)
        if (compressionOptions.externalCompressor == "ati") return new AtiCompressorDXT5;
#endif

        if (compressionOptions.quality == Quality_Fastest)
        {
            return new FastCompressorDXT5;
        }

        return new NormalCompressorDXT5;
    }
    else if (compressionOptions.format == Format_DXT5n)
    {
        if (compressionOptions.quality == Quality_Fastest)
        {
            return new FastCompressorDXT5n;
        }

        return new NormalCompressorDXT5n;
    }
    else if (compressionOptions.format == Format_BC4)
    {
        // BC4 has no separate "normal" encoder: Normal quality uses the fast one.
        if (compressionOptions.quality == Quality_Fastest || compressionOptions.quality == Quality_Normal)
        {
            return new FastCompressorBC4;
        }

        return new ProductionCompressorBC4;
    }
    else if (compressionOptions.format == Format_BC5)
    {
        // BC5 has no separate "normal" encoder: Normal quality uses the fast one.
        if (compressionOptions.quality == Quality_Fastest || compressionOptions.quality == Quality_Normal)
        {
            return new FastCompressorBC5;
        }

        return new ProductionCompressorBC5;
    }
    else if (compressionOptions.format == Format_CTX1)
    {
        // Not supported.
    }
    else if (compressionOptions.format == Format_BC6)
    {
        // Not supported.
    }
    else if (compressionOptions.format == Format_BC7)
    {
        // Not supported.
    }

    return NULL;
}
|
||||
|
||||
|
||||
// Creates the CUDA compressor for compressionOptions.format, bound to the
// shared CUDA context.
//
// Returns a heap-allocated compressor that the caller must delete, or NULL
// when no CUDA compressor applies (the caller then falls back to the CPU
// compressor selection).
CompressorInterface * Compressor::Private::chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const
{
    nvDebugCheck(cudaSupported);

    // Do not use CUDA compressors in fastest quality mode.
    if (compressionOptions.quality == Quality_Fastest) return NULL;

    // Formats with a CUDA implementation.
    if (compressionOptions.format == Format_DXT1) return new CudaCompressorDXT1(*cuda);
    if (compressionOptions.format == Format_DXT3) return new CudaCompressorDXT3(*cuda);
    if (compressionOptions.format == Format_DXT5) return new CudaCompressorDXT5(*cuda);

    // Everything else has no CUDA compressor yet:
    //   Format_DXT1a                 - pending, see the build message below.
    //   Format_DXT5n, Format_CTX1    - @@ Return CUDA compressor.
    //   Format_DXT1n, Format_BC4, Format_BC5, Format_BC6, Format_BC7 - Not supported.
#pragma message(NV_FILE_LINE "TODO: Implement CUDA DXT1a compressor.")

    return NULL;
}
|
||||
|
||||
|
||||
|
||||
// Compress the given mipmap.
|
||||
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
|
||||
{
|
||||
@ -1272,196 +1458,26 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||
const Image * image = mipmap.asFixedImage();
|
||||
nvDebugCheck(image != NULL);
|
||||
|
||||
// @@ Use FastCompressor::isSupported(compressionOptions.format) to chose compressor.
|
||||
|
||||
FastCompressor fast;
|
||||
fast.setImage(image, inputOptions.alphaMode);
|
||||
|
||||
SlowCompressor slow;
|
||||
slow.setImage(image, inputOptions.alphaMode);
|
||||
|
||||
const bool useCuda = cudaEnabled && image->width() * image->height() >= 512;
|
||||
|
||||
if (compressionOptions.format == Format_DXT1)
|
||||
// Decide what compressor to use.
|
||||
CompressorInterface * compressor = NULL;
|
||||
if (cudaEnabled && image->width() * image->height() >= 512)
|
||||
{
|
||||
#if defined(HAVE_S3QUANT)
|
||||
if (compressionOptions.externalCompressor == "s3")
|
||||
{
|
||||
s3CompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_ATITC)
|
||||
if (compressionOptions.externalCompressor == "ati")
|
||||
{
|
||||
atiCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_SQUISH)
|
||||
if (compressionOptions.externalCompressor == "squish")
|
||||
{
|
||||
squishCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_D3DX)
|
||||
if (compressionOptions.externalCompressor == "d3dx")
|
||||
{
|
||||
d3dxCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_D3DX)
|
||||
if (compressionOptions.externalCompressor == "stb")
|
||||
{
|
||||
stbCompressDXT1(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fast.compressDXT1(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
//cuda->compressDXT1(compressionOptions, outputOptions);
|
||||
cuda->compressDXT1(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
slow.compressDXT1(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
compressor = chooseGpuCompressor(compressionOptions);
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT1a)
|
||||
if (compressor == NULL)
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fast.compressDXT1a(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
/*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
slow.compressDXT1a(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
compressor = chooseCpuCompressor(compressionOptions);
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT1n)
|
||||
|
||||
if (compressor == NULL)
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT1n(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
|
||||
}
|
||||
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT3)
|
||||
else
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fast.compressDXT3(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT3(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
slow.compressDXT3(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT5)
|
||||
{
|
||||
#if defined(HAVE_ATITC)
|
||||
if (compressionOptions.externalCompressor == "ati")
|
||||
{
|
||||
atiCompressDXT5(image, outputOptions);
|
||||
}
|
||||
else
|
||||
#endif
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fast.compressDXT5(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT5(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
slow.compressDXT5(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_DXT5n)
|
||||
{
|
||||
if (compressionOptions.quality == Quality_Fastest)
|
||||
{
|
||||
fast.compressDXT5n(outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
/*if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressDXT5n(compressionOptions, outputOptions);
|
||||
}
|
||||
else*/
|
||||
{
|
||||
slow.compressDXT5n(compressionOptions, outputOptions);
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (compressionOptions.format == Format_BC4)
|
||||
{
|
||||
slow.compressBC4(compressionOptions, outputOptions);
|
||||
}
|
||||
else if (compressionOptions.format == Format_BC5)
|
||||
{
|
||||
slow.compressBC5(compressionOptions, outputOptions);
|
||||
}
|
||||
else if (compressionOptions.format == Format_CTX1)
|
||||
{
|
||||
if (useCuda)
|
||||
{
|
||||
nvDebugCheck(cudaSupported);
|
||||
cuda->setImage(image, inputOptions.alphaMode);
|
||||
cuda->compressCTX1(compressionOptions, outputOptions);
|
||||
}
|
||||
else
|
||||
{
|
||||
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
|
||||
}
|
||||
compressor->compress(InputFormat_BGRA_8UB, inputOptions.alphaMode, image->width(), image->height(), (void *)image->pixels(), compressionOptions, outputOptions);
|
||||
|
||||
delete compressor;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -27,6 +27,7 @@
|
||||
#include <nvcore/Ptr.h>
|
||||
|
||||
#include <nvtt/cuda/CudaCompressDXT.h>
|
||||
#include <nvtt/CompressDXT.h>
|
||||
|
||||
#include "nvtt.h"
|
||||
|
||||
@ -44,6 +45,9 @@ namespace nvtt
|
||||
Private() {}
|
||||
|
||||
bool compress(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
|
||||
|
||||
bool compress(const void * data, int width, int height, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const;
|
||||
|
||||
int estimateSize(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions) const;
|
||||
|
||||
bool outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions);
|
||||
@ -51,6 +55,10 @@ namespace nvtt
|
||||
private:
|
||||
|
||||
bool outputHeader(const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
|
||||
|
||||
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
|
||||
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
|
||||
|
||||
bool compressMipmaps(uint f, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
|
||||
|
||||
bool initMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d, uint f, uint m) const;
|
||||
@ -71,7 +79,7 @@ namespace nvtt
|
||||
bool cudaSupported;
|
||||
bool cudaEnabled;
|
||||
|
||||
nv::AutoPtr<nv::CudaCompressor> cuda;
|
||||
nv::AutoPtr<nv::CudaContext> cuda;
|
||||
|
||||
};
|
||||
|
||||
|
@ -296,6 +296,51 @@ __device__ float3 blockError3(const float3 * colors, uint permutation, float3 a,
|
||||
// Sort colors
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// @@ Experimental code to avoid duplicate colors for faster compression.
|
||||
// We could first sort along the best fit line and only compare colors that have the same projection.
|
||||
// The hardest part is to maintain the indices to map packed/sorted colors to the input colors.
|
||||
// We also need to update several functions that assume the number of colors is fixed to 16.
|
||||
// And compute different bit maps for the different color counts.
|
||||
// This is a fairly high amount of work.
|
||||
// Experimental: removes duplicate colors from a 16-entry block in place.
//
// values  - 16 colors; on return the first `count` entries hold the colors
//           that were appended (one per group of equal colors).
// weights - 16 weights; the weight of each dropped duplicate is added to the
//           weight of the entry recorded in ranks[] for it.
// ranks   - per-thread slot index recorded when the thread's color is matched.
// Returns the number of packed colors.
//
// NOTE(review): the function contains no explicit barrier, although `count`
// and the three arrays are read and written by different threads. It
// presumably relies on the 16 threads executing in lockstep - confirm.
__device__ int packColors(float3 * values, float * weights, int * ranks)
|
||||
{
|
||||
const int tid = threadIdx.x;
|
||||
|
||||
// Counter of packed colors, shared by the threads of the block.
// NOTE(review): every thread executes the reset below.
__shared__ int count;
|
||||
count = 0;
|
||||
|
||||
// True until this thread's color has been matched by a leading thread.
bool alive = true;
|
||||
|
||||
// Visit the 16 input slots in order; slot i is handled by thread i.
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
// One thread leads on each iteration.
|
||||
if (tid == i) {
|
||||
|
||||
// If thread alive, then append element.
|
||||
if (alive) {
|
||||
values[count] = values[i];
|
||||
weights[count] = weights[i];
|
||||
count++;
|
||||
}
|
||||
|
||||
// Otherwise update weight.
|
||||
else {
|
||||
weights[ranks[i]] += weights[i];
|
||||
}
|
||||
}
|
||||
|
||||
// Kill all threads that have the same element and record rank.
// NOTE(review): this test is not conditioned on `alive`, so a thread that
// was already matched overwrites ranks[tid] again on a later iteration
// whose leader has the same color - verify this is intended.
|
||||
if (values[i] == values[tid]) {
|
||||
alive = false;
|
||||
ranks[tid] = count - 1;
|
||||
}
|
||||
}
|
||||
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
__device__ void sortColors(const float * values, int * ranks)
|
||||
{
|
||||
#if __DEVICE_EMULATION__
|
||||
@ -343,12 +388,60 @@ __device__ void sortColors(const float * values, int * ranks)
|
||||
#endif
|
||||
}
|
||||
|
||||
// Computes a sort rank for each of the first `count` entries of `values`.
//
// ranks[t] starts as the number of entries strictly smaller than values[t];
// entries that end up with the same rank are then separated by incrementing
// the rank of the one with the higher index.
//
// values - keys to rank (read only).
// ranks  - output, one rank per entry.
// count  - number of entries to consider.
__device__ void sortColors(const float * values, int * ranks, int count)
|
||||
{
|
||||
// Emulation build: thread 0 computes all ranks serially.
#if __DEVICE_EMULATION__
|
||||
if (threadIdx.x == 0)
|
||||
{
|
||||
for (int tid = 0; tid < count; tid++)
|
||||
{
|
||||
int rank = 0;
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
// The comparison result (0 or 1) is accumulated directly.
rank += (values[i] < values[tid]);
|
||||
}
|
||||
|
||||
ranks[tid] = rank;
|
||||
}
|
||||
|
||||
// Resolve elements with the same index.
|
||||
for (int i = 0; i < count-1; i++)
|
||||
{
|
||||
for (int tid = 0; tid < count; tid++)
|
||||
{
|
||||
if (tid > i && ranks[tid] == ranks[i]) ++ranks[tid];
|
||||
}
|
||||
}
|
||||
}
|
||||
// Device build: each thread computes the rank of its own entry.
// NOTE(review): there is no `tid < count` guard and no barrier in this
// path; presumably callers only invoke it from threads with
// threadIdx.x < count and rely on lockstep execution - confirm.
#else
|
||||
const int tid = threadIdx.x;
|
||||
|
||||
int rank = 0;
|
||||
|
||||
#pragma unroll
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
rank += (values[i] < values[tid]);
|
||||
}
|
||||
|
||||
ranks[tid] = rank;
|
||||
|
||||
// Resolve elements with the same index.
// Reads ranks[i], which is written by another thread.
|
||||
#pragma unroll
|
||||
for (int i = 0; i < count-1; i++)
|
||||
{
|
||||
// Bitwise & on the two boolean results, rather than &&.
if ((tid > i) & (ranks[tid] == ranks[i])) ++ranks[tid];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Load color block to shared mem
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
|
||||
/*__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
@ -389,9 +482,9 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
__debugsync();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
}*/
|
||||
|
||||
__device__ void loadColorBlockTex(uint bn, uint w, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
|
||||
__device__ void loadColorBlockTex(uint firstBlock, uint width, float3 colors[16], float3 sums[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
@ -400,8 +493,8 @@ __device__ void loadColorBlockTex(uint bn, uint w, float3 colors[16], float3 sum
|
||||
|
||||
if (idx < 16)
|
||||
{
|
||||
float x = 4 * ((bn + bid) % w) + idx % 4; // @@ Avoid mod and div by using 2D grid?
|
||||
float y = 4 * ((bn + bid) / w) + idx / 4;
|
||||
float x = 4 * ((firstBlock + bid) % width) + idx % 4; // @@ Avoid mod and div by using 2D grid?
|
||||
float y = 4 * ((firstBlock + bid) / width) + idx / 4;
|
||||
|
||||
// Read color and copy to shared mem.
|
||||
float4 c = tex2D(tex, x, y);
|
||||
@ -437,10 +530,107 @@ __device__ void loadColorBlockTex(uint bn, uint w, float3 colors[16], float3 sum
|
||||
__debugsync();
|
||||
}
|
||||
#endif
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
__device__ void loadColorBlockTex(uint firstBlock, uint w, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
const int idx = threadIdx.x;
|
||||
|
||||
__shared__ float dps[16];
|
||||
|
||||
if (idx < 16)
|
||||
{
|
||||
float x = 4 * ((firstBlock + bid) % w) + idx % 4; // @@ Avoid mod and div by using 2D grid?
|
||||
float y = 4 * ((firstBlock + bid) / w) + idx / 4;
|
||||
|
||||
// Read color and copy to shared mem.
|
||||
float4 c = tex2D(tex, x, y);
|
||||
|
||||
colors[idx].x = c.z;
|
||||
colors[idx].y = c.y;
|
||||
colors[idx].z = c.x;
|
||||
weights[idx] = 1;
|
||||
|
||||
int count = packColors(colors, weights);
|
||||
if (idx < count)
|
||||
{
|
||||
// Sort colors along the best fit line.
|
||||
colorSums(colors, sums);
|
||||
float3 axis = bestFitLine(colors, sums[0], kColorMetric);
|
||||
|
||||
*sameColor = (axis == make_float3(0, 0, 0));
|
||||
|
||||
dps[idx] = dot(colors[idx], axis);
|
||||
|
||||
sortColors(dps, xrefs);
|
||||
|
||||
float3 tmp = colors[idx];
|
||||
colors[xrefs[idx]] = tmp;
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
/// Load one 4x4 texel block from the bound texture, weight its colors by
/// alpha and sort them along the block's best-fit line.
///
/// The block handled is `firstBlock + blockIdx.x`; `width` is used as the
/// number of blocks per row when turning that index into texel coordinates.
/// Only threads 0..15 do work (one texel each).
///
/// Outputs:
///  - colors[]  : colors (b,g,r order from the texel's z,y,x), multiplied by
///                their weight unless the block is a single color, stored in
///                sorted order.
///  - weights[] : texel alpha (c.w), stored in the same sorted order.
///  - sums[]    : filled by colorSums(); sums[0] is passed to bestFitLine().
///  - xrefs[]   : permutation produced by sortColors().
///  - sameColor : set to non-zero when the best-fit axis is (0,0,0).
__device__ void loadColorBlockTex(uint firstBlock, uint width, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
{
    const int bid = blockIdx.x;
    const int idx = threadIdx.x;

    __shared__ float3 rawColors[16];
    __shared__ float dps[16];

    if (idx >= 16)
    {
#if __DEVICE_EMULATION__
        // Emulation only: match the three barriers executed by the loader threads.
        __debugsync();
        __debugsync();
        __debugsync();
#endif
        return;
    }

    // Texel coordinates of this thread inside the image.
    float texX = 4 * ((firstBlock + bid) % width) + idx % 4; // @@ Avoid mod and div by using 2D grid?
    float texY = 4 * ((firstBlock + bid) / width) + idx / 4;

    // Fetch the texel and stash the unweighted color and its weight.
    float4 texel = tex2D(tex, texX, texY);

    rawColors[idx].x = texel.z;
    rawColors[idx].y = texel.y;
    rawColors[idx].z = texel.x;
    weights[idx] = texel.w;

    colors[idx] = rawColors[idx] * weights[idx];

    // No need to synchronize, 16 < warp size.
    __debugsync();

    // Find the axis the colors are sorted along.
    colorSums(colors, sums);
    float3 axis = bestFitLine(colors, sums[0], kColorMetric);

    *sameColor = (axis == make_float3(0, 0, 0));

    // Single color compressor needs unweighted colors.
    if (*sameColor)
    {
        colors[idx] = rawColors[idx];
    }

    dps[idx] = dot(colors[idx], axis);

    __debugsync();

    sortColors(dps, xrefs);

    // Read this thread's entry before any thread scatters into the arrays.
    float3 ownColor = colors[idx];
    float ownWeight = weights[idx];
    __debugsync();
    colors[xrefs[idx]] = ownColor;
    weights[xrefs[idx]] = ownWeight;
}
|
||||
|
||||
/*
|
||||
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
const int bid = blockIdx.x;
|
||||
@ -494,6 +684,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
|
||||
}
|
||||
#endif
|
||||
}
|
||||
*/
|
||||
|
||||
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16], int * sameColor)
|
||||
{
|
||||
@ -1457,48 +1648,15 @@ __device__ void saveSingleColorBlockCTX1(float2 color, uint2 * result)
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Compress color block
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
__global__ void compressDXT1(const uint * permutations, const uint * image, uint2 * result)
|
||||
|
||||
__global__ void compressDXT1(uint firstBlock, uint w, const uint * permutations, uint2 * result)
|
||||
{
|
||||
__shared__ float3 colors[16];
|
||||
__shared__ float3 sums[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
if (sameColor)
|
||||
{
|
||||
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
|
||||
return;
|
||||
}
|
||||
|
||||
ushort bestStart, bestEnd;
|
||||
uint bestPermutation;
|
||||
|
||||
__shared__ float errors[NUM_THREADS];
|
||||
|
||||
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
|
||||
|
||||
// Use a parallel reduction to find minimum error.
|
||||
const int minIdx = findMinError(errors);
|
||||
|
||||
// Only write the result of the winner thread.
|
||||
if (threadIdx.x == minIdx)
|
||||
{
|
||||
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void compressDXT1_Tex(uint bn, uint w, const uint * permutations, uint2 * result)
|
||||
{
|
||||
__shared__ float3 colors[16];
|
||||
__shared__ float3 sums[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlockTex(bn, w, colors, sums, xrefs, &sameColor);
|
||||
loadColorBlockTex(firstBlock, w, colors, sums, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -1534,14 +1692,14 @@ __global__ void compressDXT1_Tex(uint bn, uint w, const uint * permutations, uin
|
||||
}
|
||||
|
||||
|
||||
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
|
||||
__global__ void compressLevel4DXT1(uint firstBlock, uint w, const uint * permutations, uint2 * result)
|
||||
{
|
||||
__shared__ float3 colors[16];
|
||||
__shared__ float3 sums[16];
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, xrefs, &sameColor);
|
||||
loadColorBlockTex(firstBlock, w, colors, sums, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -1568,7 +1726,7 @@ __global__ void compressLevel4DXT1(const uint * permutations, const uint * image
|
||||
}
|
||||
}
|
||||
|
||||
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
|
||||
__global__ void compressWeightedDXT1(uint firstBlock, uint w, const uint * permutations, uint2 * result)
|
||||
{
|
||||
__shared__ float3 colors[16];
|
||||
__shared__ float3 sums[16];
|
||||
@ -1576,7 +1734,7 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
|
||||
__shared__ int xrefs[16];
|
||||
__shared__ int sameColor;
|
||||
|
||||
loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
|
||||
loadColorBlockTex(firstBlock, w, colors, sums, weights, xrefs, &sameColor);
|
||||
|
||||
__syncthreads();
|
||||
|
||||
@ -1987,17 +2145,7 @@ extern "C" void setupCompressKernel(const float weights[3])
|
||||
cudaMemcpyToSymbol(kColorMetricSqr, weightsSqr, sizeof(float) * 3, 0);
|
||||
}
|
||||
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Launch kernel
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
{
|
||||
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
}
|
||||
|
||||
extern "C" void compressKernelDXT1_Tex(uint bn, uint blockNum, uint w, cudaArray * d_data, uint * d_result, uint * d_bitmaps)
|
||||
extern "C" void bindTextureToArray(cudaArray * d_data)
|
||||
{
|
||||
// Setup texture
|
||||
tex.normalized = false;
|
||||
@ -2006,21 +2154,61 @@ extern "C" void compressKernelDXT1_Tex(uint bn, uint blockNum, uint w, cudaArray
|
||||
tex.addressMode[1] = cudaAddressModeClamp;
|
||||
|
||||
cudaBindTextureToArray(tex, d_data);
|
||||
|
||||
compressDXT1_Tex<<<blockNum, NUM_THREADS>>>(bn, w, d_bitmaps, (uint2 *)d_result);
|
||||
}
|
||||
|
||||
|
||||
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
// Launch kernel
|
||||
////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// DXT1 compressors:
|
||||
/// Host-side launcher for the compressDXT1 kernel.
///
/// Launches `blockNum` thread blocks of NUM_THREADS threads. `firstBlock` and
/// `w` are forwarded to the kernel unchanged, `d_bitmaps` is passed as the
/// kernel's permutation table and `d_result` receives the output, viewed as
/// uint2 (8 bytes per element).
extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    uint2 * const blocksOut = (uint2 *)d_result;
    compressDXT1<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, blocksOut);
}
|
||||
|
||||
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
/// Host-side launcher for the compressLevel4DXT1 kernel.
///
/// Launches `blockNum` thread blocks of NUM_THREADS threads. `firstBlock` and
/// `w` are forwarded to the kernel unchanged, `d_bitmaps` is passed as the
/// kernel's permutation table and `d_result` receives the output, viewed as
/// uint2 (8 bytes per element).
extern "C" void compressKernelDXT1_Level4(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    uint2 * const blocksOut = (uint2 *)d_result;
    compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, blocksOut);
}
|
||||
|
||||
/// Host-side launcher for the compressWeightedDXT1 kernel.
///
/// Launches `blockNum` thread blocks of NUM_THREADS threads. `firstBlock` and
/// `w` are forwarded to the kernel unchanged, `d_bitmaps` is passed as the
/// kernel's permutation table and `d_result` receives the output, viewed as
/// uint2 (8 bytes per element).
extern "C" void compressWeightedKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    uint2 * const blocksOut = (uint2 *)d_result;
    compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, blocksOut);
}
|
||||
|
||||
// @@ DXT1a compressors.
|
||||
|
||||
|
||||
// @@ DXT3 compressors:
|
||||
/// Placeholder launcher for a DXT3 kernel.
///
/// Currently a no-op: the kernel launch below is disabled, so `d_result` is
/// left untouched by this call.
extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    // Disabled launch:
    //compressDXT3<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, (uint2 *)d_result);
}
|
||||
|
||||
/// Placeholder launcher for an alpha-weighted DXT3 kernel.
///
/// Currently a no-op: the kernel launch below is disabled, so `d_result` is
/// left untouched by this call.
extern "C" void compressWeightedKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    // Disabled launch:
    //compressWeightedDXT3<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, (uint2 *)d_result);
}
|
||||
|
||||
|
||||
// @@ DXT5 compressors.
|
||||
/// Placeholder launcher for a DXT5 kernel.
///
/// Currently a no-op: the kernel launch below is disabled, so `d_result` is
/// left untouched by this call.
extern "C" void compressKernelDXT5(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    // Disabled launch:
    //compressDXT5<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, (uint2 *)d_result);
}
|
||||
|
||||
/// Placeholder launcher for an alpha-weighted DXT5 kernel.
///
/// Currently a no-op: the kernel launch below is disabled, so `d_result` is
/// left untouched by this call.
extern "C" void compressWeightedKernelDXT5(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps)
{
    // Disabled launch:
    //compressWeightedDXT5<<<blockNum, NUM_THREADS>>>(firstBlock, w, d_bitmaps, (uint2 *)d_result);
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
/*
|
||||
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
|
||||
{
|
||||
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
@ -2030,16 +2218,10 @@ extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result
|
||||
{
|
||||
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
|
||||
}
|
||||
|
||||
*/
|
||||
/*
|
||||
extern "C" void compressKernelDXT5n(uint blockNum, cudaArray * d_data, uint * d_result)
|
||||
{
|
||||
// Setup texture
|
||||
tex.normalized = false;
|
||||
tex.filterMode = cudaFilterModePoint;
|
||||
tex.addressMode[0] = cudaAddressModeClamp;
|
||||
tex.addressMode[1] = cudaAddressModeClamp;
|
||||
|
||||
cudaBindTextureToArray(tex, d_data);
|
||||
|
||||
// compressDXT5n<<<blockNum/128, 128>>>(blockNum, (uint2 *)d_result);
|
||||
}
|
||||
*/
|
@ -52,16 +52,20 @@ using namespace nvtt;
|
||||
|
||||
|
||||
extern "C" void setupCompressKernel(const float weights[3]);
|
||||
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressKernelDXT1_Tex(uint bn, uint blockNum, uint w, cudaArray * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void bindTextureToArray(cudaArray * d_data);
|
||||
|
||||
extern "C" void compressKernelDXT1(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
extern "C" void compressKernelDXT3(uint firstBlock, uint blockNum, uint w, uint * d_result, uint * d_bitmaps);
|
||||
//extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
//extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
||||
|
||||
|
||||
#include "Bitmaps.h" // @@ Rename to BitmapTable.h
|
||||
#pragma message(NV_FILE_LINE "TODO: Rename Bitmaps.h to BitmapTable.h")
|
||||
#include "Bitmaps.h"
|
||||
|
||||
/*
|
||||
// Convert linear image to block linear.
|
||||
static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
||||
{
|
||||
@ -81,45 +85,49 @@ static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
||||
}
|
||||
}
|
||||
}
|
||||
*/
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_bitmapTableCTX(NULL), m_data(NULL), m_result(NULL)
|
||||
CudaContext::CudaContext() :
|
||||
bitmapTable(NULL),
|
||||
bitmapTableCTX(NULL),
|
||||
data(NULL),
|
||||
result(NULL)
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
// Allocate and upload bitmaps.
|
||||
cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
|
||||
if (m_bitmapTable != NULL)
|
||||
cudaMalloc((void**) &bitmapTable, 992 * sizeof(uint));
|
||||
if (bitmapTable != NULL)
|
||||
{
|
||||
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
cudaMalloc((void**) &m_bitmapTableCTX, 704 * sizeof(uint));
|
||||
|
||||
if (m_bitmapTableCTX != NULL)
|
||||
cudaMalloc((void**) &bitmapTableCTX, 704 * sizeof(uint));
|
||||
if (bitmapTableCTX != NULL)
|
||||
{
|
||||
cudaMemcpy(m_bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice);
|
||||
}
|
||||
|
||||
// Allocate scratch buffers.
|
||||
cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
|
||||
cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U);
|
||||
cudaMalloc((void**) &data, MAX_BLOCKS * 64U);
|
||||
cudaMalloc((void**) &result, MAX_BLOCKS * 8U);
|
||||
#endif
|
||||
}
|
||||
|
||||
CudaCompressor::~CudaCompressor()
|
||||
CudaContext::~CudaContext()
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
// Free device mem allocations.
|
||||
cudaFree(m_data);
|
||||
cudaFree(m_result);
|
||||
cudaFree(m_bitmapTable);
|
||||
cudaFree(m_bitmapTableCTX);
|
||||
cudaFree(bitmapTableCTX);
|
||||
cudaFree(bitmapTable);
|
||||
cudaFree(data);
|
||||
cudaFree(result);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool CudaCompressor::isValid() const
|
||||
bool CudaContext::isValid() const
|
||||
{
|
||||
#if defined HAVE_CUDA
|
||||
cudaError_t err = cudaGetLastError();
|
||||
@ -129,185 +137,178 @@ bool CudaCompressor::isValid() const
|
||||
return false;
|
||||
}
|
||||
#endif
|
||||
return m_data != NULL && m_result != NULL && m_bitmapTable != NULL;
|
||||
return bitmapTable != NULL && bitmapTableCTX != NULL && data != NULL && result != NULL;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Construct a compressor that uses the device resources held by `ctx`.
/// Performs no work beyond initializing m_ctx from `ctx`.
CudaCompressor::CudaCompressor(CudaContext & ctx)
    : m_ctx(ctx)
{
}
|
||||
|
||||
/// Compress a w x h image on the GPU and stream the result to the output handler.
///
/// The image is uploaded to a CUDA array, the concrete compressor is prepared
/// through setup(), and the blocks are then processed in batches of at most
/// MAX_BLOCKS through compressBlocks(). Each batch is written to
/// outputOptions.outputHandler (if any) as `count * blockSize()` bytes.
///
/// @param inputFormat  InputFormat_BGRA_8UB selects an 8-bit 4-channel array;
///                     any other value is uploaded as 4 x 32-bit float texels.
/// @param alphaMode    Forwarded to compressBlocks().
/// @param w, h         Image size in pixels.
/// @param data         Host pixels, w * h texels in the layout implied by inputFormat.
/// @param compressionOptions  Forwarded to setup() and compressBlocks().
/// @param outputOptions       Supplies the optional error and output handlers.
///
/// CUDA failures are reported through outputOptions.errorHandler as
/// Error_CudaError. Without HAVE_CUDA the function only reports that error.
void CudaCompressor::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    nvDebugCheck(cuda::isHardwarePresent());

#if defined HAVE_CUDA

    // Pick the texel layout of the CUDA array and the matching host texel size.
    cudaChannelFormatDesc channelDesc;
    size_t texelSize;
    if (inputFormat == nvtt::InputFormat_BGRA_8UB)
    {
        channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
        texelSize = 4 * sizeof(unsigned char);
    }
    else
    {
#pragma message(NV_FILE_LINE "FIXME: Floating point textures not really supported by CUDA compressors.")
        channelDesc = cudaCreateChannelDesc(32, 32, 32, 32, cudaChannelFormatKindFloat);
        // Four 32-bit channels per texel; uploading only sizeof(uint) bytes
        // per texel would leave three quarters of the array uninitialized.
        texelSize = 4 * sizeof(float);
    }

    // Allocate image as a cuda array.
    cudaArray * d_image = NULL;
    if (cudaMallocArray(&d_image, &channelDesc, w, h) != cudaSuccess)
    {
        if (outputOptions.errorHandler != NULL)
        {
            outputOptions.errorHandler->error(Error_CudaError);
        }
        return;
    }

    const size_t imageSize = size_t(w) * size_t(h) * texelSize;
    cudaMemcpyToArray(d_image, 0, 0, data, imageSize, cudaMemcpyHostToDevice);

    // Image size in blocks.
    const uint bw = (w + 3) / 4;
    const uint bh = (h + 3) / 4;
    const uint bs = blockSize();
    const uint blockNum = bw * bh;

    // Host staging buffer for one batch of compressed blocks.
    void * h_result = malloc(min(blockNum, MAX_BLOCKS) * bs);

    setup(d_image, compressionOptions);

    uint bn = 0;
    while (bn != blockNum)
    {
        const uint count = min(blockNum - bn, MAX_BLOCKS);

        // The kernels turn a linear block index into texel coordinates using
        // the row length in blocks, so pass the block dimensions here, not
        // the pixel dimensions.
        compressBlocks(bn, count, bw, bh, alphaMode, compressionOptions, h_result);

        // Check for errors.
        cudaError_t err = cudaGetLastError();
        if (err != cudaSuccess)
        {
            //nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
            if (outputOptions.errorHandler != NULL)
            {
                outputOptions.errorHandler->error(Error_CudaError);
            }
        }

        // Output result.
        if (outputOptions.outputHandler != NULL)
        {
            outputOptions.outputHandler->writeData(h_result, count * bs);
        }

        bn += count;
    }

    free(h_result);
    cudaFreeArray(d_image);

#else
    if (outputOptions.errorHandler != NULL)
    {
        outputOptions.errorHandler->error(Error_CudaError);
    }
#endif
}
|
||||
|
||||
|
||||
/// Prepare the DXT1 kernels: upload the color weights from
/// `compressionOptions` and bind `image` as the texture the kernels read.
void CudaCompressorDXT1::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
{
    const float * const channelWeights = compressionOptions.colorWeight.ptr();
    setupCompressKernel(channelWeights);

    bindTextureToArray(image);
}
|
||||
|
||||
/// Compress `count` blocks starting at block index `first` and copy them to `output`.
///
/// `w` is forwarded to the kernel launcher unchanged; `h`, `alphaMode` and
/// `compressionOptions` are not used here. `output` must have room for
/// `count * 8` bytes.
void CudaCompressorDXT1::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Run the kernel into the shared device result buffer.
    compressKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable);

    // Bring the compressed blocks back to the host (8 bytes each).
    const uint bytesPerBlock = 8;
    cudaMemcpy(output, m_ctx.result, count * bytesPerBlock, cudaMemcpyDeviceToHost);
}
|
||||
|
||||
|
||||
/// Prepare the DXT3 kernels: upload the color weights from
/// `compressionOptions` and bind `image` as the texture the kernels read.
void CudaCompressorDXT3::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
{
    const float * const channelWeights = compressionOptions.colorWeight.ptr();
    setupCompressKernel(channelWeights);

    bindTextureToArray(image);
}
|
||||
|
||||
/// Compress `count` blocks starting at block index `first` and copy them to `output`.
///
/// `w` is forwarded to the kernel launcher unchanged; `h`, `alphaMode` and
/// `compressionOptions` are not used here. `output` must have room for
/// `count * 16` bytes.
///
/// NOTE(review): this reads 16 bytes per block from m_ctx.result, so that
/// device buffer must be allocated with at least 16 bytes per block for a
/// full batch; confirm against the CudaContext allocation.
void CudaCompressorDXT3::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Run the kernel into the shared device result buffer.
    compressKernelDXT3(first, count, w, m_ctx.result, m_ctx.bitmapTable);

    // Bring the compressed blocks back to the host (16 bytes each).
    const uint bytesPerBlock = 16;
    cudaMemcpy(output, m_ctx.result, count * bytesPerBlock, cudaMemcpyDeviceToHost);
}
|
||||
|
||||
|
||||
/// Prepare the DXT5 kernels: upload the color weights from
/// `compressionOptions` and bind `image` as the texture the kernels read.
void CudaCompressorDXT5::setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions)
{
    const float * const channelWeights = compressionOptions.colorWeight.ptr();
    setupCompressKernel(channelWeights);

    bindTextureToArray(image);
}
|
||||
|
||||
/// Work-in-progress DXT5 batch compressor.
///
/// No kernel is launched yet and no alpha blocks are produced: the only
/// effective operation is copying `count * 8` bytes from the shared device
/// result buffer to `output`. All other parameters are currently unused.
void CudaCompressorDXT5::compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Disabled: dedicated DXT5 kernel path.
    //   compressKernelDXT5(first, count, w, m_ctx.result, m_ctx.bitmapTable);
    //   cudaMemcpy(output, m_ctx.result, count * 16, cudaMemcpyDeviceToHost);

    // Disabled: color kernel selection by alpha mode.
    //   alphaMode == AlphaMode_Transparency:
    //       compressWeightedKernelDXT1(first, count, w, m_ctx.result, m_ctx.bitmapTable);
    //   otherwise:
    //       compressKernelDXT1_Level4(first, count, w, m_ctx.result, m_ctx.bitmapTable);

    // Disabled: compress alpha on the CPU in parallel with the GPU, per block i in [0, count):
    //   ColorBlock rgba(blockLinearImage + (first + i) * 16);
    //   OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);

    // Copy result to host.
    const uint colorBytesPerBlock = 8;
    cudaMemcpy(output, m_ctx.result, count * colorBytesPerBlock, cudaMemcpyDeviceToHost);

    // @@ Interleave color and alpha blocks.
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// @@ This code is very repetitive and needs to be cleaned up.
|
||||
|
||||
#if 0
|
||||
|
||||
struct CudaCompressionKernel
|
||||
{
|
||||
virtual void setup(const CompressionOptions::Private & compressionOptions)
|
||||
{
|
||||
setupCompressKernel(compressionOptions.colorWeight.ptr());
|
||||
}
|
||||
|
||||
virtual void setBitmapTable();
|
||||
|
||||
virtual void runDeviceCode(int count);
|
||||
|
||||
virtual void runHostCode(int count);
|
||||
|
||||
};
|
||||
|
||||
void CudaCompressor::compressKernel(CudaCompressionKernel * kernel)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
|
||||
clock_t start = clock();
|
||||
|
||||
kernel->setup(compressionOptions);
|
||||
kernel->setBitmapTable(m_bitmapTable);
|
||||
|
||||
// TODO: Add support for multiple GPUs.
|
||||
uint bn = 0;
|
||||
while(bn != blockNum)
|
||||
{
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
kernel->runDeviceCode(count, m_data, m_result);
|
||||
|
||||
kernel->runHostCode(count);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
|
||||
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_CudaError);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
kernel->outputResult(outputOptions.outputHandler);
|
||||
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
{
|
||||
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
|
||||
}
|
||||
|
||||
bn += count;
|
||||
}
|
||||
|
||||
clock_t end = clock();
|
||||
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
|
||||
|
||||
free(blockLinearImage);
|
||||
|
||||
#else
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_CudaError);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // 0
|
||||
|
||||
|
||||
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||
{
|
||||
m_image = image;
|
||||
m_alphaMode = alphaMode;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Allocate image as a cuda array.
|
||||
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 8, 8, 8, cudaChannelFormatKindUnsigned);
|
||||
|
||||
cudaArray * d_image;
|
||||
const int imageSize = m_image->width() * m_image->height() * sizeof(uint);
|
||||
cudaMallocArray(&d_image, &channelDesc, m_image->width(), m_image->height());
|
||||
cudaMemcpyToArray(d_image, 0, 0, m_image->pixels(), imageSize, cudaMemcpyHostToDevice);
|
||||
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (m_image->width() + 3) / 4;
|
||||
const uint h = (m_image->height() + 3) / 4;
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
|
||||
void * h_result = malloc(min(blockNum, MAX_BLOCKS) * 8);
|
||||
|
||||
//clock_t start = clock();
|
||||
|
||||
setupCompressKernel(compressionOptions.colorWeight.ptr());
|
||||
|
||||
uint bn = 0;
|
||||
while(bn != blockNum)
|
||||
{
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
// Launch kernel.
|
||||
compressKernelDXT1_Tex(bn, count, w, d_image, m_result, m_bitmapTable);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
|
||||
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_CudaError);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(h_result, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
{
|
||||
outputOptions.outputHandler->writeData(h_result, count * 8);
|
||||
}
|
||||
|
||||
bn += count;
|
||||
}
|
||||
|
||||
//clock_t end = clock();
|
||||
//printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
|
||||
|
||||
free(h_result);
|
||||
|
||||
#else
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_CudaError);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
@ -337,16 +338,16 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
|
||||
{
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
if (m_alphaMode == AlphaMode_Transparency)
|
||||
{
|
||||
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
|
||||
compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
|
||||
compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
|
||||
}
|
||||
|
||||
// Compress alpha in parallel with the GPU.
|
||||
@ -369,7 +370,7 @@ void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressio
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -428,16 +429,16 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
|
||||
{
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
if (m_alphaMode == AlphaMode_Transparency)
|
||||
{
|
||||
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
|
||||
compressWeightedKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
|
||||
}
|
||||
else
|
||||
{
|
||||
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
|
||||
compressKernelDXT1_Level4(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
|
||||
}
|
||||
|
||||
// Compress alpha in parallel with the GPU.
|
||||
@ -460,7 +461,7 @@ void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressio
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -516,10 +517,10 @@ void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & com
|
||||
{
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressNormalKernelDXT1(count, m_data, m_result, m_bitmapTable);
|
||||
compressNormalKernelDXT1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTable);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
@ -534,7 +535,7 @@ void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & com
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -585,10 +586,10 @@ void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & comp
|
||||
{
|
||||
uint count = min(blockNum - bn, MAX_BLOCKS);
|
||||
|
||||
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
cudaMemcpy(m_ctx.data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressKernelCTX1(count, m_data, m_result, m_bitmapTableCTX);
|
||||
compressKernelCTX1(count, m_ctx.data, m_ctx.result, m_ctx.bitmapTableCTX);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
@ -603,7 +604,7 @@ void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & comp
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
|
||||
cudaMemcpy(blockLinearImage, m_ctx.result, count * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
@ -643,4 +644,4 @@ void CudaCompressor::compressDXT5n(const nvtt::CompressionOptions::Private & com
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
#endif // 0
|
||||
|
@ -27,38 +27,86 @@
|
||||
#include <nvimage/nvimage.h>
|
||||
#include <nvtt/nvtt.h>
|
||||
|
||||
#include "nvtt/CompressDXT.h"
|
||||
|
||||
struct cudaArray;
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Image;
|
||||
|
||||
// NOTE(review): this span is a rendered diff hunk. The declaration of the old
// `CudaCompressor` class and of the new `CudaContext` class appear interleaved
// below without +/- markers. Presumably the compress*/setImage methods and the
// m_-prefixed private members belong to the removed class, while the public
// un-prefixed pointers (bitmapTable, bitmapTableCTX, data, result) belong to
// CudaContext -- confirm against the repository before relying on this.
class CudaCompressor
|
||||
class CudaContext
|
||||
{
|
||||
public:
|
||||
CudaCompressor();
|
||||
~CudaCompressor();
|
||||
CudaContext();
|
||||
~CudaContext();
|
||||
|
||||
bool isValid() const;
|
||||
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
|
||||
uint * m_bitmapTable;
|
||||
uint * m_bitmapTableCTX;
|
||||
uint * m_data;
|
||||
uint * m_result;
|
||||
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
public:
|
||||
// Device pointers.
|
||||
uint * bitmapTable;
|
||||
uint * bitmapTableCTX;
|
||||
uint * data;
|
||||
uint * result;
|
||||
};
|
||||
|
||||
|
||||
// Abstract base for the CUDA block compressors; derives from CompressorInterface.
// compress() is declared here and defined elsewhere; setup(), compressBlocks()
// and blockSize() are pure virtual and must be supplied by each derived format.
// m_ctx is a reference member, so the CudaContext handed to the constructor
// must outlive the compressor object.
// NOTE(review): presumably the constructor binds `ctx` to m_ctx and compress()
// drives setup()/compressBlocks() -- neither definition is visible here; confirm.
struct CudaCompressor : public CompressorInterface
|
||||
{
|
||||
CudaCompressor(CudaContext & ctx);
|
||||
|
||||
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) = 0;
|
||||
virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
|
||||
virtual uint blockSize() const = 0;
|
||||
|
||||
protected:
|
||||
CudaContext & m_ctx;
|
||||
};
|
||||
|
||||
// DXT1 specialisation of CudaCompressor. The constructor forwards the shared
// CudaContext to the base class; setup() and compressBlocks() are declared
// here and defined elsewhere.
// blockSize() returns 8.
// NOTE(review): presumably this is the size in bytes of one compressed 4x4
// block -- confirm against the caller of blockSize().
struct CudaCompressorDXT1 : public CudaCompressor
|
||||
{
|
||||
CudaCompressorDXT1(CudaContext & ctx) : CudaCompressor(ctx) {}
|
||||
|
||||
virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 8; };
|
||||
};
|
||||
|
||||
/*struct CudaCompressorDXT1n : public CudaCompressor
|
||||
{
|
||||
virtual void setup(const CompressionOptions::Private & compressionOptions);
|
||||
virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
|
||||
virtual uint blockSize() const { return 8; };
|
||||
};*/
|
||||
|
||||
// DXT3 specialisation of CudaCompressor. The constructor forwards the shared
// CudaContext to the base class; setup() and compressBlocks() are declared
// here and defined elsewhere.
// blockSize() returns 16.
// NOTE(review): presumably this is the size in bytes of one compressed 4x4
// block -- confirm against the caller of blockSize().
struct CudaCompressorDXT3 : public CudaCompressor
|
||||
{
|
||||
CudaCompressorDXT3(CudaContext & ctx) : CudaCompressor(ctx) {}
|
||||
|
||||
virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; };
|
||||
};
|
||||
|
||||
// DXT5 specialisation of CudaCompressor. The constructor forwards the shared
// CudaContext to the base class; setup() and compressBlocks() are declared
// here and defined elsewhere.
// blockSize() returns 16.
// NOTE(review): presumably this is the size in bytes of one compressed 4x4
// block -- confirm against the caller of blockSize().
struct CudaCompressorDXT5 : public CudaCompressor
|
||||
{
|
||||
CudaCompressorDXT5(CudaContext & ctx) : CudaCompressor(ctx) {}
|
||||
|
||||
virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions);
|
||||
virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
virtual uint blockSize() const { return 16; };
|
||||
};
|
||||
|
||||
/*struct CudaCompressorCTX1 : public CudaCompressor
|
||||
{
|
||||
virtual void setup(const CompressionOptions::Private & compressionOptions);
|
||||
virtual void compressBlocks(uint blockCount, const void * input, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
|
||||
virtual uint blockSize() const { return 8; };
|
||||
};*/
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
|
@ -93,6 +93,9 @@ namespace nvtt
|
||||
Format_DXT1n,
|
||||
Format_CTX1,
|
||||
Format_YCoCg_DXT5,
|
||||
|
||||
Format_BC6,
|
||||
Format_BC7,
|
||||
};
|
||||
|
||||
/// Pixel types.
|
||||
|
Loading…
Reference in New Issue
Block a user