Precompute fast cluster fit factors and store them as static const.
nvtt is completely reentrant now. Fixes issue 37. Clean up the interface of the CUDA compressors.
This commit is contained in:
@ -213,21 +213,27 @@ void CudaCompressor::compressKernel(CudaCompressionKernel * kernel)
|
||||
#endif // 0
|
||||
|
||||
|
||||
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||
{
|
||||
m_image = image;
|
||||
m_alphaMode = alphaMode;
|
||||
}
|
||||
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
const uint w = (m_image->width() + 3) / 4;
|
||||
const uint h = (m_image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
@ -286,18 +292,18 @@ void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions:
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
const uint w = (m_image->width() + 3) / 4;
|
||||
const uint h = (m_image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage);
|
||||
convertToBlockLinear(m_image, blockLinearImage);
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
@ -370,18 +376,18 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
|
||||
|
||||
|
||||
/// Compress image using CUDA.
|
||||
void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
const uint w = (m_image->width() + 3) / 4;
|
||||
const uint h = (m_image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage);
|
||||
convertToBlockLinear(m_image, blockLinearImage);
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
@ -453,18 +459,18 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
|
||||
}
|
||||
|
||||
|
||||
void CudaCompressor::compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
const uint w = (m_image->width() + 3) / 4;
|
||||
const uint h = (m_image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
@ -522,18 +528,18 @@ void CudaCompressor::compressDXT1n(const Image * image, const nvtt::CompressionO
|
||||
}
|
||||
|
||||
|
||||
void CudaCompressor::compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(cuda::isHardwarePresent());
|
||||
#if defined HAVE_CUDA
|
||||
|
||||
// Image size in blocks.
|
||||
const uint w = (image->width() + 3) / 4;
|
||||
const uint h = (image->height() + 3) / 4;
|
||||
const uint w = (m_image->width() + 3) / 4;
|
||||
const uint h = (m_image->height() + 3) / 4;
|
||||
|
||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||
|
||||
const uint blockNum = w * h;
|
||||
const uint compressedSize = blockNum * 8;
|
||||
@ -590,186 +596,3 @@ void CudaCompressor::compressCTX1(const Image * image, const nvtt::CompressionOp
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
|
||||
#if 0
|
||||
|
||||
class Task
|
||||
{
|
||||
public:
|
||||
explicit Task(uint numBlocks) : blockMaxCount(numBlocks), blockCount(0)
|
||||
{
|
||||
// System memory allocations.
|
||||
blockLinearImage = new uint[blockMaxCount * 16];
|
||||
xrefs = new uint[blockMaxCount * 16];
|
||||
|
||||
// Device memory allocations.
|
||||
cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
|
||||
cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);
|
||||
|
||||
// @@ Check for allocation errors.
|
||||
}
|
||||
|
||||
~Task()
|
||||
{
|
||||
delete [] blockLinearImage;
|
||||
delete [] xrefs;
|
||||
|
||||
cudaFree(d_blockLinearImage);
|
||||
cudaFree(d_compressedImage);
|
||||
}
|
||||
|
||||
|
||||
|
||||
void addColorBlock(const ColorBlock & rgba)
|
||||
{
|
||||
nvDebugCheck(!isFull());
|
||||
|
||||
// @@ Count unique colors?
|
||||
/*
|
||||
// Convert colors to vectors.
|
||||
Array<Vector3> pointArray(16);
|
||||
|
||||
for(int i = 0; i < 16; i++) {
|
||||
const Color32 color = rgba.color(i);
|
||||
pointArray.append(Vector3(color.r, color.g, color.b));
|
||||
}
|
||||
|
||||
// Find best fit line.
|
||||
const Vector3 axis = Fit::bestLine(pointArray).direction();
|
||||
|
||||
// Project points to axis.
|
||||
float dps[16];
|
||||
uint * order = &xrefs[blockCount * 16];
|
||||
|
||||
for (uint i = 0; i < 16; ++i)
|
||||
{
|
||||
dps[i] = dot(pointArray[i], axis);
|
||||
order[i] = i;
|
||||
}
|
||||
|
||||
// Sort them.
|
||||
for (uint i = 0; i < 16; ++i)
|
||||
{
|
||||
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
|
||||
{
|
||||
swap(dps[j], dps[j - 1]);
|
||||
swap(order[j], order[j - 1]);
|
||||
}
|
||||
}
|
||||
*/
|
||||
// Write sorted colors to blockLinearImage.
|
||||
for(uint i = 0; i < 16; ++i)
|
||||
{
|
||||
// blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
|
||||
blockLinearImage[blockCount * 16 + i] = rgba.color(i);
|
||||
}
|
||||
|
||||
++blockCount;
|
||||
}
|
||||
|
||||
bool isFull()
|
||||
{
|
||||
nvDebugCheck(blockCount <= blockMaxCount);
|
||||
return blockCount == blockMaxCount;
|
||||
}
|
||||
|
||||
void flush(const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
if (blockCount == 0)
|
||||
{
|
||||
// Nothing to do.
|
||||
return;
|
||||
}
|
||||
|
||||
// Copy input color blocks.
|
||||
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
|
||||
|
||||
// Launch kernel.
|
||||
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
|
||||
|
||||
// Check for errors.
|
||||
cudaError_t err = cudaGetLastError();
|
||||
if (err != cudaSuccess)
|
||||
{
|
||||
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
|
||||
|
||||
if (outputOptions.errorHandler != NULL)
|
||||
{
|
||||
outputOptions.errorHandler->error(Error_CudaError);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy result to host, overwrite swizzled image.
|
||||
uint * compressedImage = blockLinearImage;
|
||||
cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);
|
||||
|
||||
// @@ Sort block indices.
|
||||
|
||||
// Output result.
|
||||
if (outputOptions.outputHandler != NULL)
|
||||
{
|
||||
// outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
|
||||
}
|
||||
|
||||
blockCount = 0;
|
||||
}
|
||||
|
||||
private:
|
||||
|
||||
const uint blockMaxCount;
|
||||
uint blockCount;
|
||||
|
||||
uint * blockLinearImage;
|
||||
uint * xrefs;
|
||||
|
||||
uint * d_blockLinearImage;
|
||||
uint * d_compressedImage;
|
||||
|
||||
};
|
||||
|
||||
|
||||
/// Compress image in batches of 4x4 blocks using a Task, then print the
/// elapsed clock() time.
///
/// Without HAVE_CUDA the function only reports Error_CudaError through
/// outputOptions.errorHandler (when one is set).
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if !defined HAVE_CUDA
	// Built without CUDA support: the only thing to do is signal the error.
	if (outputOptions.errorHandler != NULL)
	{
		outputOptions.errorHandler->error(Error_CudaError);
	}
#else
	const uint width = image->width();
	const uint height = image->height();

	// Image size in blocks, rounding partial blocks up.
	const uint blocksAcross = (width + 3) / 4;
	const uint blocksDown = (height + 3) / 4;
	const uint totalBlocks = blocksAcross * blocksDown;

	// Upper bound on the number of blocks submitted per batch.
	const uint batchLimit = 32768; // 49152, 65535

	setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());

	ColorBlock block;
	Task batch(min(totalBlocks, batchLimit));

	const clock_t began = clock();

	for (uint row = 0; row < height; row += 4)
	{
		for (uint col = 0; col < width; col += 4)
		{
			block.init(image, col, row);
			batch.addColorBlock(block);

			// Keep accumulating until the batch is at capacity.
			if (!batch.isFull())
			{
				continue;
			}

			batch.flush(outputOptions);
		}
	}

	// Submit whatever is left over from the last partial batch.
	batch.flush(outputOptions);

	const clock_t finished = clock();
	printf("\rCUDA time taken: %.3f seconds\n", float(finished-began) / CLOCKS_PER_SEC);
#endif
}
|
||||
|
||||
#endif // 0
|
||||
|
@ -39,17 +39,22 @@ namespace nv
|
||||
|
||||
bool isValid() const;
|
||||
|
||||
void compressDXT1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||
|
||||
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
void compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||
|
||||
private:
|
||||
|
||||
uint * m_bitmapTable;
|
||||
uint * m_data;
|
||||
uint * m_result;
|
||||
|
||||
const Image * m_image;
|
||||
nvtt::AlphaMode m_alphaMode;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
Reference in New Issue
Block a user