Precompute the fast cluster fit factors and store them as static const tables.
nvtt is now completely reentrant. Fixes issue 37. Clean up the interface of the CUDA compressors.
This commit is contained in:
parent
91eb30667f
commit
1df69495fc
|
@ -199,18 +199,6 @@ void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void nv::doPrecomputation()
|
|
||||||
{
|
|
||||||
static bool done = false; // @@ Stop using statics for reentrancy. Although the worst that could happen is that this stuff is precomputed multiple times.
|
|
||||||
|
|
||||||
if (!done)
|
|
||||||
{
|
|
||||||
done = true;
|
|
||||||
squish::FastClusterFit::DoPrecomputation();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
||||||
{
|
{
|
||||||
const uint w = image->width();
|
const uint w = image->width();
|
||||||
|
@ -219,8 +207,6 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
|
||||||
ColorBlock rgba;
|
ColorBlock rgba;
|
||||||
BlockDXT1 block;
|
BlockDXT1 block;
|
||||||
|
|
||||||
doPrecomputation();
|
|
||||||
|
|
||||||
//squish::WeightedClusterFit fit;
|
//squish::WeightedClusterFit fit;
|
||||||
//squish::ClusterFit fit;
|
//squish::ClusterFit fit;
|
||||||
squish::FastClusterFit fit;
|
squish::FastClusterFit fit;
|
||||||
|
@ -363,8 +349,6 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
|
||||||
ColorBlock rgba;
|
ColorBlock rgba;
|
||||||
BlockDXT5 block;
|
BlockDXT5 block;
|
||||||
|
|
||||||
doPrecomputation();
|
|
||||||
|
|
||||||
for (uint y = 0; y < h; y += 4) {
|
for (uint y = 0; y < h; y += 4) {
|
||||||
for (uint x = 0; x < w; x += 4) {
|
for (uint x = 0; x < w; x += 4) {
|
||||||
|
|
||||||
|
|
|
@ -725,7 +725,8 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||||
if (cudaEnabled)
|
if (cudaEnabled)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cudaSupported);
|
nvDebugCheck(cudaSupported);
|
||||||
cuda->compressDXT1(image, compressionOptions, outputOptions);
|
cuda->setImage(image, inputOptions.alphaMode);
|
||||||
|
cuda->compressDXT1(compressionOptions, outputOptions);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -757,7 +758,8 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||||
if (cudaEnabled)
|
if (cudaEnabled)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cudaSupported);
|
nvDebugCheck(cudaSupported);
|
||||||
cuda->compressDXT1n(image, compressionOptions, outputOptions);
|
cuda->setImage(image, inputOptions.alphaMode);
|
||||||
|
cuda->compressDXT1n(compressionOptions, outputOptions);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -775,7 +777,8 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||||
if (cudaEnabled)
|
if (cudaEnabled)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cudaSupported);
|
nvDebugCheck(cudaSupported);
|
||||||
cuda->compressDXT3(image, inputOptions, compressionOptions, outputOptions);
|
cuda->setImage(image, inputOptions.alphaMode);
|
||||||
|
cuda->compressDXT3(compressionOptions, outputOptions);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -794,7 +797,8 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||||
if (cudaEnabled)
|
if (cudaEnabled)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cudaSupported);
|
nvDebugCheck(cudaSupported);
|
||||||
cuda->compressDXT5(image, inputOptions, compressionOptions, outputOptions);
|
cuda->setImage(image, inputOptions.alphaMode);
|
||||||
|
cuda->compressDXT5(compressionOptions, outputOptions);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -826,7 +830,8 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
|
||||||
if (cudaEnabled)
|
if (cudaEnabled)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cudaSupported);
|
nvDebugCheck(cudaSupported);
|
||||||
cuda->compressCTX1(image, compressionOptions, outputOptions);
|
cuda->setImage(image, inputOptions.alphaMode);
|
||||||
|
cuda->compressCTX1(compressionOptions, outputOptions);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
|
|
@ -213,21 +213,27 @@ void CudaCompressor::compressKernel(CudaCompressionKernel * kernel)
|
||||||
#endif // 0
|
#endif // 0
|
||||||
|
|
||||||
|
|
||||||
|
void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
|
||||||
|
{
|
||||||
|
m_image = image;
|
||||||
|
m_alphaMode = alphaMode;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// Compress image using CUDA.
|
/// Compress image using CUDA.
|
||||||
void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cuda::isHardwarePresent());
|
nvDebugCheck(cuda::isHardwarePresent());
|
||||||
#if defined HAVE_CUDA
|
#if defined HAVE_CUDA
|
||||||
|
|
||||||
// Image size in blocks.
|
// Image size in blocks.
|
||||||
const uint w = (image->width() + 3) / 4;
|
const uint w = (m_image->width() + 3) / 4;
|
||||||
const uint h = (image->height() + 3) / 4;
|
const uint h = (m_image->height() + 3) / 4;
|
||||||
|
|
||||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||||
|
|
||||||
const uint blockNum = w * h;
|
const uint blockNum = w * h;
|
||||||
const uint compressedSize = blockNum * 8;
|
const uint compressedSize = blockNum * 8;
|
||||||
|
@ -286,18 +292,18 @@ void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions:
|
||||||
|
|
||||||
|
|
||||||
/// Compress image using CUDA.
|
/// Compress image using CUDA.
|
||||||
void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cuda::isHardwarePresent());
|
nvDebugCheck(cuda::isHardwarePresent());
|
||||||
#if defined HAVE_CUDA
|
#if defined HAVE_CUDA
|
||||||
|
|
||||||
// Image size in blocks.
|
// Image size in blocks.
|
||||||
const uint w = (image->width() + 3) / 4;
|
const uint w = (m_image->width() + 3) / 4;
|
||||||
const uint h = (image->height() + 3) / 4;
|
const uint h = (m_image->height() + 3) / 4;
|
||||||
|
|
||||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||||
convertToBlockLinear(image, blockLinearImage);
|
convertToBlockLinear(m_image, blockLinearImage);
|
||||||
|
|
||||||
const uint blockNum = w * h;
|
const uint blockNum = w * h;
|
||||||
const uint compressedSize = blockNum * 8;
|
const uint compressedSize = blockNum * 8;
|
||||||
|
@ -370,18 +376,18 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
|
||||||
|
|
||||||
|
|
||||||
/// Compress image using CUDA.
|
/// Compress image using CUDA.
|
||||||
void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cuda::isHardwarePresent());
|
nvDebugCheck(cuda::isHardwarePresent());
|
||||||
#if defined HAVE_CUDA
|
#if defined HAVE_CUDA
|
||||||
|
|
||||||
// Image size in blocks.
|
// Image size in blocks.
|
||||||
const uint w = (image->width() + 3) / 4;
|
const uint w = (m_image->width() + 3) / 4;
|
||||||
const uint h = (image->height() + 3) / 4;
|
const uint h = (m_image->height() + 3) / 4;
|
||||||
|
|
||||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||||
convertToBlockLinear(image, blockLinearImage);
|
convertToBlockLinear(m_image, blockLinearImage);
|
||||||
|
|
||||||
const uint blockNum = w * h;
|
const uint blockNum = w * h;
|
||||||
const uint compressedSize = blockNum * 8;
|
const uint compressedSize = blockNum * 8;
|
||||||
|
@ -453,18 +459,18 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void CudaCompressor::compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
void CudaCompressor::compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cuda::isHardwarePresent());
|
nvDebugCheck(cuda::isHardwarePresent());
|
||||||
#if defined HAVE_CUDA
|
#if defined HAVE_CUDA
|
||||||
|
|
||||||
// Image size in blocks.
|
// Image size in blocks.
|
||||||
const uint w = (image->width() + 3) / 4;
|
const uint w = (m_image->width() + 3) / 4;
|
||||||
const uint h = (image->height() + 3) / 4;
|
const uint h = (m_image->height() + 3) / 4;
|
||||||
|
|
||||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||||
|
|
||||||
const uint blockNum = w * h;
|
const uint blockNum = w * h;
|
||||||
const uint compressedSize = blockNum * 8;
|
const uint compressedSize = blockNum * 8;
|
||||||
|
@ -522,18 +528,18 @@ void CudaCompressor::compressDXT1n(const Image * image, const nvtt::CompressionO
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void CudaCompressor::compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
void CudaCompressor::compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||||
{
|
{
|
||||||
nvDebugCheck(cuda::isHardwarePresent());
|
nvDebugCheck(cuda::isHardwarePresent());
|
||||||
#if defined HAVE_CUDA
|
#if defined HAVE_CUDA
|
||||||
|
|
||||||
// Image size in blocks.
|
// Image size in blocks.
|
||||||
const uint w = (image->width() + 3) / 4;
|
const uint w = (m_image->width() + 3) / 4;
|
||||||
const uint h = (image->height() + 3) / 4;
|
const uint h = (m_image->height() + 3) / 4;
|
||||||
|
|
||||||
uint imageSize = w * h * 16 * sizeof(Color32);
|
uint imageSize = w * h * 16 * sizeof(Color32);
|
||||||
uint * blockLinearImage = (uint *) malloc(imageSize);
|
uint * blockLinearImage = (uint *) malloc(imageSize);
|
||||||
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
||||||
|
|
||||||
const uint blockNum = w * h;
|
const uint blockNum = w * h;
|
||||||
const uint compressedSize = blockNum * 8;
|
const uint compressedSize = blockNum * 8;
|
||||||
|
@ -590,186 +596,3 @@ void CudaCompressor::compressCTX1(const Image * image, const nvtt::CompressionOp
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
|
|
||||||
class Task
|
|
||||||
{
|
|
||||||
public:
|
|
||||||
explicit Task(uint numBlocks) : blockMaxCount(numBlocks), blockCount(0)
|
|
||||||
{
|
|
||||||
// System memory allocations.
|
|
||||||
blockLinearImage = new uint[blockMaxCount * 16];
|
|
||||||
xrefs = new uint[blockMaxCount * 16];
|
|
||||||
|
|
||||||
// Device memory allocations.
|
|
||||||
cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
|
|
||||||
cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);
|
|
||||||
|
|
||||||
// @@ Check for allocation errors.
|
|
||||||
}
|
|
||||||
|
|
||||||
~Task()
|
|
||||||
{
|
|
||||||
delete [] blockLinearImage;
|
|
||||||
delete [] xrefs;
|
|
||||||
|
|
||||||
cudaFree(d_blockLinearImage);
|
|
||||||
cudaFree(d_compressedImage);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void addColorBlock(const ColorBlock & rgba)
|
|
||||||
{
|
|
||||||
nvDebugCheck(!isFull());
|
|
||||||
|
|
||||||
// @@ Count unique colors?
|
|
||||||
/*
|
|
||||||
// Convert colors to vectors.
|
|
||||||
Array<Vector3> pointArray(16);
|
|
||||||
|
|
||||||
for(int i = 0; i < 16; i++) {
|
|
||||||
const Color32 color = rgba.color(i);
|
|
||||||
pointArray.append(Vector3(color.r, color.g, color.b));
|
|
||||||
}
|
|
||||||
|
|
||||||
// Find best fit line.
|
|
||||||
const Vector3 axis = Fit::bestLine(pointArray).direction();
|
|
||||||
|
|
||||||
// Project points to axis.
|
|
||||||
float dps[16];
|
|
||||||
uint * order = &xrefs[blockCount * 16];
|
|
||||||
|
|
||||||
for (uint i = 0; i < 16; ++i)
|
|
||||||
{
|
|
||||||
dps[i] = dot(pointArray[i], axis);
|
|
||||||
order[i] = i;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Sort them.
|
|
||||||
for (uint i = 0; i < 16; ++i)
|
|
||||||
{
|
|
||||||
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
|
|
||||||
{
|
|
||||||
swap(dps[j], dps[j - 1]);
|
|
||||||
swap(order[j], order[j - 1]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
// Write sorted colors to blockLinearImage.
|
|
||||||
for(uint i = 0; i < 16; ++i)
|
|
||||||
{
|
|
||||||
// blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
|
|
||||||
blockLinearImage[blockCount * 16 + i] = rgba.color(i);
|
|
||||||
}
|
|
||||||
|
|
||||||
++blockCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
bool isFull()
|
|
||||||
{
|
|
||||||
nvDebugCheck(blockCount <= blockMaxCount);
|
|
||||||
return blockCount == blockMaxCount;
|
|
||||||
}
|
|
||||||
|
|
||||||
void flush(const OutputOptions::Private & outputOptions)
|
|
||||||
{
|
|
||||||
if (blockCount == 0)
|
|
||||||
{
|
|
||||||
// Nothing to do.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy input color blocks.
|
|
||||||
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
|
|
||||||
|
|
||||||
// Launch kernel.
|
|
||||||
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
|
|
||||||
|
|
||||||
// Check for errors.
|
|
||||||
cudaError_t err = cudaGetLastError();
|
|
||||||
if (err != cudaSuccess)
|
|
||||||
{
|
|
||||||
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
|
|
||||||
|
|
||||||
if (outputOptions.errorHandler != NULL)
|
|
||||||
{
|
|
||||||
outputOptions.errorHandler->error(Error_CudaError);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Copy result to host, overwrite swizzled image.
|
|
||||||
uint * compressedImage = blockLinearImage;
|
|
||||||
cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);
|
|
||||||
|
|
||||||
// @@ Sort block indices.
|
|
||||||
|
|
||||||
// Output result.
|
|
||||||
if (outputOptions.outputHandler != NULL)
|
|
||||||
{
|
|
||||||
// outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
|
|
||||||
}
|
|
||||||
|
|
||||||
blockCount = 0;
|
|
||||||
}
|
|
||||||
|
|
||||||
private:
|
|
||||||
|
|
||||||
const uint blockMaxCount;
|
|
||||||
uint blockCount;
|
|
||||||
|
|
||||||
uint * blockLinearImage;
|
|
||||||
uint * xrefs;
|
|
||||||
|
|
||||||
uint * d_blockLinearImage;
|
|
||||||
uint * d_compressedImage;
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
|
|
||||||
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
|
|
||||||
{
|
|
||||||
#if defined HAVE_CUDA
|
|
||||||
const uint w = image->width();
|
|
||||||
const uint h = image->height();
|
|
||||||
|
|
||||||
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
|
|
||||||
const uint blockMax = 32768; // 49152, 65535
|
|
||||||
|
|
||||||
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
|
|
||||||
|
|
||||||
ColorBlock rgba;
|
|
||||||
Task task(min(blockNum, blockMax));
|
|
||||||
|
|
||||||
clock_t start = clock();
|
|
||||||
|
|
||||||
for (uint y = 0; y < h; y += 4) {
|
|
||||||
for (uint x = 0; x < w; x += 4) {
|
|
||||||
|
|
||||||
rgba.init(image, x, y);
|
|
||||||
|
|
||||||
task.addColorBlock(rgba);
|
|
||||||
|
|
||||||
if (task.isFull())
|
|
||||||
{
|
|
||||||
task.flush(outputOptions);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
task.flush(outputOptions);
|
|
||||||
|
|
||||||
clock_t end = clock();
|
|
||||||
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
|
|
||||||
|
|
||||||
#else
|
|
||||||
if (outputOptions.errorHandler != NULL)
|
|
||||||
{
|
|
||||||
outputOptions.errorHandler->error(Error_CudaError);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
#endif // 0
|
|
||||||
|
|
|
@ -39,17 +39,22 @@ namespace nv
|
||||||
|
|
||||||
bool isValid() const;
|
bool isValid() const;
|
||||||
|
|
||||||
void compressDXT1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
|
||||||
void compressDXT3(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
|
||||||
void compressDXT5(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||||
void compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||||
void compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||||
|
void compressDXT1n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||||
|
void compressCTX1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
uint * m_bitmapTable;
|
uint * m_bitmapTable;
|
||||||
uint * m_data;
|
uint * m_data;
|
||||||
uint * m_result;
|
uint * m_result;
|
||||||
|
|
||||||
|
const Image * m_image;
|
||||||
|
nvtt::AlphaMode m_alphaMode;
|
||||||
};
|
};
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
113
src/nvtt/squish/extra/squishgen2.cpp
Normal file
113
src/nvtt/squish/extra/squishgen2.cpp
Normal file
|
@ -0,0 +1,113 @@
|
||||||
|
/* -----------------------------------------------------------------------------
|
||||||
|
|
||||||
|
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
|
||||||
|
Copyright (c) 2008 Ignacio Castano castano@gmail.com
|
||||||
|
|
||||||
|
Permission is hereby granted, free of charge, to any person obtaining
|
||||||
|
a copy of this software and associated documentation files (the
|
||||||
|
"Software"), to deal in the Software without restriction, including
|
||||||
|
without limitation the rights to use, copy, modify, merge, publish,
|
||||||
|
distribute, sublicense, and/or sell copies of the Software, and to
|
||||||
|
permit persons to whom the Software is furnished to do so, subject to
|
||||||
|
the following conditions:
|
||||||
|
|
||||||
|
The above copyright notice and this permission notice shall be included
|
||||||
|
in all copies or substantial portions of the Software.
|
||||||
|
|
||||||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||||
|
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||||
|
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||||
|
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||||
|
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||||
|
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||||
|
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
|
||||||
|
-------------------------------------------------------------------------- */
|
||||||
|
|
||||||
|
#include <stdio.h>
|
||||||
|
#include <float.h>
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
// One row of the generated lookup table: the three weight sums for a given
// partition of the 16 pixels of a block into clusters, plus the reciprocal of
// the determinant built from them.
struct Precomp {
	float alpha2_sum;     // per-cluster alpha^2 weights times cluster sizes
	float beta2_sum;      // per-cluster beta^2 weights times cluster sizes
	float alphabeta_sum;  // per-cluster alpha*beta weights times cluster sizes
	float factor;         // 1 / (alpha2_sum * beta2_sum - alphabeta_sum^2)
};

// Builds a table row from the three sums and derives the factor from them.
// The factor is infinite when the determinant is zero (degenerate partition).
static Precomp makePrecomp(float alpha2_sum, float beta2_sum, float alphabeta_sum)
{
	Precomp p;
	p.alpha2_sum = alpha2_sum;
	p.beta2_sum = beta2_sum;
	p.alphabeta_sum = alphabeta_sum;
	p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
	return p;
}

// Prints the "\t{ a, b, c, factor }," part of one row, without the trailing
// comment or newline. Non-finite factors are written as the INFINITY macro.
static void printPrecomp(const Precomp & p)
{
	// %.9g: nine significant digits are enough to round-trip any IEEE single,
	// whereas %f keeps only six digits after the decimal point and truncates
	// small factors to four or five significant digits.
	printf("\t{ %.9g, %.9g, %.9g, ", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum);

	if (isfinite(p.factor))
	{
		printf("%.9g },", p.factor);
	}
	else
	{
		printf("INFINITY },");
	}
}

// Writes to stdout the C++ source of the precomputed cluster fit tables:
// the Precomp struct, s_threeElement[153] and s_fourElement[969].
// Returns 0 on success, 1 if the number of generated rows does not match the
// declared array sizes.
int main()
{
	// Number of ways to split 16 pixels into 3 resp. 4 ordered, possibly
	// empty clusters: C(18,2) and C(19,3).
	const int threeElementCount = 153;
	const int fourElementCount = 969;

	printf("struct Precomp {\n");
	printf("\tfloat alpha2_sum;\n");
	printf("\tfloat beta2_sum;\n");
	printf("\tfloat alphabeta_sum;\n");
	printf("\tfloat factor;\n");
	printf("};\n\n");

	printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[%d] = {\n", threeElementCount);

	// Three element clusters. Every partition is enumerated, including the
	// degenerate ones whose determinant is zero.
	int i = 0;
	for (int c0 = 0; c0 <= 16; c0++)
	{
		for (int c1 = 0; c1 <= 16 - c0; c1++)
		{
			const int c2 = 16 - c0 - c1;

			// Coefficients 1, 1/4, 0 (alpha^2), 0, 1/4, 1 (beta^2) and
			// 0, 1/4, 0 (alpha*beta) for the three clusters.
			const Precomp p = makePrecomp(
				c0 + c1 * 0.25f,
				c2 + c1 * 0.25f,
				c1 * 0.25f);

			printPrecomp(p);
			printf(" // %d (%d %d %d)\n", i, c0, c1, c2);

			i++;
		}
	}
	printf("}; // %d three cluster elements\n\n", i);

	if (i != threeElementCount)
	{
		fprintf(stderr, "squishgen2: generated %d three element rows, expected %d\n", i, threeElementCount);
		return 1;
	}

	printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[%d] = {\n", fourElementCount);

	// Four element clusters.
	i = 0;
	for (int c0 = 0; c0 <= 16; c0++)
	{
		for (int c1 = 0; c1 <= 16 - c0; c1++)
		{
			for (int c2 = 0; c2 <= 16 - c0 - c1; c2++)
			{
				const int c3 = 16 - c0 - c1 - c2;

				// Coefficients 1, 4/9, 1/9, 0 (alpha^2), 0, 1/9, 4/9, 1
				// (beta^2) and 0, 2/9, 2/9, 0 (alpha*beta).
				const Precomp p = makePrecomp(
					c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f),
					c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f),
					(c1 + c2) * (2.0f/9.0f));

				printPrecomp(p);
				printf(" // %d (%d %d %d %d)\n", i, c0, c1, c2, c3);

				i++;
			}
		}
	}
	printf("}; // %d four cluster elements\n\n", i);

	if (i != fourElementCount)
	{
		fprintf(stderr, "squishgen2: generated %d four element rows, expected %d\n", i, fourElementCount);
		return 1;
	}

	return 0;
}
|
|
@ -29,6 +29,8 @@
|
||||||
#include "colourblock.h"
|
#include "colourblock.h"
|
||||||
#include <cfloat>
|
#include <cfloat>
|
||||||
|
|
||||||
|
#include "fastclusterlookup.inl"
|
||||||
|
|
||||||
namespace squish {
|
namespace squish {
|
||||||
|
|
||||||
FastClusterFit::FastClusterFit()
|
FastClusterFit::FastClusterFit()
|
||||||
|
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
struct Precomp {
|
|
||||||
float alpha2_sum;
|
|
||||||
float beta2_sum;
|
|
||||||
float alphabeta_sum;
|
|
||||||
float factor;
|
|
||||||
};
|
|
||||||
|
|
||||||
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
|
|
||||||
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
|
|
||||||
|
|
||||||
void FastClusterFit::DoPrecomputation()
|
|
||||||
{
|
|
||||||
int i = 0;
|
|
||||||
|
|
||||||
// Three element clusters:
|
|
||||||
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
|
|
||||||
{
|
|
||||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
|
||||||
{
|
|
||||||
int c2 = 16 - c0 - c1;
|
|
||||||
|
|
||||||
/*if (c2 == 16) {
|
|
||||||
// a = b = x2 / 16
|
|
||||||
s_threeElement[i].alpha2_sum = 0;
|
|
||||||
s_threeElement[i].beta2_sum = 16;
|
|
||||||
s_threeElement[i].alphabeta_sum = -16;
|
|
||||||
s_threeElement[i].factor = 1.0f / 256.0f;
|
|
||||||
}
|
|
||||||
else if (c0 == 16) {
|
|
||||||
// a = b = x0 / 16
|
|
||||||
s_threeElement[i].alpha2_sum = 16;
|
|
||||||
s_threeElement[i].beta2_sum = 0;
|
|
||||||
s_threeElement[i].alphabeta_sum = -16;
|
|
||||||
s_threeElement[i].factor = 1.0f / 256.0f;
|
|
||||||
}
|
|
||||||
else*/ {
|
|
||||||
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
|
|
||||||
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
|
|
||||||
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
|
|
||||||
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
|
|
||||||
}
|
|
||||||
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//printf("%d three cluster elements\n", i);
|
|
||||||
|
|
||||||
// Four element clusters:
|
|
||||||
i = 0;
|
|
||||||
for( int c0 = 0; c0 <= 16; c0++)
|
|
||||||
{
|
|
||||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
|
||||||
{
|
|
||||||
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
|
||||||
{
|
|
||||||
int c3 = 16 - c0 - c1 - c2;
|
|
||||||
|
|
||||||
/*if (c3 == 16) {
|
|
||||||
// a = b = x3 / 16
|
|
||||||
s_fourElement[i].alpha2_sum = 16.0f;
|
|
||||||
s_fourElement[i].beta2_sum = 0.0f;
|
|
||||||
s_fourElement[i].alphabeta_sum = -16.0f;
|
|
||||||
s_fourElement[i].factor = 1.0f / 256.0f;
|
|
||||||
}
|
|
||||||
else if (c0 == 16) {
|
|
||||||
// a = b = x0 / 16
|
|
||||||
s_fourElement[i].alpha2_sum = 0.0f;
|
|
||||||
s_fourElement[i].beta2_sum = 16.0f;
|
|
||||||
s_fourElement[i].alphabeta_sum = -16.0f;
|
|
||||||
s_fourElement[i].factor = 1.0f / 256.0f;
|
|
||||||
}
|
|
||||||
else*/ {
|
|
||||||
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
|
|
||||||
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
|
|
||||||
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
|
|
||||||
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
|
|
||||||
}
|
|
||||||
|
|
||||||
i++;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
//printf("%d four cluster elements\n", i);
|
|
||||||
}
|
|
||||||
|
|
||||||
void FastClusterFit::SetMetric(float r, float g, float b)
|
void FastClusterFit::SetMetric(float r, float g, float b)
|
||||||
{
|
{
|
||||||
#if SQUISH_USE_SIMD
|
#if SQUISH_USE_SIMD
|
||||||
|
|
|
@ -44,8 +44,6 @@ public:
|
||||||
void SetMetric(float r, float g, float b);
|
void SetMetric(float r, float g, float b);
|
||||||
float GetBestError() const;
|
float GetBestError() const;
|
||||||
|
|
||||||
static void DoPrecomputation();
|
|
||||||
|
|
||||||
// Make them public
|
// Make them public
|
||||||
virtual void Compress3( void* block );
|
virtual void Compress3( void* block );
|
||||||
virtual void Compress4( void* block );
|
virtual void Compress4( void* block );
|
||||||
|
|
1135
src/nvtt/squish/fastclusterlookup.inl
Normal file
1135
src/nvtt/squish/fastclusterlookup.inl
Normal file
File diff suppressed because it is too large
Load Diff
Loading…
Reference in New Issue
Block a user