|
|
|
@ -48,7 +48,8 @@ using namespace nvtt;
|
|
|
|
|
|
|
|
|
|
#if defined HAVE_CUDA
|
|
|
|
|
|
|
|
|
|
#define MAX_BLOCKS 32768 // 49152, 65535
|
|
|
|
|
#define MAX_BLOCKS 8192U // 32768, 65535
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
extern "C" void setupCompressKernel(const float weights[3]);
|
|
|
|
|
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
|
|
|
|
@ -79,12 +80,15 @@ static void convertToBlockLinear(const Image * image, uint * blockLinearImage)
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
CudaCompressor::CudaCompressor()
|
|
|
|
|
CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_data(NULL), m_result(NULL)
|
|
|
|
|
{
|
|
|
|
|
#if defined HAVE_CUDA
|
|
|
|
|
// Allocate and upload bitmaps.
|
|
|
|
|
cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
|
|
|
|
|
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
|
|
|
|
|
if (m_bitmapTable != NULL)
|
|
|
|
|
{
|
|
|
|
|
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Allocate scratch buffers.
|
|
|
|
|
cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
|
|
|
|
@ -102,7 +106,16 @@ CudaCompressor::~CudaCompressor()
|
|
|
|
|
#endif
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
bool CudaCompressor::isValid() const
|
|
|
|
|
{
|
|
|
|
|
#if defined HAVE_CUDA
|
|
|
|
|
if (cudaGetLastError() != cudaSuccess)
|
|
|
|
|
{
|
|
|
|
|
return false;
|
|
|
|
|
}
|
|
|
|
|
#endif
|
|
|
|
|
return m_data != NULL && m_result != NULL && m_bitmapTable != NULL;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// @@ This code is very repetitive and needs to be cleaned up.
|
|
|
|
|
|
|
|
|
@ -119,7 +132,7 @@ void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Priv
|
|
|
|
|
|
|
|
|
|
uint imageSize = w * h * 16 * sizeof(Color32);
|
|
|
|
|
uint * blockLinearImage = (uint *) malloc(imageSize);
|
|
|
|
|
convertToBlockLinear(image, blockLinearImage); // @@ Do this on the GPU!
|
|
|
|
|
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
|
|
|
|
|
|
|
|
|
|
const uint blockNum = w * h;
|
|
|
|
|
const uint compressedSize = blockNum * 8;
|
|
|
|
|