diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp index 9947ea0..5fc7f8c 100644 --- a/src/nvtt/cuda/CudaCompressDXT.cpp +++ b/src/nvtt/cuda/CudaCompressDXT.cpp @@ -48,7 +48,8 @@ using namespace nvtt; #if defined HAVE_CUDA -#define MAX_BLOCKS 32768 // 49152, 65535 +#define MAX_BLOCKS 8192U // 32768, 65535 + extern "C" void setupCompressKernel(const float weights[3]); extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); @@ -79,12 +80,15 @@ static void convertToBlockLinear(const Image * image, uint * blockLinearImage) #endif -CudaCompressor::CudaCompressor() +CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_data(NULL), m_result(NULL) { #if defined HAVE_CUDA // Allocate and upload bitmaps. cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint)); - cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); + if (m_bitmapTable != NULL) + { + cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); + } // Allocate scratch buffers. cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U); @@ -102,7 +106,16 @@ CudaCompressor::~CudaCompressor() #endif } - +bool CudaCompressor::isValid() const +{ +#if defined HAVE_CUDA + if (cudaGetLastError() != cudaSuccess) + { + return false; + } +#endif + return m_data != NULL && m_result != NULL && m_bitmapTable != NULL; +} // @@ This code is very repetitive and needs to be cleaned up. @@ -119,7 +132,7 @@ void CudaCompressor::compressDXT1(const Image * image, const OutputOptions::Priv uint imageSize = w * h * 16 * sizeof(Color32); uint * blockLinearImage = (uint *) malloc(imageSize); - convertToBlockLinear(image, blockLinearImage); // @@ Do this on the GPU! + convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! const uint blockNum = w * h; const uint compressedSize = blockNum * 8; diff --git a/src/nvtt/cuda/CudaCompressDXT.h b/src/nvtt/cuda/CudaCompressDXT.h index c98c4e4..8e67315 100644 --- a/src/nvtt/cuda/CudaCompressDXT.h +++ b/src/nvtt/cuda/CudaCompressDXT.h @@ -37,6 +37,8 @@ namespace nv CudaCompressor(); ~CudaCompressor(); + bool isValid() const; + void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);