Fix memory leaks.

Shutdown CUDA properly when nvtt context is destroyed.
Fixes issue 83.
This commit is contained in:
castano
2009-03-18 05:46:53 +00:00
parent 56543e1a92
commit 8529dcf755
4 changed files with 282 additions and 224 deletions

View File

@ -224,21 +224,28 @@ Compressor::Compressor() : m(*new Compressor::Private())
{ {
// Select fastest CUDA device. // Select fastest CUDA device.
int device = cuda::getFastestDevice(); int device = cuda::getFastestDevice();
cuda::setDevice(device); if (!cuda::setDevice(device))
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{ {
m.cudaEnabled = false; m.cudaEnabled = false;
m.cuda = NULL; m.cuda = NULL;
} }
else
{
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
} }
} }
Compressor::~Compressor() Compressor::~Compressor()
{ {
delete &m; delete &m;
cuda::exit();
} }
@ -254,15 +261,21 @@ void Compressor::enableCudaAcceleration(bool enable)
{ {
// Select fastest CUDA device. // Select fastest CUDA device.
int device = cuda::getFastestDevice(); int device = cuda::getFastestDevice();
cuda::setDevice(device); if (!cuda::setDevice(device))
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{ {
m.cudaEnabled = false; m.cudaEnabled = false;
m.cuda = NULL; m.cuda = NULL;
} }
else
{
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
} }
} }

View File

@ -90,20 +90,34 @@ CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_bitmapTableCTX(NULL),
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Allocate and upload bitmaps. // Allocate and upload bitmaps.
cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint)); cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint));
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess) {
fprintf(stderr, "CUDA Error: %s\n", cudaGetErrorString(err));
fflush(stderr);
nvDebugBreak();
}
if (m_bitmapTable != NULL) if (m_bitmapTable != NULL)
{ {
cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice);
if (cudaGetLastError() != cudaSuccess) nvDebugBreak();
} }
cudaMalloc((void**) &m_bitmapTableCTX, 704 * sizeof(uint)); cudaMalloc((void**) &m_bitmapTableCTX, 704 * sizeof(uint));
if (cudaGetLastError() != cudaSuccess) nvDebugBreak();
if (m_bitmapTableCTX != NULL) if (m_bitmapTableCTX != NULL)
{ {
cudaMemcpy(m_bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice); cudaMemcpy(m_bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice);
if (cudaGetLastError() != cudaSuccess) nvDebugBreak();
} }
// Allocate scratch buffers. // Allocate scratch buffers.
cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U); cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U);
if (cudaGetLastError() != cudaSuccess) nvDebugBreak();
cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U); cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U);
if (cudaGetLastError() != cudaSuccess) nvDebugBreak();
#endif #endif
} }
@ -114,14 +128,17 @@ CudaCompressor::~CudaCompressor()
cudaFree(m_data); cudaFree(m_data);
cudaFree(m_result); cudaFree(m_result);
cudaFree(m_bitmapTable); cudaFree(m_bitmapTable);
cudaFree(m_bitmapTableCTX);
#endif #endif
} }
bool CudaCompressor::isValid() const bool CudaCompressor::isValid() const
{ {
#if defined HAVE_CUDA #if defined HAVE_CUDA
if (cudaGetLastError() != cudaSuccess) cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{ {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(err));
return false; return false;
} }
#endif #endif

View File

@ -1,212 +1,239 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com> // Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
// //
// Permission is hereby granted, free of charge, to any person // Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation // obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without // files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, // restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell // copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the // copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following // Software is furnished to do so, subject to the following
// conditions: // conditions:
// //
// The above copyright notice and this permission notice shall be // The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software. // included in all copies or substantial portions of the Software.
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h> #include <nvcore/Debug.h>
#include <nvcore/Library.h> #include <nvcore/Library.h>
#include "CudaUtils.h" #include "CudaUtils.h"
#if defined HAVE_CUDA #if defined HAVE_CUDA
#include <cuda.h> #include <cuda.h>
#include <cuda_runtime_api.h> #include <cuda_runtime_api.h>
#endif #endif
using namespace nv; using namespace nv;
using namespace cuda; using namespace cuda;
/* @@ Move this to win32 utils or somewhere else. /* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32 #if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN #define WINDOWS_LEAN_AND_MEAN
#include <windows.h> #include <windows.h>
static bool isWindowsVista() static bool isWindowsVista()
{ {
OSVERSIONINFO osvi; OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi); ::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6; return osvi.dwMajorVersion >= 6;
} }
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32() static bool isWow32()
{ {
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE; BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process) if (NULL != fnIsWow64Process)
{ {
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{ {
// Assume 32 bits. // Assume 32 bits.
return true; return true;
} }
} }
return !bIsWow64; return !bIsWow64;
} }
#endif #endif
*/ */
static bool isCudaDriverAvailable(int version) static bool isCudaDriverAvailable(int version)
{ {
#if defined HAVE_CUDA #if defined HAVE_CUDA
#if NV_OS_WIN32 #if NV_OS_WIN32
Library nvcuda("nvcuda.dll"); Library nvcuda("nvcuda.dll");
#else #else
Library nvcuda(NV_LIBRARY_NAME(cuda)); Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif #endif
if (!nvcuda.isValid()) if (!nvcuda.isValid())
{ {
return false; nvDebug("*** CUDA driver not found.\n");
} return false;
}
if (version >= 2000)
{ if (version >= 2000)
void * address = nvcuda.bindSymbol("cuStreamCreate"); {
if (address == NULL) return false; void * address = nvcuda.bindSymbol("cuStreamCreate");
} if (address == NULL) {
nvDebug("*** CUDA driver version < 2.0.\n");
if (version >= 2010) return false;
{ }
void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); }
if (address == NULL) return false;
} if (version >= 2010)
{
if (version >= 2020) void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
{ if (address == NULL) {
typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); nvDebug("*** CUDA driver version < 2.1.\n");
return false;
PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); }
if (driverGetVersion == NULL) return false; }
int driverVersion; if (version >= 2020)
if (driverGetVersion(&driverVersion) != CUDA_SUCCESS) return false; {
typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
return driverVersion >= version;
} PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
#endif // HAVE_CUDA if (driverGetVersion == NULL) {
nvDebug("*** CUDA driver version < 2.2.\n");
return false; return false;
} }
int driverVersion;
/// Determine if CUDA is available. CUresult err = driverGetVersion(&driverVersion);
bool nv::cuda::isHardwarePresent() if (err != CUDA_SUCCESS) {
{ nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
#if defined HAVE_CUDA return false;
#if NV_OS_WIN32 }
//if (isWindowsVista()) return false;
//if (isWindowsVista() || !isWow32()) return false; return driverVersion >= version;
#endif }
int count = deviceCount(); #endif // HAVE_CUDA
if (count == 1)
{ return true;
// Make sure it's not an emulation device. }
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, 0);
/// Determine if CUDA is available.
// deviceProp.name != Device Emulation (CPU) bool nv::cuda::isHardwarePresent()
if (deviceProp.major == -1 || deviceProp.minor == -1) {
{ #if defined HAVE_CUDA
return false; // Make sure that CUDA driver matches CUDA runtime.
} if (!isCudaDriverAvailable(CUDART_VERSION))
{
// Make sure that CUDA driver matches CUDA runtime. nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
if (!isCudaDriverAvailable(CUDART_VERSION)) return false;
{ }
return false;
} int count = deviceCount();
if (count == 1)
// @@ Make sure that warp size == 32 {
} // Make sure it's not an emulation device.
cudaDeviceProp deviceProp;
return count > 0; cudaGetDeviceProperties(&deviceProp, 0);
#else
return false; // deviceProp.name != Device Emulation (CPU)
#endif if (deviceProp.major == -1 || deviceProp.minor == -1)
} {
return false;
/// Get number of CUDA enabled devices. }
int nv::cuda::deviceCount() }
{
#if defined HAVE_CUDA // @@ Make sure that warp size == 32
int gpuCount = 0;
return count > 0;
cudaError_t result = cudaGetDeviceCount(&gpuCount); #else
return false;
if (result == cudaSuccess) #endif
{ }
return gpuCount;
} /// Get number of CUDA enabled devices.
#endif int nv::cuda::deviceCount()
return 0; {
} #if defined HAVE_CUDA
int gpuCount = 0;
int nv::cuda::getFastestDevice()
{ cudaError_t result = cudaGetDeviceCount(&gpuCount);
int max_gflops_device = 0;
#if defined HAVE_CUDA if (result == cudaSuccess)
int max_gflops = 0; {
return gpuCount;
const int device_count = deviceCount(); }
int current_device = 0; #endif
while (current_device < device_count) return 0;
{ }
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, current_device); int nv::cuda::getFastestDevice()
int gflops = device_properties.multiProcessorCount * device_properties.clockRate; {
int max_gflops_device = 0;
if (device_properties.major != -1 && device_properties.minor != -1) #if defined HAVE_CUDA
{ int max_gflops = 0;
if( gflops > max_gflops )
{ const int device_count = deviceCount();
max_gflops = gflops; int current_device = 0;
max_gflops_device = current_device; while (current_device < device_count)
} {
} cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, current_device);
current_device++; int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
}
#endif if (device_properties.major != -1 && device_properties.minor != -1)
return max_gflops_device; {
} if( gflops > max_gflops )
{
max_gflops = gflops;
/// Activate the given devices. max_gflops_device = current_device;
bool nv::cuda::setDevice(int i) }
{ }
nvCheck(i < deviceCount());
#if defined HAVE_CUDA current_device++;
cudaError_t result = cudaSetDevice(i); }
return result == cudaSuccess; #endif
#else return max_gflops_device;
return false; }
#endif
}
/// Activate the given devices.
bool nv::cuda::setDevice(int i)
{
nvCheck(i < deviceCount());
#if defined HAVE_CUDA
cudaError_t result = cudaSetDevice(i);
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
return result == cudaSuccess;
#else
return false;
#endif
}
void nv::cuda::exit()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
#endif
}

View File

@ -30,9 +30,10 @@ namespace nv
namespace cuda namespace cuda
{ {
bool isHardwarePresent(); bool isHardwarePresent();
int deviceCount(); int deviceCount();
int getFastestDevice(); int getFastestDevice();
bool setDevice(int i); bool setDevice(int i);
void exit();
}; };
} // nv namespace } // nv namespace