diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index e2f1b27..27fdb47 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -224,21 +224,28 @@ Compressor::Compressor() : m(*new Compressor::Private()) { // Select fastest CUDA device. int device = cuda::getFastestDevice(); - cuda::setDevice(device); - - m.cuda = new CudaCompressor(); - - if (!m.cuda->isValid()) + if (!cuda::setDevice(device)) { m.cudaEnabled = false; m.cuda = NULL; } + else + { + m.cuda = new CudaCompressor(); + + if (!m.cuda->isValid()) + { + m.cudaEnabled = false; + m.cuda = NULL; + } + } } } Compressor::~Compressor() { delete &m; + cuda::exit(); } @@ -254,15 +261,21 @@ void Compressor::enableCudaAcceleration(bool enable) { // Select fastest CUDA device. int device = cuda::getFastestDevice(); - cuda::setDevice(device); - - m.cuda = new CudaCompressor(); - - if (!m.cuda->isValid()) + if (!cuda::setDevice(device)) { m.cudaEnabled = false; m.cuda = NULL; } + else + { + m.cuda = new CudaCompressor(); + + if (!m.cuda->isValid()) + { + m.cudaEnabled = false; + m.cuda = NULL; + } + } } } diff --git a/src/nvtt/cuda/CudaCompressDXT.cpp b/src/nvtt/cuda/CudaCompressDXT.cpp index f3e0264..2823312 100644 --- a/src/nvtt/cuda/CudaCompressDXT.cpp +++ b/src/nvtt/cuda/CudaCompressDXT.cpp @@ -90,20 +90,34 @@ CudaCompressor::CudaCompressor() : m_bitmapTable(NULL), m_bitmapTableCTX(NULL), #if defined HAVE_CUDA // Allocate and upload bitmaps. cudaMalloc((void**) &m_bitmapTable, 992 * sizeof(uint)); + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + fprintf(stderr, "CUDA Error: %s\n", cudaGetErrorString(err)); + fflush(stderr); + nvDebugBreak(); + } + if (m_bitmapTable != NULL) { cudaMemcpy(m_bitmapTable, s_bitmapTable, 992 * sizeof(uint), cudaMemcpyHostToDevice); + if (cudaGetLastError() != cudaSuccess) nvDebugBreak(); } cudaMalloc((void**) &m_bitmapTableCTX, 704 * sizeof(uint)); + if (cudaGetLastError() != cudaSuccess) nvDebugBreak(); + if (m_bitmapTableCTX != NULL) { cudaMemcpy(m_bitmapTableCTX, s_bitmapTableCTX, 704 * sizeof(uint), cudaMemcpyHostToDevice); + if (cudaGetLastError() != cudaSuccess) nvDebugBreak(); } // Allocate scratch buffers. cudaMalloc((void**) &m_data, MAX_BLOCKS * 64U); + if (cudaGetLastError() != cudaSuccess) nvDebugBreak(); + cudaMalloc((void**) &m_result, MAX_BLOCKS * 8U); + if (cudaGetLastError() != cudaSuccess) nvDebugBreak(); #endif } @@ -114,14 +128,17 @@ CudaCompressor::~CudaCompressor() cudaFree(m_data); cudaFree(m_result); cudaFree(m_bitmapTable); + cudaFree(m_bitmapTableCTX); #endif } bool CudaCompressor::isValid() const { #if defined HAVE_CUDA - if (cudaGetLastError() != cudaSuccess) + cudaError_t err = cudaGetLastError(); + if (err != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(err)); return false; } #endif diff --git a/src/nvtt/cuda/CudaUtils.cpp b/src/nvtt/cuda/CudaUtils.cpp index 324744a..7bb2b09 100644 --- a/src/nvtt/cuda/CudaUtils.cpp +++ b/src/nvtt/cuda/CudaUtils.cpp @@ -1,212 +1,239 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include "CudaUtils.h" - -#if defined HAVE_CUDA -#include -#include -#endif - -using namespace nv; -using namespace cuda; - -/* @@ Move this to win32 utils or somewhere else. -#if NV_OS_WIN32 - -#define WINDOWS_LEAN_AND_MEAN -#include - -static bool isWindowsVista() -{ - OSVERSIONINFO osvi; - osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ::GetVersionEx(&osvi); - return osvi.dwMajorVersion >= 6; -} - - -typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); - -static bool isWow32() -{ - LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); - - BOOL bIsWow64 = FALSE; - - if (NULL != fnIsWow64Process) - { - if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) - { - // Assume 32 bits. - return true; - } - } - - return !bIsWow64; -} - -#endif -*/ - - -static bool isCudaDriverAvailable(int version) -{ -#if defined HAVE_CUDA -#if NV_OS_WIN32 - Library nvcuda("nvcuda.dll"); -#else - Library nvcuda(NV_LIBRARY_NAME(cuda)); -#endif - - if (!nvcuda.isValid()) - { - return false; - } - - if (version >= 2000) - { - void * address = nvcuda.bindSymbol("cuStreamCreate"); - if (address == NULL) return false; - } - - if (version >= 2010) - { - void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); - if (address == NULL) return false; - } - - if (version >= 2020) - { - typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); - - PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); - if (driverGetVersion == NULL) return false; - - int driverVersion; - if (driverGetVersion(&driverVersion) != CUDA_SUCCESS) return false; - - return driverVersion >= version; - } -#endif // HAVE_CUDA - - return false; -} - - -/// Determine if CUDA is available. -bool nv::cuda::isHardwarePresent() -{ -#if defined HAVE_CUDA -#if NV_OS_WIN32 - //if (isWindowsVista()) return false; - //if (isWindowsVista() || !isWow32()) return false; -#endif - int count = deviceCount(); - if (count == 1) - { - // Make sure it's not an emulation device. - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - - // deviceProp.name != Device Emulation (CPU) - if (deviceProp.major == -1 || deviceProp.minor == -1) - { - return false; - } - - // Make sure that CUDA driver matches CUDA runtime. - if (!isCudaDriverAvailable(CUDART_VERSION)) - { - return false; - } - - // @@ Make sure that warp size == 32 - } - - return count > 0; -#else - return false; -#endif -} - -/// Get number of CUDA enabled devices. -int nv::cuda::deviceCount() -{ -#if defined HAVE_CUDA - int gpuCount = 0; - - cudaError_t result = cudaGetDeviceCount(&gpuCount); - - if (result == cudaSuccess) - { - return gpuCount; - } -#endif - return 0; -} - -int nv::cuda::getFastestDevice() -{ - int max_gflops_device = 0; -#if defined HAVE_CUDA - int max_gflops = 0; - - const int device_count = deviceCount(); - int current_device = 0; - while (current_device < device_count) - { - cudaDeviceProp device_properties; - cudaGetDeviceProperties(&device_properties, current_device); - int gflops = device_properties.multiProcessorCount * device_properties.clockRate; - - if (device_properties.major != -1 && device_properties.minor != -1) - { - if( gflops > max_gflops ) - { - max_gflops = gflops; - max_gflops_device = current_device; - } - } - - current_device++; - } -#endif - return max_gflops_device; -} - - -/// Activate the given devices. -bool nv::cuda::setDevice(int i) -{ - nvCheck(i < deviceCount()); -#if defined HAVE_CUDA - cudaError_t result = cudaSetDevice(i); - return result == cudaSuccess; -#else - return false; -#endif -} +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include +#include "CudaUtils.h" + +#if defined HAVE_CUDA +#include +#include +#endif + +using namespace nv; +using namespace cuda; + +/* @@ Move this to win32 utils or somewhere else. +#if NV_OS_WIN32 + +#define WINDOWS_LEAN_AND_MEAN +#include + +static bool isWindowsVista() +{ + OSVERSIONINFO osvi; + osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + + ::GetVersionEx(&osvi); + return osvi.dwMajorVersion >= 6; +} + + +typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); + +static bool isWow32() +{ + LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); + + BOOL bIsWow64 = FALSE; + + if (NULL != fnIsWow64Process) + { + if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) + { + // Assume 32 bits. + return true; + } + } + + return !bIsWow64; +} + +#endif +*/ + + +static bool isCudaDriverAvailable(int version) +{ +#if defined HAVE_CUDA +#if NV_OS_WIN32 + Library nvcuda("nvcuda.dll"); +#else + Library nvcuda(NV_LIBRARY_NAME(cuda)); +#endif + + if (!nvcuda.isValid()) + { + nvDebug("*** CUDA driver not found.\n"); + return false; + } + + if (version >= 2000) + { + void * address = nvcuda.bindSymbol("cuStreamCreate"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.0.\n"); + return false; + } + } + + if (version >= 2010) + { + void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.1.\n"); + return false; + } + } + + if (version >= 2020) + { + typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); + + PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); + if (driverGetVersion == NULL) { + nvDebug("*** CUDA driver version < 2.2.\n"); + return false; + } + + int driverVersion; + CUresult err = driverGetVersion(&driverVersion); + if (err != CUDA_SUCCESS) { + nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); + return false; + } + + return driverVersion >= version; + } +#endif // HAVE_CUDA + + return true; +} + + +/// Determine if CUDA is available. +bool nv::cuda::isHardwarePresent() +{ +#if defined HAVE_CUDA + // Make sure that CUDA driver matches CUDA runtime. + if (!isCudaDriverAvailable(CUDART_VERSION)) + { + nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); + return false; + } + + int count = deviceCount(); + if (count == 1) + { + // Make sure it's not an emulation device. + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + // deviceProp.name != Device Emulation (CPU) + if (deviceProp.major == -1 || deviceProp.minor == -1) + { + return false; + } + } + + // @@ Make sure that warp size == 32 + + return count > 0; +#else + return false; +#endif +} + +/// Get number of CUDA enabled devices. +int nv::cuda::deviceCount() +{ +#if defined HAVE_CUDA + int gpuCount = 0; + + cudaError_t result = cudaGetDeviceCount(&gpuCount); + + if (result == cudaSuccess) + { + return gpuCount; + } +#endif + return 0; +} + +int nv::cuda::getFastestDevice() +{ + int max_gflops_device = 0; +#if defined HAVE_CUDA + int max_gflops = 0; + + const int device_count = deviceCount(); + int current_device = 0; + while (current_device < device_count) + { + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, current_device); + int gflops = device_properties.multiProcessorCount * device_properties.clockRate; + + if (device_properties.major != -1 && device_properties.minor != -1) + { + if( gflops > max_gflops ) + { + max_gflops = gflops; + max_gflops_device = current_device; + } + } + + current_device++; + } +#endif + return max_gflops_device; +} + + +/// Activate the given devices. +bool nv::cuda::setDevice(int i) +{ + nvCheck(i < deviceCount()); +#if defined HAVE_CUDA + cudaError_t result = cudaSetDevice(i); + + if (result != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + } + + return result == cudaSuccess; +#else + return false; +#endif +} + +void nv::cuda::exit() +{ +#if defined HAVE_CUDA + cudaError_t result = cudaThreadExit(); + + if (result != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + } +#endif +} diff --git a/src/nvtt/cuda/CudaUtils.h b/src/nvtt/cuda/CudaUtils.h index c284401..376bbe1 100644 --- a/src/nvtt/cuda/CudaUtils.h +++ b/src/nvtt/cuda/CudaUtils.h @@ -30,9 +30,10 @@ namespace nv namespace cuda { bool isHardwarePresent(); - int deviceCount(); + int deviceCount(); int getFastestDevice(); bool setDevice(int i); + void exit(); }; } // nv namespace