From 93625c7de8b95680a49196c6e35c9eb5f7eb1bcc Mon Sep 17 00:00:00 2001 From: castano Date: Wed, 4 Nov 2009 06:16:03 +0000 Subject: [PATCH] Do not fail when the process is already using CUDA. Attempt to use the selected cuda device. More strict device selection. --- src/nvtt/Compressor.cpp | 156 +++++------ src/nvtt/Compressor.h | 2 + src/nvtt/cuda/CudaUtils.cpp | 539 ++++++++++++++++++++---------------- src/nvtt/cuda/CudaUtils.h | 6 +- src/nvtt/nvtt.h | 2 +- src/nvtt/tools/compress.cpp | 5 + 6 files changed, 390 insertions(+), 320 deletions(-) diff --git a/src/nvtt/Compressor.cpp b/src/nvtt/Compressor.cpp index 9613fec..6631b91 100644 --- a/src/nvtt/Compressor.cpp +++ b/src/nvtt/Compressor.cpp @@ -53,7 +53,7 @@ using namespace nvtt; namespace { - + static int blockSize(Format format) { if (format == Format_DXT1 || format == Format_DXT1a) { @@ -121,7 +121,7 @@ namespace nvtt m_fixedImage = NULL; m_floatImage = image; } - + // Convert linear float image to fixed image ready for compression. void toFixedImage(const InputOptions::Private & inputOptions) @@ -153,7 +153,7 @@ namespace nvtt if (inputOptions.isNormalMap) { // Expand normals to [-1, 1] range. - // floatImage->expandNormals(0); + // floatImage->expandNormals(0); } else if (inputOptions.inputGamma != 1.0f) { @@ -193,7 +193,7 @@ namespace nvtt return m_fixedImage.ptr(); } - + private: const Image * m_inputImage; AutoPtr m_fixedImage; @@ -207,28 +207,16 @@ Compressor::Compressor() : m(*new Compressor::Private()) { // CUDA initialization. m.cudaSupported = cuda::isHardwarePresent(); - m.cudaEnabled = m.cudaSupported; - - if (m.cudaEnabled) - { - // Select fastest CUDA device. - int device = cuda::getFastestDevice(); - cuda::setDevice(device); - - m.cuda = new CudaCompressor(); + m.cudaEnabled = false; + m.cudaDevice = -1; - if (!m.cuda->isValid()) - { - m.cudaEnabled = false; - m.cuda = NULL; - } - } + enableCudaAcceleration(m.cudaSupported); } Compressor::~Compressor() { + enableCudaAcceleration(false); delete &m; - cuda::exit(); } @@ -237,21 +225,33 @@ void Compressor::enableCudaAcceleration(bool enable) { if (m.cudaSupported) { - m.cudaEnabled = enable; - } - - if (m.cudaEnabled && m.cuda == NULL) - { - // Select fastest CUDA device. - int device = cuda::getFastestDevice(); - cuda::setDevice(device); - - m.cuda = new CudaCompressor(); - - if (!m.cuda->isValid()) + if (m.cudaEnabled && !enable) { m.cudaEnabled = false; m.cuda = NULL; + + if (m.cudaDevice != -1) + { + // Exit device. + cuda::exitDevice(); + } + } + else if (!m.cudaEnabled && enable) + { + // Init the CUDA device. This may return -1 if CUDA was already initialized by the app. + m.cudaEnabled = cuda::initDevice(&m.cudaDevice); + + if (m.cudaEnabled) + { + // Create compressor if initialization succeeds. + m.cuda = new CudaCompressor(); + + // But cleanup if failed. + if (!m.cuda->isValid()) + { + enableCudaAcceleration(false); + } + } } } } @@ -292,9 +292,9 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen); return false; } - + inputOptions.computeTargetExtents(); - + // Output DDS header. if (!outputHeader(inputOptions, compressionOptions, outputOptions)) { @@ -310,7 +310,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c } outputOptions.closeFile(); - + return true; } @@ -325,15 +325,15 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption } DDSHeader header; - + header.setWidth(inputOptions.targetWidth); header.setHeight(inputOptions.targetHeight); - + int mipmapCount = inputOptions.realMipmapCount(); nvDebugCheck(mipmapCount > 0); - + header.setMipmapCount(mipmapCount); - + if (inputOptions.textureType == TextureType_2D) { header.setTexture2D(); } @@ -341,10 +341,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption header.setTextureCube(); } /*else if (inputOptions.textureType == TextureType_3D) { - header.setTexture3D(); - header.setDepth(inputOptions.targetDepth); + header.setTexture3D(); + header.setDepth(inputOptions.targetDepth); }*/ - + if (compressionOptions.format == Format_RGBA) { header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount)); @@ -353,7 +353,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption else { header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format)); - + if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) { header.setFourCC('D', 'X', 'T', '1'); if (inputOptions.isNormalMap) header.setNormalFlag(true); @@ -376,10 +376,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption if (inputOptions.isNormalMap) header.setNormalFlag(true); } } - + // Swap bytes if necessary. header.swapBytes(); - + uint headerSize = 128; if (header.hasDX10Header()) { @@ -392,7 +392,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption { outputOptions.errorHandler->error(Error_FileWrite); } - + return writeSucceed; } @@ -428,7 +428,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & return false; } } - + quantizeMipmap(mipmap, compressionOptions); compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions); @@ -438,7 +438,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private & h = max(1U, h / 2); d = max(1U, d / 2); } - + return true; } @@ -489,7 +489,7 @@ int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOpti { int idx = f * inputOptions.mipmapCount + m; const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx]; - + if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d)) { if (inputImage.data != NULL) @@ -544,7 +544,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions:: mipmap.toFloatImage(inputOptions); const FloatImage * floatImage = mipmap.asFloatImage(); - + if (inputOptions.mipmapFilter == MipmapFilter_Box) { // Use fast downsample. @@ -562,7 +562,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions:: filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch); mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode)); } - + // Normalize mipmap. if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps) { @@ -590,7 +590,7 @@ void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions: if (inputOptions.convertToNormalMap) { mipmap.toFixedImage(inputOptions); - + Vector4 heightScale = inputOptions.heightFactors; mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale)); } @@ -715,29 +715,29 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio #endif #if defined(HAVE_ATITC) - if (compressionOptions.externalCompressor == "ati") - { - atiCompressDXT1(image, outputOptions); - } - else -#endif - if (compressionOptions.quality == Quality_Fastest) - { - fast.compressDXT1(outputOptions); - } - else - { - if (useCuda) + if (compressionOptions.externalCompressor == "ati") { - nvDebugCheck(cudaSupported); - cuda->setImage(image, inputOptions.alphaMode); - cuda->compressDXT1(compressionOptions, outputOptions); + atiCompressDXT1(image, outputOptions); } else - { - slow.compressDXT1(compressionOptions, outputOptions); - } - } +#endif + if (compressionOptions.quality == Quality_Fastest) + { + fast.compressDXT1(outputOptions); + } + else + { + if (useCuda) + { + nvDebugCheck(cudaSupported); + cuda->setImage(image, inputOptions.alphaMode); + cuda->compressDXT1(compressionOptions, outputOptions); + } + else + { + slow.compressDXT1(compressionOptions, outputOptions); + } + } } else if (compressionOptions.format == Format_DXT1a) { @@ -828,27 +828,27 @@ int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions const uint bitCount = compressionOptions.bitcount; inputOptions.computeTargetExtents(); - + uint mipmapCount = inputOptions.realMipmapCount(); - + int size = 0; - + for (uint f = 0; f < inputOptions.faceCount; f++) { uint w = inputOptions.targetWidth; uint h = inputOptions.targetHeight; uint d = inputOptions.targetDepth; - + for (uint m = 0; m < mipmapCount; m++) { size += computeImageSize(w, h, d, bitCount, format); - + // Compute extents of next mipmap: w = max(1U, w / 2); h = max(1U, h / 2); d = max(1U, d / 2); } } - + return size; } diff --git a/src/nvtt/Compressor.h b/src/nvtt/Compressor.h index 55b9563..8737e29 100644 --- a/src/nvtt/Compressor.h +++ b/src/nvtt/Compressor.h @@ -63,10 +63,12 @@ namespace nvtt bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; + public: bool cudaSupported; bool cudaEnabled; + int cudaDevice; nv::AutoPtr cuda; diff --git a/src/nvtt/cuda/CudaUtils.cpp b/src/nvtt/cuda/CudaUtils.cpp index 7bb2b09..3cca04d 100644 --- a/src/nvtt/cuda/CudaUtils.cpp +++ b/src/nvtt/cuda/CudaUtils.cpp @@ -1,239 +1,300 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include -#include -#include "CudaUtils.h" - -#if defined HAVE_CUDA -#include -#include -#endif - -using namespace nv; -using namespace cuda; - -/* @@ Move this to win32 utils or somewhere else. -#if NV_OS_WIN32 - -#define WINDOWS_LEAN_AND_MEAN -#include - -static bool isWindowsVista() -{ - OSVERSIONINFO osvi; - osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ::GetVersionEx(&osvi); - return osvi.dwMajorVersion >= 6; -} - - -typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); - -static bool isWow32() -{ - LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); - - BOOL bIsWow64 = FALSE; - - if (NULL != fnIsWow64Process) - { - if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) - { - // Assume 32 bits. - return true; - } - } - - return !bIsWow64; -} - -#endif -*/ - - -static bool isCudaDriverAvailable(int version) -{ -#if defined HAVE_CUDA -#if NV_OS_WIN32 - Library nvcuda("nvcuda.dll"); -#else - Library nvcuda(NV_LIBRARY_NAME(cuda)); -#endif - - if (!nvcuda.isValid()) - { - nvDebug("*** CUDA driver not found.\n"); - return false; - } - - if (version >= 2000) - { - void * address = nvcuda.bindSymbol("cuStreamCreate"); - if (address == NULL) { - nvDebug("*** CUDA driver version < 2.0.\n"); - return false; - } - } - - if (version >= 2010) - { - void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); - if (address == NULL) { - nvDebug("*** CUDA driver version < 2.1.\n"); - return false; - } - } - - if (version >= 2020) - { - typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); - - PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); - if (driverGetVersion == NULL) { - nvDebug("*** CUDA driver version < 2.2.\n"); - return false; - } - - int driverVersion; - CUresult err = driverGetVersion(&driverVersion); - if (err != CUDA_SUCCESS) { - nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); - return false; - } - - return driverVersion >= version; - } -#endif // HAVE_CUDA - - return true; -} - - -/// Determine if CUDA is available. -bool nv::cuda::isHardwarePresent() -{ -#if defined HAVE_CUDA - // Make sure that CUDA driver matches CUDA runtime. - if (!isCudaDriverAvailable(CUDART_VERSION)) - { - nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); - return false; - } - - int count = deviceCount(); - if (count == 1) - { - // Make sure it's not an emulation device. - cudaDeviceProp deviceProp; - cudaGetDeviceProperties(&deviceProp, 0); - - // deviceProp.name != Device Emulation (CPU) - if (deviceProp.major == -1 || deviceProp.minor == -1) - { - return false; - } - } - - // @@ Make sure that warp size == 32 - - return count > 0; -#else - return false; -#endif -} - -/// Get number of CUDA enabled devices. -int nv::cuda::deviceCount() -{ -#if defined HAVE_CUDA - int gpuCount = 0; - - cudaError_t result = cudaGetDeviceCount(&gpuCount); - - if (result == cudaSuccess) - { - return gpuCount; - } -#endif - return 0; -} - -int nv::cuda::getFastestDevice() -{ - int max_gflops_device = 0; -#if defined HAVE_CUDA - int max_gflops = 0; - - const int device_count = deviceCount(); - int current_device = 0; - while (current_device < device_count) - { - cudaDeviceProp device_properties; - cudaGetDeviceProperties(&device_properties, current_device); - int gflops = device_properties.multiProcessorCount * device_properties.clockRate; - - if (device_properties.major != -1 && device_properties.minor != -1) - { - if( gflops > max_gflops ) - { - max_gflops = gflops; - max_gflops_device = current_device; - } - } - - current_device++; - } -#endif - return max_gflops_device; -} - - -/// Activate the given devices. -bool nv::cuda::setDevice(int i) -{ - nvCheck(i < deviceCount()); -#if defined HAVE_CUDA - cudaError_t result = cudaSetDevice(i); - - if (result != cudaSuccess) { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); - } - - return result == cudaSuccess; -#else - return false; -#endif -} - -void nv::cuda::exit() -{ -#if defined HAVE_CUDA - cudaError_t result = cudaThreadExit(); - - if (result != cudaSuccess) { - nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); - } -#endif -} +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include +#include +#include "CudaUtils.h" + +#if defined HAVE_CUDA +#include +#include +#endif + +using namespace nv; +using namespace cuda; + +/* @@ Move this to win32 utils or somewhere else. +#if NV_OS_WIN32 + +#define WINDOWS_LEAN_AND_MEAN +#include + +static bool isWindowsVista() +{ +OSVERSIONINFO osvi; +osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); + +::GetVersionEx(&osvi); +return osvi.dwMajorVersion >= 6; +} + + +typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL); + +static bool isWow32() +{ +LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process"); + +BOOL bIsWow64 = FALSE; + +if (NULL != fnIsWow64Process) +{ +if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64)) +{ +// Assume 32 bits. +return true; +} +} + +return !bIsWow64; +} + +#endif +*/ + + +static bool isCudaDriverAvailable(int version) +{ +#if defined HAVE_CUDA +#if NV_OS_WIN32 + Library nvcuda("nvcuda.dll"); +#else + Library nvcuda(NV_LIBRARY_NAME(cuda)); +#endif + + if (!nvcuda.isValid()) + { + nvDebug("*** CUDA driver not found.\n"); + return false; + } + + if (version >= 2000) + { + void * address = nvcuda.bindSymbol("cuStreamCreate"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.0.\n"); + return false; + } + } + + if (version >= 2010) + { + void * address = nvcuda.bindSymbol("cuModuleLoadDataEx"); + if (address == NULL) { + nvDebug("*** CUDA driver version < 2.1.\n"); + return false; + } + } + + if (version >= 2020) + { + typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); + + PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); + if (driverGetVersion == NULL) { + nvDebug("*** CUDA driver version < 2.2.\n"); + return false; + } + + int driverVersion; + CUresult err = driverGetVersion(&driverVersion); + if (err != CUDA_SUCCESS) { + nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err)); + return false; + } + + return driverVersion >= version; + } +#endif // HAVE_CUDA + + return true; +} + + +/// Determine if CUDA is available. +bool nv::cuda::isHardwarePresent() +{ +#if defined HAVE_CUDA + // Make sure that CUDA driver matches CUDA runtime. + if (!isCudaDriverAvailable(CUDART_VERSION)) + { + nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION); + return false; + } + + int count = deviceCount(); + if (count == 1) + { + // Make sure it's not an emulation device. + cudaDeviceProp deviceProp; + cudaGetDeviceProperties(&deviceProp, 0); + + // deviceProp.name != Device Emulation (CPU) + if (deviceProp.major == -1 || deviceProp.minor == -1) + { + return false; + } + } + + // @@ Make sure that warp size == 32 + + // @@ Make sure available GPU is faster than the CPU. + + return count > 0; +#else + return false; +#endif +} + +/// Get number of CUDA enabled devices. +int nv::cuda::deviceCount() +{ +#if defined HAVE_CUDA + int gpuCount = 0; + + cudaError_t result = cudaGetDeviceCount(&gpuCount); + + if (result == cudaSuccess) + { + return gpuCount; + } +#endif + return 0; +} + + +// Make sure device meets requirements: +// - Not an emulation device. +// - Not an integrated device? +// - Faster than CPU. +bool nv::cuda::isValidDevice(int i) +{ +#if defined HAVE_CUDA + + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, i); + int gflops = device_properties.multiProcessorCount * device_properties.clockRate; + + if (device_properties.major == -1 || device_properties.minor == -1) { + // Emulation device. + return false; + } + +#if CUDART_VERSION >= 2030 // 2.3 + /*if (device_properties.integrated) + { + // Integrated devices. + return false; + }*/ +#endif + + return true; +#else + return false; +#endif +} + +int nv::cuda::getFastestDevice() +{ + int max_gflops_device = -1; +#if defined HAVE_CUDA + int max_gflops = 0; + + const int device_count = deviceCount(); + for (int i = 0; i < device_count; i++) + { + if (isValidDevice(i)) + { + cudaDeviceProp device_properties; + cudaGetDeviceProperties(&device_properties, i); + int gflops = device_properties.multiProcessorCount * device_properties.clockRate; + + if (gflops > max_gflops) + { + max_gflops = gflops; + max_gflops_device = i; + } + } + } +#endif + return max_gflops_device; +} + + +/// Activate the given devices. +bool nv::cuda::initDevice(int * device_ptr) +{ + nvDebugCheck(device_ptr != NULL); +#if defined HAVE_CUDA + +#if CUDART_VERSION >= 2030 // 2.3 + + // Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active. + cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield); + +#endif + + int device = getFastestDevice(); + + if (device == -1) + { + // No device is fast enough. + *device_ptr = -1; + return false; + } + + // Select CUDA device. + cudaError_t result = cudaSetDevice(device); + + if (result == cudaErrorSetOnActiveProcess) + { + int device; + result = cudaGetDevice(&device); + + *device_ptr = -1; // No device to cleanup. + return isValidDevice(device); // Return true if device is valid. + } + else if (result != cudaSuccess) + { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + *device_ptr = -1; + return false; + } + + *device_ptr = device; + return true; +#else + return false; +#endif +} + +void nv::cuda::exitDevice() +{ +#if defined HAVE_CUDA + cudaError_t result = cudaThreadExit(); + + if (result != cudaSuccess) { + nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result)); + } +#endif +} diff --git a/src/nvtt/cuda/CudaUtils.h b/src/nvtt/cuda/CudaUtils.h index 376bbe1..7128b4d 100644 --- a/src/nvtt/cuda/CudaUtils.h +++ b/src/nvtt/cuda/CudaUtils.h @@ -32,8 +32,10 @@ namespace nv bool isHardwarePresent(); int deviceCount(); int getFastestDevice(); - bool setDevice(int i); - void exit(); + bool isValidDevice(int i); + + bool initDevice(int * device_ptr); + void exitDevice(); }; } // nv namespace diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index 698e533..b5d2e72 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -194,7 +194,7 @@ namespace nvtt // Describe the format of the input. NVTT_API void setFormat(InputFormat format); - // Set the way the input alpha channel is interpreted. @@ Not implemented! + // Set the way the input alpha channel is interpreted. NVTT_API void setAlphaMode(AlphaMode alphaMode); // Set gamma settings. diff --git a/src/nvtt/tools/compress.cpp b/src/nvtt/tools/compress.cpp index ea87ac3..a27d4fb 100644 --- a/src/nvtt/tools/compress.cpp +++ b/src/nvtt/tools/compress.cpp @@ -33,6 +33,9 @@ #include // clock +#include +#pragma comment(lib, "c:\\CUDA\\lib\\cudart.lib") + //#define WINDOWS_LEAN_AND_MEAN //#include // TIMER @@ -413,6 +416,8 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } + cudaSetDevice(0); + nvtt::Compressor compressor; compressor.enableCudaAcceleration(!nocuda);