Do not fail when the process is already using CUDA.

Attempt to use the selected CUDA device. Stricter device selection.
15 years ago · 93625c7de8
parent 6e9feef6f4
commit 93625c7de8
6 changed files with 390 additions and 320 deletions
--- a/src/nvtt/Compressor.cpp
+++ b/src/nvtt/Compressor.cpp
@ -207,28 +207,16 @@ Compressor::Compressor() : m(*new Compressor::Private())
 {
 	// CUDA initialization.
 	m.cudaSupported = cuda::isHardwarePresent();
-	m.cudaEnabled = m.cudaSupported;
-
-	if (m.cudaEnabled)
-	{
-		// Select fastest CUDA device.
-		int device = cuda::getFastestDevice();
-		cuda::setDevice(device);
-		
-		m.cuda = new CudaCompressor();
-
-		if (!m.cuda->isValid())
-		{
 	m.cudaEnabled = false;
-			m.cuda = NULL;
-		}
-	}
+	m.cudaDevice = -1;
+
+	enableCudaAcceleration(m.cudaSupported);
 }

 Compressor::~Compressor()
 {
+	enableCudaAcceleration(false);
 	delete &m;
-	cuda::exit();
 }


@ -237,21 +225,33 @@ void Compressor::enableCudaAcceleration(bool enable)
 {
 	if (m.cudaSupported)
 	{
-		m.cudaEnabled = enable;
-	}
+		if (m.cudaEnabled && !enable)
+		{
+			m.cudaEnabled = false;
+			m.cuda = NULL;

-	if (m.cudaEnabled && m.cuda == NULL)
+			if (m.cudaDevice != -1)
+			{
+				// Exit device.
+				cuda::exitDevice();
+			}
+		}
+		else if (!m.cudaEnabled && enable)
 		{
-		// Select fastest CUDA device.
-		int device = cuda::getFastestDevice();
-		cuda::setDevice(device);
+			// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
+			m.cudaEnabled = cuda::initDevice(&m.cudaDevice);

+			if (m.cudaEnabled)
+			{
+				// Create compressor if initialization succeeds.
 				m.cuda = new CudaCompressor();

+				// But cleanup if failed.
 				if (!m.cuda->isValid())
 				{
-			m.cudaEnabled = false;
-			m.cuda = NULL;
+					enableCudaAcceleration(false);
+				}
+			}
 		}
 	}
 }
--- a/src/nvtt/Compressor.h
+++ b/src/nvtt/Compressor.h
@ -63,10 +63,12 @@ namespace nvtt
 		bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;


+
 	public:

 		bool cudaSupported;
 		bool cudaEnabled;
+		int cudaDevice;

 		nv::AutoPtr<nv::CudaCompressor> cuda;

--- a/src/nvtt/cuda/CudaUtils.cpp
+++ b/src/nvtt/cuda/CudaUtils.cpp
@ -158,6 +158,8 @@ bool nv::cuda::isHardwarePresent()

 	// @@ Make sure that warp size == 32

+	// @@ Make sure available GPU is faster than the CPU.
+
 	return count > 0;
 #else
 	return false;
@ -180,30 +182,59 @@ int nv::cuda::deviceCount()
 	return 0;
 }

+
+// Make sure device meets requirements:
+// - Not an emulation device.
+// - Not an integrated device?
+// - Faster than CPU.
+bool nv::cuda::isValidDevice(int i)
+{
+#if defined HAVE_CUDA
+
+	cudaDeviceProp device_properties;
+	cudaGetDeviceProperties(&device_properties, i);
+	int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
+
+	if (device_properties.major == -1 || device_properties.minor == -1) {
+		// Emulation device.
+		return false;
+	}
+
+#if CUDART_VERSION >= 2030 // 2.3
+	/*if (device_properties.integrated)
+	{
+		// Integrated devices.
+		return false;
+	}*/
+#endif
+
+	return true;
+#else
+	return false;
+#endif
+}
+
 int nv::cuda::getFastestDevice()
 {
-	int max_gflops_device = 0;
+	int max_gflops_device = -1;
 #if defined HAVE_CUDA
 	int max_gflops = 0;

 	const int device_count = deviceCount();
-	int current_device = 0;
-	while (current_device < device_count)
+	for (int i = 0; i < device_count; i++)
+	{
+		if (isValidDevice(i))
 		{
 			cudaDeviceProp device_properties;
-		cudaGetDeviceProperties(&device_properties, current_device);
+			cudaGetDeviceProperties(&device_properties, i);
 			int gflops = device_properties.multiProcessorCount * device_properties.clockRate;

-		if (device_properties.major != -1 && device_properties.minor != -1)
-		{
 			if (gflops > max_gflops)
 			{
 				max_gflops = gflops;
-				max_gflops_device = current_device;
+				max_gflops_device = i;
 			}
 		}
-		
-		current_device++;
 	}
 #endif
 	return max_gflops_device;
@ -211,23 +242,53 @@ int nv::cuda::getFastestDevice()


 /// Activate the given devices.
-bool nv::cuda::setDevice(int i)
+bool nv::cuda::initDevice(int * device_ptr)
 {
-	nvCheck(i < deviceCount());
+	nvDebugCheck(device_ptr != NULL);
 #if defined HAVE_CUDA
-	cudaError_t result = cudaSetDevice(i);

-	if (result != cudaSuccess) {
+#if CUDART_VERSION >= 2030 // 2.3
+
+	// Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active.
+	cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield);
+
+#endif
+
+	int device = getFastestDevice();
+
+	if (device == -1)
+	{
+		// No device is fast enough.
+		*device_ptr = -1;
+		return false;
+	}
+
+	// Select CUDA device.
+	cudaError_t result = cudaSetDevice(device);
+
+	if (result == cudaErrorSetOnActiveProcess)
+	{
+		int device;
+		result = cudaGetDevice(&device);
+
+		*device_ptr = -1;  // No device to cleanup.
+		return isValidDevice(device); // Return true if device is valid.
+	}
+	else if (result != cudaSuccess)
+	{
 		nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
+		*device_ptr = -1;
+		return false;
 	}

-	return result == cudaSuccess;
+	*device_ptr = device;
+	return true;
 #else
 	return false;
 #endif
 }

-void nv::cuda::exit()
+void nv::cuda::exitDevice()
 {
 #if defined HAVE_CUDA
 	cudaError_t result = cudaThreadExit();
--- a/src/nvtt/cuda/CudaUtils.h
+++ b/src/nvtt/cuda/CudaUtils.h
@ -32,8 +32,10 @@ namespace nv
 		bool isHardwarePresent();
 		int deviceCount();
 		int getFastestDevice();
-		bool setDevice(int i);
-		void exit();
+		bool isValidDevice(int i);
+
+		bool initDevice(int * device_ptr);
+		void exitDevice();
 	};
 	
 } // nv namespace
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -194,7 +194,7 @@ namespace nvtt
 		// Describe the format of the input.
 		NVTT_API void setFormat(InputFormat format);
 		
-		// Set the way the input alpha channel is interpreted. @@ Not implemented!
+		// Set the way the input alpha channel is interpreted.
 		NVTT_API void setAlphaMode(AlphaMode alphaMode);
 		
 		// Set gamma settings.
--- a/src/nvtt/tools/compress.cpp
+++ b/src/nvtt/tools/compress.cpp
@ -33,6 +33,9 @@

 #include <time.h> // clock

+#include <c:\CUDA\include\cuda_runtime.h>
+#pragma comment(lib, "c:\\CUDA\\lib\\cudart.lib")
+
 //#define WINDOWS_LEAN_AND_MEAN
 //#include <windows.h> // TIMER

@ -413,6 +416,8 @@ int main(int argc, char *argv[])
 		return EXIT_FAILURE;
 	}

+	cudaSetDevice(0);
+
 	nvtt::Compressor compressor;
 	compressor.enableCudaAcceleration(!nocuda);