60 Commits
2.0.4 ... 2.0.5

Author SHA1 Message Date
81a40d8abc Create 2.0.5 release. 2008-12-01 08:11:43 +00:00
a30490ab9b Preserve cluster location for empty partitions. 2008-11-24 10:35:42 +00:00
1ec115c7ec Cleanup 4 means compressor. 2008-11-24 10:35:07 +00:00
a4f56b65b8 Add support for alpha modes in the CPU compressors. Fixes issue 30. 2008-11-24 10:34:16 +00:00
bb69acec6c Add FileSystem to build. 2008-11-23 22:25:47 +00:00
4bbf5e96f4 Add squish external compressor.
Rename our squish version to nvsquish.
2008-11-23 08:59:56 +00:00
4a85f8e48d Remove executable flag. 2008-11-22 22:12:05 +00:00
f34b7ce84f Merge optimizations from squish. 2008-11-22 22:10:51 +00:00
010905edd3 Fix tabs. 2008-11-22 22:10:11 +00:00
7bb2d55d35 Create output directory. 2008-11-22 22:08:31 +00:00
e3a7cc19dd Add file system helper. 2008-11-22 22:07:07 +00:00
379605d30a Use metric to measure distance to clusters. 2008-11-22 21:32:27 +00:00
c05c4e155b Merge optimizations from official squish release. 2008-11-22 11:36:06 +00:00
fd73484bfc Merge optimizations from official squish relese. 2008-11-22 11:35:13 +00:00
f29d7dd938 Try using 4 means clustering. 2008-11-22 11:34:29 +00:00
3a5dc4783a Add support for regressions. 2008-11-22 11:33:31 +00:00
d4a713451e Fix 4 means clustering. 2008-11-22 11:32:51 +00:00
a302475fa6 Add fitting implementation to project. 2008-11-22 11:30:46 +00:00
6e988ea4c8 Add stream to the vc8 project. 2008-11-22 11:28:18 +00:00
7731181900 Update changelog. 2008-11-22 08:37:56 +00:00
41f6e0ba73 Try 4-means. 2008-11-22 08:37:14 +00:00
11073171a1 Rename stress to testsuite. Install target. 2008-11-22 08:36:55 +00:00
0805832b44 Remove squish build and project files. 2008-11-22 08:36:17 +00:00
a4dcd414ca Fix errors. 2008-11-22 08:35:04 +00:00
4ff8a83f90 Add fitting code to build. 2008-11-22 08:30:55 +00:00
48da357385 Add PCA, and 4-means implementation. 2008-11-22 08:30:20 +00:00
e1916d43c8 Do not mix tabs and spaces. 2008-11-22 00:14:05 +00:00
321f320bfb Do not print stupid messages. 2008-11-22 00:13:14 +00:00
df32fedc7c Add command line options.
Reformat output for easier parsing.
2008-11-21 09:09:57 +00:00
a7396b70ba Fix segfault on linux. Merged from 2.0 branch. 2008-11-21 09:08:00 +00:00
d9ca49cc5e Fix bug in dxt decompression.
Output files as TGA.
2008-11-21 08:06:25 +00:00
56849b78ad Output compressed files. 2008-11-21 07:58:49 +00:00
a769831fb5 Add const keyword to const arguments. 2008-11-21 07:57:28 +00:00
7486201a7e Fix bug in testsuite. 2008-11-21 07:45:09 +00:00
1813624992 Modify stress test to compress a list of real images. 2008-11-21 07:38:12 +00:00
5fa27adfcd Add custom error code and message for the case when container format does not support a certain output format. 2008-11-19 08:10:54 +00:00
6d1891a7e9 Remove gcc-4.3 warning. 2008-11-14 02:19:39 +00:00
8fb1d70d0b Fix bug detected by gcc-4.3. 2008-11-14 02:19:07 +00:00
c26c52d59c Fix gcc-4.3 warnings. 2008-11-14 02:18:35 +00:00
c3329d4675 Fix gcc-4.3 warning. 2008-11-14 02:16:36 +00:00
1cefc366f8 Remove unused function. 2008-11-14 02:15:35 +00:00
7df0885c4f Fix CUDA detection code on linux. 2008-11-14 01:15:36 +00:00
1c5da0e341 Do not use cuda API when CUDA not found.
Fix end of lines.
2008-11-10 21:54:03 +00:00
36ba75b598 Select fastest device. 2008-10-30 04:50:41 +00:00
1628831878 Fix comment. 2008-10-27 08:00:46 +00:00
12e774ea74 Fix cmake file. 2008-10-26 05:56:32 +00:00
66b18f2dbd Fix build under VC7 2008-10-22 03:48:06 +00:00
9ea1934097 Update vc8 projects. 2008-10-17 18:40:16 +00:00
9771e72702 Update vc9 projects. 2008-10-17 18:38:55 +00:00
7776bd5c17 Win32 fixes. 2008-10-17 18:37:17 +00:00
6d8a75462a Build nvtt as a shared library. 2008-10-16 22:22:45 +00:00
cf18077eda Prevent missmatches between incompatible versions of the CUDA runtime and the CUDA driver. 2008-10-16 22:21:21 +00:00
aa37e7a868 Add library loading helpers. 2008-10-16 22:20:31 +00:00
d01a5c1661 Workaround bug in CUDA runtime. When using CUDA 2.0, it's required to use a driver that supports CUDA 2.0. 2008-10-16 08:39:58 +00:00
36ed6bebda Update changelog with branch 2.0 fixes. 2008-10-15 07:17:20 +00:00
5234060618 Integrate branch 2.0 to trunk. 2008-10-15 07:16:57 +00:00
f402f28643 Use unsigned ints for stream sizes and positions. 2008-10-15 07:15:50 +00:00
f047043eb2 Fix compiler errors under gcc-4.3 2008-10-15 07:15:00 +00:00
7eac4195c4 Fix compiler errors under gcc-4.3 2008-10-15 07:14:25 +00:00
0f5692d1ea Compile CUDA files as C++. 2008-10-11 06:43:57 +00:00
23 changed files with 309 additions and 76 deletions

View File

@ -1,4 +1,4 @@
CMAKE_MINIMUM_REQUIRED(VERSION 2.4.0)
CMAKE_MINIMUM_REQUIRED(VERSION 2.6.0)
PROJECT(NV)
ENABLE_TESTING()
@ -16,6 +16,13 @@ MESSAGE(STATUS "Setting optimal options")
MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE)
ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(src)
IF(WIN32)

View File

@ -1,3 +1,10 @@
NVIDIA Texture Tools version 2.0.5
* Fix error in single color compressor. Fixes issue 66.
* Detect mismatch between CUDA runtime and driver, and disable CUDA in that case.
* Fix cmake files when compiling NVTT as a shared library.
* When linking nvtt dynamically on unix, link all libraries dynamically.
* Select fastest CUDA device.
NVIDIA Texture Tools version 2.0.4
* Fix error in RGB format output; reported by jonsoh. See issue 49.
* Added support RGB format dithering by jonsoh. Fixes issue 50 and 51.

View File

@ -1 +1 @@
2.0.4
2.0.5

View File

@ -57,7 +57,7 @@ MARK_AS_ADVANCED (CUDA_FOUND CUDA_COMPILER CUDA_RUNTIME_LIBRARY)
#SET(CUDA_OPTIONS "-ncfe")
SET(CUDA_OPTIONS "")
SET(CUDA_OPTIONS "--host-compilation=C")
IF (CUDA_EMULATION)
SET (CUDA_OPTIONS "${CUDA_OPTIONS} -deviceemu")

2
configure vendored
View File

@ -53,7 +53,7 @@ echo "-- Configuring nvidia-texture-tools "`cat VERSION`
mkdir -p ./build
cd ./build
$CMAKE .. -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
$CMAKE .. -DNVTT_SHARED=1 -DCMAKE_BUILD_TYPE=$build -DCMAKE_INSTALL_PREFIX=$prefix -G "Unix Makefiles" || exit 1
cd ..
echo ""

View File

@ -281,6 +281,10 @@
RelativePath="..\..\..\src\nvcore\Debug.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Library.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Memory.cpp"
>
@ -315,6 +319,10 @@
RelativePath="..\..\..\src\nvcore\DefsVcWin32.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Library.h"
>
</File>
<File
RelativePath="..\..\..\src\nvcore\Memory.h"
>

View File

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,0,4,0
PRODUCTVERSION 2,0,4,0
FILEVERSION 2,0,5,0
PRODUCTVERSION 2,0,5,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 0, 4, 0"
VALUE "FileVersion", "2, 0, 5, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 0, 4, 0"
VALUE "ProductVersion", "2, 0, 5, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -19,13 +19,19 @@ SET(CORE_SRCS
TextWriter.h
TextWriter.cpp
Radix.h
Radix.cpp)
Radix.cpp
Library.h
Library.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# targets
ADD_DEFINITIONS(-DNVCORE_EXPORTS)
IF(UNIX)
SET(LIBS ${LIBS} ${CMAKE_DL_LIBS})
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)

View File

@ -28,7 +28,7 @@
#endif
#if NV_OS_LINUX && defined(HAVE_EXECINFO_H)
# include <execinfo.h>
# include <execinfo.h> // backtrace
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
@ -39,6 +39,13 @@
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace
# if NV_CC_GNUC // defined(HAVE_CXXABI_H)
# include <cxxabi.h>
# endif
# endif
#endif
#include <stdexcept> // std::runtime_error
@ -128,6 +135,10 @@ namespace
#if defined(HAVE_EXECINFO_H) // NV_OS_LINUX
static bool nvHasStackTrace() {
return backtrace != NULL;
}
static void nvPrintStackTrace(void * trace[], int size, int start=0) {
char ** string_array = backtrace_symbols(trace, size);
@ -166,24 +177,36 @@ namespace
static void * callerAddress(void * secret)
{
# if NV_OS_DARWIN && NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.srr0;
# elif NV_OS_DARWIN && NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.eip;
# elif NV_CPU_X86_64
// #define REG_RIP REG_INDEX(rip) // seems to be 16
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
# elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext.regs->nip;
# if NV_OS_DARWIN
# if defined(_STRUCT_MCONTEXT)
# if NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->__ss.__srr0;
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->__ss.__eip;
# endif
# else
# if NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.srr0;
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext->ss.eip;
# endif
# endif
# else
return NULL;
# if NV_CPU_X86_64
// #define REG_RIP REG_INDEX(rip) // seems to be 16
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[REG_RIP];
# elif NV_CPU_X86
ucontext_t * ucp = (ucontext_t *)secret;
return (void *)ucp->uc_mcontext.gregs[14/*REG_EIP*/];
# elif NV_CPU_PPC
ucontext_t * ucp = (ucontext_t *)secret;
return (void *) ucp->uc_mcontext.regs->nip;
# endif
# endif
// How to obtain the instruction pointers in different platforms, from mlton's source code.
@ -228,17 +251,18 @@ namespace
}
# if defined(HAVE_EXECINFO_H)
if (nvHasStackTrace()) // in case of weak linking
{
void * trace[64];
int size = backtrace(trace, 64);
void * trace[64];
int size = backtrace(trace, 64);
if (pnt != NULL) {
// Overwrite sigaction with caller's address.
trace[1] = pnt;
if (pnt != NULL) {
// Overwrite sigaction with caller's address.
trace[1] = pnt;
}
nvPrintStackTrace(trace, size, 1);
}
nvPrintStackTrace(trace, size, 1);
# endif // defined(HAVE_EXECINFO_H)
exit(0);
@ -373,9 +397,12 @@ namespace
# endif
# if defined(HAVE_EXECINFO_H)
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 3);
if (nvHasStackTrace())
{
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 3);
}
# endif
// Exit cleanly.
@ -422,9 +449,12 @@ void NV_CDECL nvDebug(const char *msg, ...)
void debug::dumpInfo()
{
#if !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 1);
if (nvHasStackTrace())
{
void * trace[64];
int size = backtrace(trace, 64);
nvPrintStackTrace(trace, size, 1);
}
#endif
}

View File

@ -2,8 +2,7 @@
#error "Do not include this file directly."
#endif
#include <stdlib.h> // uint8_t, int8_t, ...
#include <stdint.h> // uint8_t, int8_t, ...
// Function linkage
#define DLL_IMPORT

View File

@ -19,7 +19,9 @@
// Set standard function names.
#define snprintf _snprintf
#define vsnprintf _vsnprintf
#if _MSC_VER < 1500
# define vsnprintf _vsnprintf
#endif
#define vsscanf _vsscanf
#define chdir _chdir
#define getcwd _getcwd

41
src/nvcore/Library.cpp Normal file
View File

@ -0,0 +1,41 @@
#include "Library.h"
#include "Debug.h"
#if NV_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#else
#include <dlfcn.h>
#endif
void * nvLoadLibrary(const char * name)
{
#if NV_OS_WIN32
return (void *)LoadLibraryExA( name, NULL, 0 );
#else
return dlopen(name, RTLD_LAZY);
#endif
}
void nvUnloadLibrary(void * handle)
{
nvDebugCheck(handle != NULL);
#if NV_OS_WIN32
FreeLibrary((HMODULE)handle);
#else
dlclose(handle);
#endif
}
void * nvBindSymbol(void * handle, const char * symbol)
{
#if NV_OS_WIN32
return (void *)GetProcAddress((HMODULE)handle, symbol);
#else
return (void *)dlsym(handle, symbol);
#endif
}

50
src/nvcore/Library.h Normal file
View File

@ -0,0 +1,50 @@
// This code is in the public domain -- castano@gmail.com
#ifndef NV_CORE_LIBRARY_H
#define NV_CORE_LIBRARY_H
#include <nvcore/nvcore.h>
#if NV_OS_WIN32
#define LIBRARY_NAME(name) #name ".dll"
#elif NV_OS_DARWIN
#define NV_LIBRARY_NAME(name) "lib" #name ".dylib"
#else
#define NV_LIBRARY_NAME(name) "lib" #name ".so"
#endif
NVCORE_API void * nvLoadLibrary(const char * name);
NVCORE_API void nvUnloadLibrary(void * lib);
NVCORE_API void * nvBindSymbol(void * lib, const char * symbol);
class NVCORE_CLASS Library
{
public:
Library(const char * name)
{
handle = nvLoadLibrary(name);
}
~Library()
{
if (isValid())
{
nvUnloadLibrary(handle);
}
}
bool isValid() const
{
return handle != NULL;
}
void * bindSymbol(const char * symbol)
{
return nvBindSymbol(handle, symbol);
}
private:
void * handle;
};
#endif // NV_CORE_LIBRARY_H

View File

@ -24,7 +24,7 @@ __forceinline void nvPrefetch(const void * mem)
#else // NV_CC_MSVC
// do nothing in other case.
#define piPrefetch(ptr)
#define nvPrefetch(ptr)
#endif // NV_CC_MSVC

View File

@ -43,8 +43,11 @@ public:
/** Delete owned pointer and assign new one. */
void operator=( T * p ) {
delete m_ptr;
m_ptr = p;
if (p != m_ptr)
{
delete m_ptr;
m_ptr = p;
}
}
/** Member access. */
@ -249,14 +252,14 @@ public:
/** -> operator. */
BaseClass * operator -> () const
{
piCheck( m_ptr != NULL );
nvCheck( m_ptr != NULL );
return m_ptr;
}
/** * operator. */
BaseClass & operator*() const
{
piCheck( m_ptr != NULL );
nvCheck( m_ptr != NULL );
return *m_ptr;
}

View File

@ -14,7 +14,7 @@ namespace nv
uint strHash(const char * str, uint h) NV_PURE;
/// String hash vased on Bernstein's hash.
/// String hash based on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i;
@ -213,9 +213,12 @@ namespace nv
/// Implement value semantics.
String & operator=( const String & str )
{
release();
data = str.data;
addRef();
if (str.data != data)
{
release();
data = str.data;
addRef();
}
return *this;
}

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (4); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (5); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -108,7 +108,7 @@ public:
float area() const
{
const Vector3 d = extents();
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
}
/// Get the volume of the box.
@ -118,6 +118,14 @@ public:
return 8.0f * (d.x() * d.y() * d.z());
}
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
private:
Vector3 m_mins;
@ -125,15 +133,6 @@ private:
};
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace

View File

@ -43,8 +43,9 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED)
ADD_LIBRARY(nvtt SHARED ${DXT_SRCS})
IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED=1)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)

View File

@ -211,6 +211,10 @@ Compressor::Compressor() : m(*new Compressor::Private())
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
@ -237,6 +241,10 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())

View File

@ -191,6 +191,9 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
*sameColor = (axis == make_float3(0, 0, 0));
// Single color compressor needs unweighted colors.
if (*sameColor) colors[idx] = rawColors[idx];
dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__

View File

@ -22,6 +22,7 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
@ -52,23 +53,52 @@ static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return true;
}
}
return !bIsWow64;
return !bIsWow64;
}
#endif
static bool isCudaDriverAvailable(uint version)
{
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
if (!nvcuda.isValid())
{
return false;
}
if (version > 2000)
{
void * address = nvcuda.bindSymbol("cuStreamCreate");
if (address == NULL) return false;
}
if (version > 2010)
{
void * address = nvcuda.bindSymbol("cuLoadDataEx");
if (address == NULL) return false;
}
return true;
}
/// Determine if CUDA is available.
bool nv::cuda::isHardwarePresent()
{
@ -90,6 +120,12 @@ bool nv::cuda::isHardwarePresent()
return false;
}
// Make sure that CUDA driver matches CUDA runtime.
if (!isCudaDriverAvailable(CUDART_VERSION))
{
return false;
}
// @@ Make sure that warp size == 32
}
@ -115,6 +151,35 @@ int nv::cuda::deviceCount()
return 0;
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = 0;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
int current_device = 0;
while (current_device < device_count)
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, current_device);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major != -1 && device_properties.minor != -1)
{
if( gflops > max_gflops )
{
max_gflops = gflops;
max_gflops_device = current_device;
}
}
current_device++;
}
#endif
return max_gflops_device;
}
/// Activate the given devices.
bool nv::cuda::setDevice(int i)
{

View File

@ -31,6 +31,7 @@ namespace nv
{
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool setDevice(int i);
};