@ -27,15 +27,65 @@
using namespace nv ;
# if NV_OS_WIN32
// Function-pointer type intended for kernel32's GetNativeSystemInfo, which is
// looked up by name at run time instead of being linked directly.
// NOTE(review): GetNativeSystemInfo is documented as returning void, not BOOL --
// confirm that nothing relies on a value returned through this pointer type.
typedef BOOL ( WINAPI * LPFN_GSI ) ( LPSYSTEM_INFO ) ;
// Function-pointer type for kernel32's IsWow64Process, which is likewise
// looked up by name at run time.
typedef BOOL ( WINAPI * LPFN_ISWOW64PROCESS ) ( HANDLE , PBOOL ) ;
static bool isWow64 ( ) {
LPFN_ISWOW64PROCESS fnIsWow64Process = ( LPFN_ISWOW64PROCESS ) GetProcAddress ( GetModuleHandle ( " kernel32 " ) , " IsWow64Process " ) ;
BOOL wow64 = FALSE ;
if ( NULL ! = fnIsWow64Process ) {
if ( ! fnIsWow64Process ( GetCurrentProcess ( ) , & wow64 ) ) {
// If error, assume false.
}
}
return wow64 ! = 0 ;
}
// Fills *sysinfo with system information. When the process runs under WOW64,
// GetNativeSystemInfo is preferred (if kernel32 exports it); otherwise, or when
// the native entry point cannot be resolved, GetSystemInfo is used.
//
// sysinfo: output structure; must not be NULL.
static void getSystemInfo(SYSTEM_INFO * sysinfo) {
    if (isWow64()) {
        // GetNativeSystemInfo returns void, so it must be called through a
        // void-returning pointer; reading a BOOL result from it would yield an
        // indeterminate value. Success is simply "the function was found".
        typedef void (WINAPI * NativeSystemInfoFn)(LPSYSTEM_INFO);

        HMODULE kernel32 = GetModuleHandle(TEXT("kernel32"));
        if (kernel32 != NULL) {
            NativeSystemInfoFn fnGetNativeSystemInfo =
                (NativeSystemInfoFn)GetProcAddress(kernel32, "GetNativeSystemInfo");
            if (fnGetNativeSystemInfo != NULL) {
                fnGetNativeSystemInfo(sysinfo);
                return;
            }
        }
    }

    // Not under WOW64, or the native query is unavailable.
    GetSystemInfo(sysinfo);
}
# endif // NV_OS_WIN32
// Find the number of logical processors in the system.
// Based on: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
// @@ Distinguish between logical and physical cores?
uint nv : : hardwareThreadCount ( ) {
uint nv : : processorCount ( ) {
# if NV_OS_WIN32
SYSTEM_INFO sysinfo ;
GetSystemInfo ( & sysinfo ) ;
return sysinfo . dwNumberOfProcessors ;
getSystemInfo ( & sysinfo ) ;
//return sysinfo.dwNumberOfProcessors;
// Respect process affinity mask?
DWORD_PTR pam , sam ;
GetProcessAffinityMask ( GetCurrentProcess ( ) , & pam , & sam ) ;
// Count number of bits set in the processor affinity mask.
uint count = 0 ;
for ( int i = 0 ; i < sizeof ( DWORD_PTR ) * 8 ; i + + ) {
if ( pam & ( DWORD_PTR ( 1 ) < < i ) ) count + + ;
}
nvDebugCheck ( count < = sysinfo . dwNumberOfProcessors ) ;
return count ;
# elif NV_OS_ORBIS
return 6 ;
# elif NV_OS_XBOX
return 3 ; // or 6?
# elif NV_OS_LINUX // Linux, Solaris, & AIX
@ -72,10 +122,211 @@ uint nv::hardwareThreadCount() {
# endif
}
// Returns an identifier for the calling thread: the value of
// GetCurrentThreadId() on Win32, and 0 on every other platform.
uint nv::threadId() {
#if !NV_OS_WIN32
    return 0; // @@
#else
    return GetCurrentThreadId();
#endif
}
}
// @@ If we are using fewer worker threads than processors and hyperthreading is available, we probably want to enumerate the logical processors
// so that the first cores of each processor go first. This way, if, say, we leave 2 hardware threads free, then we still have one worker
// thread on each physical processor.
// I believe that currently logical processors are enumerated in physical order, that is:
// 0 = thread a in physical core 0
// 1 = thread b in physical core 0
// 2 = thread a in physical core 1
// ... and so on ...
// I'm not sure we can actually rely on that. And in any case we should start detecting the number of physical processors, which appears to be a pain
// to do in a way that's compatible with newer i7 processors.
void nv : : lockThreadToProcessor ( int idx ) {
# if NV_OS_WIN32
//nvDebugCheck(idx < hardwareThreadCount());
#if 0
DWORD_PTR tam = 1 < < idx ;
# else
DWORD_PTR pam , sam ;
BOOL rc = GetProcessAffinityMask ( GetCurrentProcess ( ) , & pam , & sam ) ;
// Find the idx's bit set.
uint pidx = 0 ;
DWORD_PTR tam = 0 ;
for ( int i = 0 ; i < sizeof ( DWORD_PTR ) * 8 ; i + + ) {
DWORD_PTR mask = DWORD_PTR ( 1 ) < < i ;
if ( pam & mask ) {
if ( pidx = = idx ) {
tam = mask ;
break ;
}
pidx + + ;
}
}
nvDebugCheck ( tam ! = 0 ) ;
# endif
SetThreadAffinityMask ( GetCurrentThread ( ) , tam ) ;
# else
// @@ NOP
# endif
}
void nv : : unlockThreadToProcessor ( ) {
# if NV_OS_WIN32
DWORD_PTR pam , sam ;
BOOL rc = GetProcessAffinityMask ( GetCurrentProcess ( ) , & pam , & sam ) ;
SetThreadAffinityMask ( GetCurrentThread ( ) , pam ) ;
# else
// @@ NOP
# endif
}
// Number of logical processors; this is simply whatever processorCount() reports.
uint nv::logicalProcessorCount() {
    const uint logicalCount = processorCount();
    return logicalCount;
}
# if NV_OS_WIN32
// CPUID-derived description of one logical processor, filled in by
// gatherProcessorData() while the calling thread runs on that processor.
struct LOGICALPROCESSORDATA
{
// CPUInfo[0] returned by CPUID function 0: highest standard function number.
unsigned int nLargestStandardFunctionNumber ;
// CPUInfo[0] returned by CPUID function 0x80000000: highest extended function number.
unsigned int nLargestExtendedFunctionNumber ;
// Bits 16..23 of CPUInfo[1] from CPUID function 1.
int nLogicalProcessorCount ;
// Bits 24..31 of CPUInfo[1] from CPUID function 1 (an 8-bit value).
int nLocalApicId ;
// Not referenced by the code visible in this file section.
int nCPUcore ;
// Not referenced by the code visible in this file section.
int nProcessorId ;
// Bits 12..15 of CPUInfo[2] from CPUID function 0x80000008.
int nApicIdCoreIdSize ;
// Low 8 bits of CPUInfo[2] from CPUID function 0x80000008.
int nNC ;
// 2^nApicIdCoreIdSize when nApicIdCoreIdSize is non-zero, otherwise nNC + 1.
int nMNC ;
// nNC + 1.
int nCPUCoresperProcessor ;
// 1 when HTT is clear or CmpLegacy is set, otherwise
// nLogicalProcessorCount / nCPUCoresperProcessor.
int nThreadsperCPUCore ;
// The nLocalApicId bits above the core-id field, shifted down to bit 0.
int nProcId ;
// The core-id field (low bits) of nLocalApicId.
int nCoreId ;
// Consulted together with HTT when deriving nThreadsperCPUCore.
bool CmpLegacy ;
// Bit 28 of CPUInfo[3] from CPUID function 1.
bool HTT ;
} ;
// Capacity of LogicalProcessorMap (one entry per logical processor index).
# define MAX_NUMBER_OF_LOGICAL_PROCESSORS 96
// Together with MAX_NUMBER_OF_IOAPICS this determines the size of PhysProcIds.
# define MAX_NUMBER_OF_PHYSICAL_PROCESSORS 8
# define MAX_NUMBER_OF_IOAPICS 16
// Per-logical-processor CPUID data; entry i is filled while the calling thread
// is locked to the i-th processor of the process affinity mask.
static LOGICALPROCESSORDATA LogicalProcessorMap [ MAX_NUMBER_OF_LOGICAL_PROCESSORS ] ;
// Tallies indexed by LOGICALPROCESSORDATA::nProcId: how many logical processors
// reported each processor id. Holds only
// MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS entries.
static int PhysProcIds [ MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS ] ;
// Runs CPUID on whichever processor the calling thread is currently executing
// on and fills *p with the APIC id, the core/thread counts and the derived
// processor id / core id. Callers are expected to pin the thread to the
// processor of interest first.
//
// p: output record; must not be NULL. Fields that depend on a CPUID function
//    the processor does not implement are set to 0 / false.
static void gatherProcessorData(LOGICALPROCESSORDATA * p) {
    int CPUInfo[4] = { 0, 0, 0, 0 };

    // Start from a known state so that an unavailable CPUID function never
    // leaves a stale value from a previous call behind.
    p->nLocalApicId = 0;
    p->nLogicalProcessorCount = 0;
    p->HTT = false;
    p->CmpLegacy = false;
    p->nApicIdCoreIdSize = 0;
    p->nNC = 0;

    // Function 0 reports the highest standard function number.
    __cpuid(CPUInfo, 0);
    p->nLargestStandardFunctionNumber = CPUInfo[0];

    // Function 1: feature information. Only this function is needed, so query
    // it directly instead of enumerating every standard function.
    if (p->nLargestStandardFunctionNumber >= 1) {
        __cpuid(CPUInfo, 1);

        // Some of the bits of LocalApicId represent the CPU core
        // within a processor and other bits represent the processor ID.
        p->nLocalApicId = (CPUInfo[1] >> 24) & 0xff;
        p->HTT = ((CPUInfo[3] >> 28) & 0x1) != 0;
        p->nLogicalProcessorCount = (CPUInfo[1] >> 16) & 0x0FF;
    }

    // Function 0x80000000 reports the highest extended function number.
    __cpuid(CPUInfo, 0x80000000);
    p->nLargestExtendedFunctionNumber = CPUInfo[0];

    // Function 0x80000001, ECX bit 1: CmpLegacy (core multi-processing legacy
    // mode), which the thread-count derivation below depends on.
    if (p->nLargestExtendedFunctionNumber >= 0x80000001) {
        __cpuid(CPUInfo, 0x80000001);
        p->CmpLegacy = ((CPUInfo[2] >> 1) & 0x1) != 0;
    }

    // Function 0x80000008, ECX: APIC core-id size and NC (core count - 1).
    if (p->nLargestExtendedFunctionNumber >= 0x80000008) {
        __cpuid(CPUInfo, 0x80000008);
        p->nApicIdCoreIdSize = (CPUInfo[2] >> 12) & 0xF;
        p->nNC = (CPUInfo[2]) & 0x0FF;
    }

    // MNC
    // A value of zero for ApicIdCoreIdSize indicates that MNC is derived by this
    // legacy formula: MNC = NC + 1
    // A non-zero value of ApicIdCoreIdSize means that MNC is 2^ApicIdCoreIdSize
    if (p->nApicIdCoreIdSize) {
        p->nMNC = 1 << p->nApicIdCoreIdSize; // size is a 4-bit field, so at most 1 << 15
    }
    else {
        p->nMNC = p->nNC + 1;
    }

    // If HTT==0, then LogicalProcessorCount is reserved, and the CPU contains
    // one CPU core and the CPU core is single-threaded.
    // If HTT==1 and CmpLegacy==1, LogicalProcessorCount represents the number of
    // CPU cores per processor, where each CPU core is single-threaded. If HTT==1
    // and CmpLegacy==0, then LogicalProcessorCount is the number of threads per
    // processor, which is the number of cores times the number of threads per core.
    // The number of cores is NC+1.
    p->nCPUCoresperProcessor = p->nNC + 1;
    if (!p->HTT || p->CmpLegacy) {
        p->nThreadsperCPUCore = 1;
    }
    else {
        p->nThreadsperCPUCore = p->nLogicalProcessorCount / p->nCPUCoresperProcessor;
    }

    // Split the APIC id into a core id (low numbits bits) and a processor id
    // (the remaining high bits). With no reported core-id size a single
    // core-id bit is assumed.
    uint numbits = 1;
    uint mask = 1;
    if (p->nApicIdCoreIdSize) {
        numbits = p->nApicIdCoreIdSize;
        mask = (1u << numbits) - 1;
    }
    p->nProcId = (p->nLocalApicId & ~mask) >> numbits;
    p->nCoreId = p->nLocalApicId & mask;
}
// Counts the distinct processor ids (LOGICALPROCESSORDATA::nProcId) reported by
// the logical processors this process may run on.
//
// Side effects: temporarily pins the calling thread to each processor in turn
// and afterwards sets its affinity to the full process mask; refreshes the
// file-level LogicalProcessorMap and PhysProcIds tables.
uint nv::physicalProcessorCount() {
    uint lpc = logicalProcessorCount();

    // Never index past the end of LogicalProcessorMap.
    if (lpc > MAX_NUMBER_OF_LOGICAL_PROCESSORS) {
        lpc = MAX_NUMBER_OF_LOGICAL_PROCESSORS;
    }

    // Get info about each logical processor.
    for (uint i = 0; i < lpc; i++) {
        // Make sure the thread doesn't change processor while we gather its data.
        lockThreadToProcessor(i);
        gatherProcessorData(&LogicalProcessorMap[i]);
    }

    unlockThreadToProcessor();

    memset(PhysProcIds, 0, sizeof(PhysProcIds));
    const uint physProcIdCapacity = sizeof(PhysProcIds) / sizeof(PhysProcIds[0]);

    // nProcId is derived from an 8-bit APIC id, so it can be far larger than
    // PhysProcIds has room for. Track distinct ids in a table that covers the
    // whole 8-bit range and only tally into PhysProcIds when the id fits.
    const int maxProcIds = 256;
    bool seen[maxProcIds] = { false };

    uint pc = 0;
    for (uint i = 0; i < lpc; i++) {
        const int id = LogicalProcessorMap[i].nProcId;
        nvDebugCheck(id >= 0 && id < maxProcIds);
        if (id < 0 || id >= maxProcIds) {
            continue;
        }

        if (uint(id) < physProcIdCapacity) {
            PhysProcIds[id]++;
        }

        if (!seen[id]) {
            seen[id] = true;
            pc++;
        }
    }

    return pc;
}
# else
// Fallback for platforms without physical-processor detection.
uint nv::physicalProcessorCount() {
    // @@ Assume the same: report whatever processorCount() returns.
    const uint assumedCount = processorCount();
    return assumedCount;
}
# endif