More cleanup. Remove files that are not strictly required.
This commit is contained in:
@ -1,168 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_BITARRAY_H
|
||||
#define NV_CORE_BITARRAY_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
#include <nvcore/Containers.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
/// Count the bits of @a x.
|
||||
inline uint bitsSet(uint8 x) {
|
||||
uint count = 0;
|
||||
for(; x != 0; x >>= 1) {
|
||||
count += (x & 1);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/// Count the bits of @a x.
|
||||
inline uint bitsSet(uint32 x, int bits) {
|
||||
uint count = 0;
|
||||
for(; x != 0 && bits != 0; x >>= 1, bits--) {
|
||||
count += (x & 1);
|
||||
}
|
||||
return count;
|
||||
}
|
||||
|
||||
|
||||
/// Simple bit array.
|
||||
class BitArray
|
||||
{
|
||||
public:
|
||||
|
||||
/// Default ctor.
|
||||
BitArray() {}
|
||||
|
||||
/// Ctor with initial m_size.
|
||||
BitArray(uint sz)
|
||||
{
|
||||
resize(sz);
|
||||
}
|
||||
|
||||
/// Get array m_size.
|
||||
uint size() const { return m_size; }
|
||||
|
||||
/// Clear array m_size.
|
||||
void clear() { resize(0); }
|
||||
|
||||
/// Set array m_size.
|
||||
void resize(uint sz)
|
||||
{
|
||||
m_size = sz;
|
||||
m_bitArray.resize( (m_size + 7) >> 3 );
|
||||
}
|
||||
|
||||
/// Get bit.
|
||||
bool bitAt(uint b) const
|
||||
{
|
||||
nvDebugCheck( b < m_size );
|
||||
return (m_bitArray[b >> 3] & (1 << (b & 7))) != 0;
|
||||
}
|
||||
|
||||
/// Set a bit.
|
||||
void setBitAt(uint b)
|
||||
{
|
||||
nvDebugCheck( b < m_size );
|
||||
m_bitArray[b >> 3] |= (1 << (b & 7));
|
||||
}
|
||||
|
||||
/// Clear a bit.
|
||||
void clearBitAt( uint b )
|
||||
{
|
||||
nvDebugCheck( b < m_size );
|
||||
m_bitArray[b >> 3] &= ~(1 << (b & 7));
|
||||
}
|
||||
|
||||
/// Clear all the bits.
|
||||
void clearAll()
|
||||
{
|
||||
memset(m_bitArray.mutableBuffer(), 0, m_bitArray.size());
|
||||
}
|
||||
|
||||
/// Set all the bits.
|
||||
void setAll()
|
||||
{
|
||||
memset(m_bitArray.mutableBuffer(), 0xFF, m_bitArray.size());
|
||||
}
|
||||
|
||||
/// Toggle all the bits.
|
||||
void toggleAll()
|
||||
{
|
||||
const uint byte_num = m_bitArray.size();
|
||||
for(uint b = 0; b < byte_num; b++) {
|
||||
m_bitArray[b] ^= 0xFF;
|
||||
}
|
||||
}
|
||||
|
||||
/// Get a byte of the bit array.
|
||||
const uint8 & byteAt(uint index) const
|
||||
{
|
||||
return m_bitArray[index];
|
||||
}
|
||||
|
||||
/// Set the given byte of the byte array.
|
||||
void setByteAt(uint index, uint8 b)
|
||||
{
|
||||
m_bitArray[index] = b;
|
||||
}
|
||||
|
||||
/// Count the number of bits set.
|
||||
uint countSetBits() const
|
||||
{
|
||||
const uint num = m_bitArray.size();
|
||||
if( num == 0 ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint count = 0;
|
||||
for(uint i = 0; i < num - 1; i++) {
|
||||
count += bitsSet(m_bitArray[i]);
|
||||
}
|
||||
count += bitsSet(m_bitArray[num-1], m_size & 0x7);
|
||||
|
||||
//piDebugCheck(count + countClearBits() == m_size);
|
||||
return count;
|
||||
}
|
||||
|
||||
/// Count the number of bits clear.
|
||||
uint countClearBits() const {
|
||||
|
||||
const uint num = m_bitArray.size();
|
||||
if( num == 0 ) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint count = 0;
|
||||
for(uint i = 0; i < num - 1; i++) {
|
||||
count += bitsSet(~m_bitArray[i]);
|
||||
}
|
||||
count += bitsSet(~m_bitArray[num-1], m_size & 0x7);
|
||||
|
||||
//piDebugCheck(count + countSetBits() == m_size);
|
||||
return count;
|
||||
}
|
||||
|
||||
friend void swap(BitArray & a, BitArray & b)
|
||||
{
|
||||
swap(a.m_size, b.m_size);
|
||||
swap(a.m_bitArray, b.m_bitArray);
|
||||
}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
/// Number of bits stored.
|
||||
uint m_size;
|
||||
|
||||
/// Array of bits.
|
||||
Array<uint8> m_bitArray;
|
||||
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // _PI_CORE_BITARRAY_H_
|
@ -7,9 +7,6 @@ SET(CORE_SRCS
|
||||
DefsGnucWin32.h
|
||||
DefsVcWin32.h
|
||||
Ptr.h
|
||||
RefCounted.h
|
||||
RefCounted.cpp
|
||||
BitArray.h
|
||||
Memory.h
|
||||
Memory.cpp
|
||||
Debug.h
|
||||
@ -17,10 +14,6 @@ SET(CORE_SRCS
|
||||
Containers.h
|
||||
StrLib.h
|
||||
StrLib.cpp
|
||||
Radix.h
|
||||
Radix.cpp
|
||||
CpuInfo.h
|
||||
CpuInfo.cpp
|
||||
Algorithms.h
|
||||
Timer.h
|
||||
Library.h
|
||||
@ -31,41 +24,11 @@ SET(CORE_SRCS
|
||||
TextReader.cpp
|
||||
TextWriter.h
|
||||
TextWriter.cpp
|
||||
Tokenizer.h
|
||||
Tokenizer.cpp
|
||||
FileSystem.h
|
||||
FileSystem.cpp)
|
||||
|
||||
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
|
||||
|
||||
# 64-bit Windows builds with MSVC need the assembly version of vsscanf:
# assemble it with ml64 and add the resulting object to the core sources.
IF(MSVC AND NV_SYSTEM_PROCESSOR STREQUAL "AMD64")
    SET(VSSCANF_ASM_NAME "vsscanf_proxy_win64")
    SET(VSSCANF_ASM_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VSSCANF_ASM_NAME}.masm")

    IF(MSVC_IDE)
        # $(IntDir) is a Visual Studio macro that expands to the intermediate
        # directory of the selected solution configuration
        SET(VSSCANF_ASM_INTDIR "$(IntDir)")
    ELSE(MSVC_IDE)
        # Hack (worked as of cmake 2.6.0 patch 0): the NMake generator does not
        # handle the generated .obj source properly unless its path is absolute
        GET_FILENAME_COMPONENT(VSSCANF_ASM_INTDIR
            "${nvcore_BINARY_DIR}/CMakeFiles/nvcore.dir" ABSOLUTE)
    ENDIF(MSVC_IDE)

    SET(VSSCANF_ASM_OBJ "${VSSCANF_ASM_INTDIR}/${VSSCANF_ASM_NAME}.obj")

    # Append the assembled object to the sources ...
    LIST(APPEND CORE_SRCS ${VSSCANF_ASM_OBJ})

    # ... and describe how it is produced from the .masm file
    ADD_CUSTOM_COMMAND(OUTPUT ${VSSCANF_ASM_OBJ}
        MAIN_DEPENDENCY ${VSSCANF_ASM_SRC}
        COMMAND ml64
        ARGS /nologo /Fo ${VSSCANF_ASM_OBJ} /c /Cx ${VSSCANF_ASM_SRC}
    )
ENDIF(MSVC AND NV_SYSTEM_PROCESSOR STREQUAL "AMD64")
|
||||
|
||||
# targets
|
||||
ADD_DEFINITIONS(-DNVCORE_EXPORTS)
|
||||
|
||||
|
@ -1,59 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_ALGORITHMS_H
|
||||
#define NV_CORE_ALGORITHMS_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
// Cool constraints from "Imperfect C++"
|
||||
|
||||
// must_be_pod
// Compile-time constraint: instantiating constraints() declares T as a member
// of an anonymous union, which fails to compile for types that are not allowed
// inside a union.
template <class T>
struct must_be_pod
{
    static void constraints()
    {
        // The member name is what shows up in the compiler diagnostic.
        union
        {
            T T_is_not_POD_type;
        };
    }
};
|
||||
|
||||
// must_be_pod_or_void
// Same constraint as must_be_pod, except that void is accepted as well.
template <typename T>
struct must_be_pod_or_void
{
    static void constraints()
    {
        // Fails to compile when T cannot be a union member; the member name
        // is what shows up in the compiler diagnostic.
        union { T T_is_not_POD_type; };
    }
};

// void always satisfies the constraint. The specialization still has to offer
// constraints(), otherwise must_be_pod_or_void<void>::constraints cannot be
// referenced and the "or void" case is unusable.
template <> struct must_be_pod_or_void<void>
{
    static void constraints() {}
};
|
||||
|
||||
// size_of
// sizeof() that is also defined for void: size_of<T>::value equals sizeof(T),
// and size_of<void>::value is 0.
template <class T>
struct size_of
{
    enum
    {
        value = sizeof(T)
    };
};

// void has no size of its own; report 0.
template <>
struct size_of<void>
{
    enum
    {
        value = 0
    };
};
|
||||
|
||||
// must_be_same_size
|
||||
template <typename T1, typename T2>
|
||||
struct must_be_same_size
|
||||
{
|
||||
static void constraints()
|
||||
{
|
||||
const int T1_not_same_size_as_T2 = size_of<T1>::value == size_of<T2>::value;
|
||||
int i[T1_not_same_size_as_T2];
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_CORE_ALGORITHMS_H
|
@ -1,162 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include <nvcore/CpuInfo.h>
|
||||
#include <nvcore/Debug.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
#if NV_OS_WIN32
|
||||
|
||||
#define _WIN32_WINNT 0x0501
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#include <windows.h>
|
||||
|
||||
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
|
||||
|
||||
static bool isWow64()
|
||||
{
|
||||
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
|
||||
|
||||
BOOL bIsWow64 = FALSE;
|
||||
|
||||
if (NULL != fnIsWow64Process)
|
||||
{
|
||||
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
|
||||
{
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return bIsWow64 == TRUE;
|
||||
}
|
||||
|
||||
#endif // NV_OS_WIN32
|
||||
|
||||
|
||||
#if NV_OS_LINUX
|
||||
#include <string.h>
|
||||
#include <sched.h>
|
||||
#endif // NV_OS_LINUX
|
||||
|
||||
#if NV_OS_DARWIN
|
||||
#include <sys/types.h>
|
||||
#include <sys/sysctl.h>
|
||||
#endif // NV_OS_DARWIN
|
||||
|
||||
// Initialize the data and the local defines, which are designed
|
||||
// to match the positions in cpuid
|
||||
uint CpuInfo::m_cpu = ~0x0;
|
||||
uint CpuInfo::m_procCount = 0;
|
||||
#define NV_CPUINFO_MMX_MASK (1<<23)
|
||||
#define NV_CPUINFO_SSE_MASK (1<<25)
|
||||
#define NV_CPUINFO_SSE2_MASK (1<<26)
|
||||
#define NV_CPUINFO_SSE3_MASK (1)
|
||||
|
||||
|
||||
// Return the number of processors available, computed on first use and cached
// in m_procCount. The result is always at least 1.
uint CpuInfo::processorCount()
{
    if (m_procCount == 0) {
#if NV_OS_WIN32
        SYSTEM_INFO sysInfo;

        // A WOW64 process has to ask for the native system information.
        if (isWow64())
        {
            GetNativeSystemInfo(&sysInfo);
        }
        else
        {
            GetSystemInfo(&sysInfo);
        }

        m_procCount = (uint)sysInfo.dwNumberOfProcessors;

#elif NV_OS_LINUX

        // Based on x264 (July 6 snapshot) cpu.c:271: count the CPUs present
        // in the affinity mask of this process.
        cpu_set_t p_aff;
        memset( &p_aff, 0, sizeof(p_aff) );

        uint np = 0;
        if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) == 0 )
        {
            // sizeof() is in bytes, so the mask holds 8 * sizeof(p_aff) bits.
            const uint8 * mask = (const uint8 *)&p_aff;
            const uint bitCount = 8 * (uint)sizeof(p_aff);
            for( uint bit = 0; bit < bitCount; bit++ )
            {
                np += (mask[bit / 8] >> (bit % 8)) & 1;
            }
        }

        // Fall back to one processor if the query failed or reported none;
        // caching 0 would mean "not computed yet".
        m_procCount = (np > 0) ? np : 1;

#elif NV_OS_DARWIN

        // Code from x264 (July 6 snapshot) cpu.c:286
        uint numberOfCPUs;
        size_t length = sizeof( numberOfCPUs );
        if( sysctlbyname("hw.ncpu", &numberOfCPUs, &length, NULL, 0) )
        {
            // The query failed: assume a single processor.
            numberOfCPUs = 1;
        }
        m_procCount = numberOfCPUs;

#else
        m_procCount = 1;
#endif
    }
    nvDebugCheck(m_procCount > 0);
    return m_procCount;
}
|
||||
|
||||
// Return the number of cores. No detection is performed: the result is always 1.
uint CpuInfo::coreCount()
{
    const uint cores = 1;
    return cores;
}
|
||||
|
||||
bool CpuInfo::hasMMX()
|
||||
{
|
||||
return (cpu() & NV_CPUINFO_MMX_MASK) != 0;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasSSE()
|
||||
{
|
||||
return (cpu() & NV_CPUINFO_SSE_MASK) != 0;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasSSE2()
|
||||
{
|
||||
return (cpu() & NV_CPUINFO_SSE2_MASK) != 0;
|
||||
}
|
||||
|
||||
bool CpuInfo::hasSSE3()
|
||||
{
|
||||
return (cpu() & NV_CPUINFO_SSE3_MASK) != 0;
|
||||
}
|
||||
|
||||
inline int CpuInfo::cpu() {
|
||||
if (m_cpu == ~0x0) {
|
||||
m_cpu = 0;
|
||||
|
||||
#if NV_CC_MSVC
|
||||
int CPUInfo[4] = {-1};
|
||||
__cpuid(CPUInfo, /*InfoType*/ 1);
|
||||
|
||||
if (CPUInfo[2] & NV_CPUINFO_SSE3_MASK) {
|
||||
m_cpu |= NV_CPUINFO_SSE3_MASK;
|
||||
}
|
||||
if (CPUInfo[3] & NV_CPUINFO_MMX_MASK) {
|
||||
m_cpu |= NV_CPUINFO_MMX_MASK;
|
||||
}
|
||||
if (CPUInfo[3] & NV_CPUINFO_SSE_MASK) {
|
||||
m_cpu |= NV_CPUINFO_SSE_MASK;
|
||||
}
|
||||
if (CPUInfo[3] & NV_CPUINFO_SSE2_MASK) {
|
||||
m_cpu |= NV_CPUINFO_SSE2_MASK;
|
||||
}
|
||||
#elif NV_CC_GNUC
|
||||
// TODO: add the proper inline assembly
|
||||
#if NV_CPU_X86
|
||||
|
||||
#elif NV_CPU_X86_64
|
||||
|
||||
#endif // NV_CPU_X86_64
|
||||
#endif // NV_CC_GNUC
|
||||
}
|
||||
return m_cpu;
|
||||
}
|
@ -1,109 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_CPUINFO_H
|
||||
#define NV_CORE_CPUINFO_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
|
||||
#if NV_CC_MSVC
|
||||
#if _MSC_VER >= 1400
|
||||
# include <intrin.h> // __rdtsc
|
||||
#endif
|
||||
#endif
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
// CPU Information.
|
||||
class CpuInfo
|
||||
{
|
||||
protected:
|
||||
static int cpu();
|
||||
|
||||
private:
|
||||
// Cache of the CPU data
|
||||
static uint m_cpu;
|
||||
static uint m_procCount;
|
||||
|
||||
public:
|
||||
static uint processorCount();
|
||||
static uint coreCount();
|
||||
|
||||
static bool hasMMX();
|
||||
static bool hasSSE();
|
||||
static bool hasSSE2();
|
||||
static bool hasSSE3();
|
||||
};
|
||||
|
||||
#if NV_CC_MSVC
|
||||
#if _MSC_VER < 1400
|
||||
// Read the time stamp counter with inline assembly (compilers older than
// _MSC_VER 1400, which get no __rdtsc intrinsic here).
inline uint64 rdtsc()
{
    uint64 ticks;
    __asm
    {
        rdtsc
        // Low half comes back in eax, high half in edx.
        mov DWORD PTR [ticks], eax
        mov DWORD PTR [ticks+4], edx
    }
    return ticks;
}
|
||||
#else
|
||||
#pragma intrinsic(__rdtsc)
|
||||
|
||||
inline uint64 rdtsc()
|
||||
{
|
||||
return __rdtsc();
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if NV_CC_GNUC
|
||||
|
||||
#if defined(__i386__)
|
||||
|
||||
// Read the time stamp counter (32-bit x86, GCC inline assembly).
inline /*volatile*/ uint64 rdtsc()
{
    uint64 ticks;
    // 0x0f, 0x31 is the raw encoding of the rdtsc instruction; the "=A"
    // constraint maps the edx:eax register pair onto the 64-bit result.
    __asm__ volatile (".byte 0x0f, 0x31" : "=A" (ticks));
    return ticks;
}
|
||||
|
||||
#elif defined(__x86_64__)
|
||||
|
||||
// Read the time stamp counter (x86_64, GCC inline assembly).
static __inline__ uint64 rdtsc(void)
{
    unsigned int low, high;
    // rdtsc leaves the low 32 bits in eax and the high 32 bits in edx.
    __asm__ __volatile__ ("rdtsc" : "=a"(low), "=d"(high));

    const unsigned long long upperHalf = ((unsigned long long)high) << 32;
    return upperHalf | (unsigned long long)low;
}
|
||||
|
||||
#elif defined(__powerpc__)
|
||||
|
||||
// Read the time base register pair (PowerPC counterpart of rdtsc).
static __inline__ uint64 rdtsc(void)
{
    unsigned long int high, low, highAgain;
    // Read upper, lower, then upper again, and retry while the two upper
    // reads differ, so that both halves belong to the same instant.
    __asm__ volatile(
        "0: \n"
        "\tmftbu %0 \n"
        "\tmftb %1 \n"
        "\tmftbu %2 \n"
        "\tcmpw %2,%0 \n"
        "\tbne 0b \n"
        : "=r"(high),"=r"(low),"=r"(highAgain)
    );

    // Assemble the two halves into one 64-bit value.
    uint64 ticks = high;
    ticks <<= 32;
    ticks |= low;
    return ticks;
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // NV_CC_GNUC
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_CORE_CPUINFO_H
|
@ -1,30 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_PREFETCH_H
|
||||
#define NV_CORE_PREFETCH_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
|
||||
// nvPrefetch
|
||||
#if NV_CC_GNUC
|
||||
|
||||
#define nvPrefetch(ptr) __builtin_prefetch(ptr)
|
||||
|
||||
#elif NV_CC_MSVC
|
||||
|
||||
// Uses SSE Intrinsics for both x86 and x86_64
|
||||
#include <xmmintrin.h>
|
||||
|
||||
// Issue a prefetch for the memory at 'mem' using the _MM_HINT_T0 hint.
__forceinline void nvPrefetch(const void * mem)
{
    const char * address = static_cast<const char*>(mem);
    _mm_prefetch(address, _MM_HINT_T0); /* prefetcht0 */
    // Alternative hint, kept disabled: _MM_HINT_NTA (prefetchnta).
}
|
||||
#else
|
||||
|
||||
// do nothing in other case.
|
||||
#define nvPrefetch(ptr)
|
||||
|
||||
#endif // NV_CC_MSVC
|
||||
|
||||
#endif // NV_CORE_PREFETCH_H
|
@ -1,484 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Contains source code from the article "Radix Sort Revisited".
|
||||
* \file Radix.cpp
|
||||
* \author Pierre Terdiman
|
||||
* \date April, 4, 2000
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
// References:
|
||||
// http://www.codercorner.com/RadixSortRevisited.htm
|
||||
// http://www.stereopsis.com/radix.html
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Revisited Radix Sort.
|
||||
* This is my new radix routine:
|
||||
* - it uses indices and doesn't recopy the values anymore, hence wasting less ram
|
||||
* - it creates all the histograms in one run instead of four
|
||||
* - it sorts words faster than dwords and bytes faster than words
|
||||
* - it correctly sorts negative floating-point values by patching the offsets
|
||||
* - it automatically takes advantage of temporal coherence
|
||||
* - multiple keys support is a side effect of temporal coherence
|
||||
* - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
|
||||
*
|
||||
* History:
|
||||
* - 08.15.98: very first version
|
||||
* - 04.04.00: recoded for the radix article
|
||||
* - 12.xx.00: code lifting
|
||||
* - 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
|
||||
* - 10.11.01: added local ram support
|
||||
* - 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
|
||||
* - 01.02.02: - "mIndices" renamed => "mRanks". That's a rank sorter after all.
|
||||
* - ranks are not "reset" anymore, but implicit on first calls
|
||||
* - 07.05.02: offsets rewritten with one less indirection.
|
||||
* - 11.03.02: "bool" replaced with RadixHint enum
|
||||
* - 07.15.04: stack-based radix added
|
||||
* - we want to use the radix sort but without making it static, and without allocating anything.
|
||||
* - we internally allocate two arrays of ranks. Each of them has N uint32s to sort N values.
|
||||
* - 1Mb/2/sizeof(uint32) = 131072 values max, at the same time.
|
||||
* - 09.22.04: - adapted to MacOS by Chris Lamb
|
||||
* - 01.12.06: - added optimizations suggested by Kyle Hubert
|
||||
* - 04.06.08: - Fix negative zero sorting bug by Ignacio Castaño
|
||||
*
|
||||
* \class RadixSort
|
||||
* \author Pierre Terdiman
|
||||
* \version 1.5
|
||||
* \date August, 15, 1998
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Header
|
||||
|
||||
#include <nvcore/Radix.h>
|
||||
|
||||
#include <string.h> // memset
|
||||
|
||||
//using namespace IceCore;
|
||||
|
||||
// The top bit of mCurrentSize flags the ranks as invalid; the remaining
// 31 bits hold the current size. Each expansion is fully parenthesized so it
// behaves as a single expression wherever it is used.
#define INVALIDATE_RANKS (mCurrentSize|=0x80000000)
#define VALIDATE_RANKS (mCurrentSize&=0x7fffffff)
#define CURRENT_SIZE (mCurrentSize&0x7fffffff)
#define INVALID_RANKS (mCurrentSize&0x80000000)

// Hn_OFFSET: start of the histogram that counts the n-th byte in memory of
// every key, inside the 1024-entry counter table.
// BYTES_INC: offset in memory of the byte handled by pass 'j' (it reads the
// loop variable 'j' of the sort routines).
#if NV_BIG_ENDIAN
#define H0_OFFSET 768
#define H1_OFFSET 512
#define H2_OFFSET 256
#define H3_OFFSET 0
#define BYTES_INC (3-j)
#else
#define H0_OFFSET 0
#define H1_OFFSET 256
#define H2_OFFSET 512
#define H3_OFFSET 768
#define BYTES_INC (j)
#endif
|
||||
|
||||
/* Builds the four byte histograms of the keys in one run over the input and, */
/* while doing so, checks whether the keys are already sorted. In that case   */
/* the enclosing sort routine returns immediately.                            */
/* Expects 'input', 'nb' and 'mHistogram' in the enclosing scope.             */
#define CREATE_HISTOGRAMS(type, buffer) \
    /* Start from empty counters */ \
    memset(mHistogram, 0, 256*4*sizeof(uint32)); \
    \
    /* Byte view of the keys, four bytes per key */ \
    const uint8* keyByte = (const uint8*)input; \
    const uint8* keyBytesEnd = &keyByte[nb*4]; \
    uint32* hist0 = &mHistogram[H0_OFFSET]; /* Counts the first byte of each key */ \
    uint32* hist1 = &mHistogram[H1_OFFSET]; /* Counts the second byte of each key */ \
    uint32* hist2 = &mHistogram[H2_OFFSET]; /* Counts the third byte of each key */ \
    uint32* hist3 = &mHistogram[H3_OFFSET]; /* Counts the fourth byte of each key */ \
    \
    /* Assume sorted until a value smaller than its predecessor shows up */ \
    bool stillSorted = true; \
    \
    if(INVALID_RANKS) \
    { \
        /* No usable ranks: visit the values in storage order */ \
        type* valueCursor = (type*)buffer; \
        type previous = *valueCursor; \
        \
        while(stillSorted && keyByte!=keyBytesEnd) \
        { \
            type current = *valueCursor++; \
            if(current<previous) \
            { \
                /* Out of order: stop checking, counting is finished below */ \
                stillSorted = false; \
            } \
            else \
            { \
                previous = current; \
                hist0[*keyByte++]++; hist1[*keyByte++]++; hist2[*keyByte++]++; hist3[*keyByte++]++; \
            } \
        } \
        \
        /* Already sorted: the ranks are simply 0..nb-1 and the sort is done. */ \
        /* This is what lets the routine benefit from temporal coherence.     */ \
        if(stillSorted) \
        { \
            mNbHits++; \
            for(uint32 i=0;i<nb;i++) mRanks[i] = i; \
            return *this; \
        } \
    } \
    else \
    { \
        /* Valid ranks: visit the values in the previously sorted order */ \
        const uint32* rankCursor = mRanks; \
        type previous = (type)buffer[*rankCursor]; \
        \
        while(stillSorted && keyByte!=keyBytesEnd) \
        { \
            type current = (type)buffer[*rankCursor++]; \
            if(current<previous) \
            { \
                /* Out of order: stop checking, counting is finished below */ \
                stillSorted = false; \
            } \
            else \
            { \
                previous = current; \
                hist0[*keyByte++]++; hist1[*keyByte++]++; hist2[*keyByte++]++; hist3[*keyByte++]++; \
            } \
        } \
        \
        /* The previous ranks still describe a sorted order: leave them as they are */ \
        if(stillSorted) { mNbHits++; return *this; } \
    } \
    \
    /* The check stopped early: count the remaining bytes without the comparison overhead */ \
    while(keyByte!=keyBytesEnd) \
    { \
        hist0[*keyByte++]++; hist1[*keyByte++]++; hist2[*keyByte++]++; hist3[*keyByte++]++; \
    }
|
||||
|
||||
/* Offset in memory, inside a key, of the byte handled by a given pass. */
/* It matches the byte counted by mHistogram[pass<<8] on both endiannesses. */
#if NV_BIG_ENDIAN
#define PASS_BYTE_OFFSET(pass) (3-(pass))
#else
#define PASS_BYTE_OFFSET(pass) (pass)
#endif

/* Declares CurCount, PerformPass and UniqueVal for pass 'pass'. */
/* Expects 'input', 'nb' and 'mHistogram' in the enclosing scope. */
#define CHECK_PASS_VALIDITY(pass) \
    /* Shortcut to current counters */ \
    const uint32* CurCount = &mHistogram[(pass)<<8]; \
    \
    /* Reset flag. The sorting pass is supposed to be performed. (default) */ \
    bool PerformPass = true; \
    \
    /* Check pass validity */ \
    \
    /* If all values have the same byte, sorting is useless. */ \
    /* It may happen when sorting bytes or words instead of dwords. */ \
    /* This routine actually sorts words faster than dwords, and bytes */ \
    /* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */ \
    /* for words and O(n) for bytes. Running time for floats depends on actual values... */ \
    \
    /* Get this pass' byte of the first key. It has to be the byte that CurCount */ \
    /* counts: reading offset 'pass' on big-endian picks another byte and the */ \
    /* pass is then almost never skipped. */ \
    uint8 UniqueVal = *(((uint8*)input)+PASS_BYTE_OFFSET(pass)); \
    \
    /* Check that byte's counter */ \
    if(CurCount[UniqueVal]==nb) PerformPass=false;
|
||||
|
||||
using namespace nv;
|
||||
|
||||
/// Constructor.
|
||||
RadixSort::RadixSort() : mRanks(NULL), mRanks2(NULL), mCurrentSize(0), mTotalCalls(0), mNbHits(0), mDeleteRanks(true)
|
||||
{
|
||||
// Initialize indices
|
||||
INVALIDATE_RANKS;
|
||||
}
|
||||
|
||||
/// Destructor.
|
||||
RadixSort::~RadixSort()
|
||||
{
|
||||
// Release everything
|
||||
if(mDeleteRanks)
|
||||
{
|
||||
delete [] mRanks2;
|
||||
delete [] mRanks;
|
||||
}
|
||||
}
|
||||
|
||||
/// Resizes the inner lists.
|
||||
/// \param nb [in] new size (number of dwords)
|
||||
/// \return true if success
|
||||
bool RadixSort::resize(uint32 nb)
|
||||
{
|
||||
if(mDeleteRanks)
|
||||
{
|
||||
// Free previously used ram
|
||||
delete [] mRanks2;
|
||||
delete [] mRanks;
|
||||
|
||||
// Get some fresh one
|
||||
mRanks = new uint32[nb];
|
||||
mRanks2 = new uint32[nb];
|
||||
}
|
||||
return true;
|
||||
|
||||
}
|
||||
|
||||
// Prepare for sorting 'nb' values: nothing happens when the size is unchanged;
// otherwise the buffers grow if needed, the new size is recorded and the ranks
// are flagged invalid.
inline void RadixSort::checkResize(uint32 nb)
{
    const uint32 previousSize = CURRENT_SIZE;
    if(nb==previousSize) return;

    // Buffers are only reallocated when growing
    if(nb>previousSize) resize(nb);

    mCurrentSize = nb;
    INVALIDATE_RANKS;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Main sort routine.
|
||||
* This one is for integer values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data.
|
||||
* \param input [in] a list of integer values to sort
|
||||
* \param nb [in] number of values to sort
|
||||
* \param signedvalues [in] true to handle negative values, false if you know your input buffer only contains positive values
|
||||
* \return Self-Reference
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
RadixSort& RadixSort::sort(const uint32* input, uint32 nb, bool signedValues/*=true*/)
{
    // Nothing to do for a null or empty input; counts using the top bit are
    // rejected because that bit is the "invalid ranks" flag of mCurrentSize.
    if(input==NULL || nb==0 || (nb&0x80000000)!=0) return *this;

    // Stats
    mTotalCalls++;

    // Make sure the rank buffers can hold nb entries
    checkResize(nb);

    // Histograms & offsets live on the stack
    uint32 mHistogram[256*4];
    uint32* mLink[256];

    // Count the bytes of all four passes in a single run over the input. The
    // "already sorted" check compares values, so signed and unsigned inputs
    // need their own instantiation.
    if(signedValues) { CREATE_HISTOGRAMS(int32, input); }
    else { CREATE_HISTOGRAMS(uint32, input); }

    // One pass per byte, j is the pass number (0=LSB, 3=MSB)
    for(uint32 j=0;j<4;j++)
    {
        CHECK_PASS_VALIDITY(j);

        // Every key has the same byte for this pass: the order cannot change.
        // That also holds for a skipped last pass on signed values.
        if(!PerformPass) continue;

        // Bucket offsets are laid out starting from bucket 0, except for the
        // last pass on signed values: there the buckets of negative numbers
        // (128..255) come first, followed by buckets 0..127.
        const uint32 firstBucket = (signedValues && j==3) ? 128 : 0;
        mLink[firstBucket] = mRanks2;
        for(uint32 step=1;step<256;step++)
        {
            const uint32 bucket = (firstBucket + step) & 0xff;
            const uint32 before = (firstBucket + step - 1) & 0xff;
            mLink[bucket] = mLink[before] + CurCount[before];
        }

        // Scatter the indices into mRanks2 according to this pass' byte
        const uint8* InputBytes = ((const uint8*)input) + BYTES_INC;
        if(INVALID_RANKS)
        {
            // First pass with no previous order: indices are simply 0..nb-1
            for(uint32 i=0;i<nb;i++)
            {
                *mLink[InputBytes[i<<2]]++ = i;
            }
            VALIDATE_RANKS;
        }
        else
        {
            // Keep the order produced by the previous pass
            for(uint32 k=0;k<nb;k++)
            {
                const uint32 id = mRanks[k];
                *mLink[InputBytes[id<<2]]++ = id;
            }
        }

        // Swap buffers: the most recent indices end up in mRanks
        uint32* const filled = mRanks2;
        mRanks2 = mRanks;
        mRanks = filled;
    }
    return *this;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Main sort routine.
|
||||
* This one is for floating-point values. After the call, mIndices contains a list of indices in sorted order, i.e. in the order you may process your data.
|
||||
* \param input [in] a list of floating-point values to sort
|
||||
* \param nb [in] number of values to sort
|
||||
* \return Self-Reference
|
||||
* \warning only sorts IEEE floating-point values
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
RadixSort& RadixSort::sort(const float* input2, uint32 nb)
{
    // Checkings. Counts using the top bit are rejected because that bit is the
    // "invalid ranks" flag of mCurrentSize.
    if(!input2 || !nb || nb&0x80000000) return *this;

    // Stats
    mTotalCalls++;

    // Integer view of the floats, used to extract the radix bytes
    const uint32* input = (const uint32*)input2;

    // Resize lists if needed
    checkResize(nb);

    // Allocate histograms & offsets on the stack
    uint32 mHistogram[256*4];
    uint32* mLink[256];

    // Create histograms (counters). Counters for all passes are created in one run.
    // Pros: read input buffer once instead of four times
    // Cons: mHistogram is 4Kb instead of 1Kb
    // Floating-point values are always signed, so there is a single code path.
    // The "already sorted" check has to compare floats: comparing the integer
    // representations would not work with mixed positive/negative values.
    { CREATE_HISTOGRAMS(float, input2); }

    // Radix sort, j is the pass number (0=LSB, 3=MSB)
    for(uint32 j=0;j<4;j++)
    {
        // Should we care about negative values?
        if(j!=3)
        {
            // The three low bytes are sorted like unsigned integers
            CHECK_PASS_VALIDITY(j);

            if(PerformPass)
            {
                // Create offsets
                mLink[0] = mRanks2;
                for(uint32 i=1;i<256;i++) mLink[i] = mLink[i-1] + CurCount[i-1];

                // Perform Radix Sort
                const uint8* InputBytes = (const uint8*)input;
                InputBytes += BYTES_INC;
                if(INVALID_RANKS)
                {
                    for(uint32 i=0;i<nb;i++) *mLink[InputBytes[i<<2]]++ = i;
                    VALIDATE_RANKS;
                }
                else
                {
                    const uint32* Indices = mRanks;
                    const uint32* IndicesEnd = &mRanks[nb];
                    while(Indices!=IndicesEnd)
                    {
                        uint32 id = *Indices++;
                        *mLink[InputBytes[id<<2]]++ = id;
                    }
                }

                // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
                uint32* Tmp = mRanks;
                mRanks = mRanks2;
                mRanks2 = Tmp;
            }
        }
        else
        {
            // This is a special case to correctly handle negative values
            CHECK_PASS_VALIDITY(j);

            if(PerformPass)
            {
                // Negative numbers (radix 128..255) go first, from bucket 255
                // down to bucket 128. They are written backwards, so each of
                // these links starts at the END of its bucket.
                mLink[255] = mRanks2 + CurCount[255];
                for(uint32 i = 254; i > 127; i--) mLink[i] = mLink[i+1] + CurCount[i];

                // Positive numbers (radix 0..127) follow, written forwards:
                // bucket 0 starts right after the last negative number, which
                // is the end of bucket 128. Bucket 127 must be part of this
                // chain; otherwise keys whose top byte is 0x7F (FLT_MAX for
                // instance) are misplaced and the offsets run past the buffer.
                mLink[0] = mLink[128];
                for(uint32 i = 1; i < 128; i++) mLink[i] = mLink[i-1] + CurCount[i-1];

                // Perform Radix Sort
                if(INVALID_RANKS)
                {
                    for(uint32 i=0;i<nb;i++)
                    {
                        uint32 Radix = input[i]>>24; // Radix byte, same as above. AND is useless here (uint32).
                        if(Radix<128) *mLink[Radix]++ = i; // Number is positive, same as above
                        else *(--mLink[Radix]) = i; // Number is negative, flip the sorting order
                    }
                    VALIDATE_RANKS;
                }
                else
                {
                    for(uint32 i=0;i<nb;i++)
                    {
                        uint32 Radix = input[mRanks[i]]>>24; // Radix byte, same as above. AND is useless here (uint32).
                        if(Radix<128) *mLink[Radix]++ = mRanks[i]; // Number is positive, same as above
                        else *(--mLink[Radix]) = mRanks[i]; // Number is negative, flip the sorting order
                    }
                }

                // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
                uint32* Tmp = mRanks;
                mRanks = mRanks2;
                mRanks2 = Tmp;
            }
            else
            {
                // The pass is useless, yet we still have to reverse the order of current list if all values are negative.
                if(UniqueVal>=128)
                {
                    if(INVALID_RANKS)
                    {
                        // ###Possible?
                        for(uint32 i=0;i<nb;i++) mRanks2[i] = nb-i-1;
                        VALIDATE_RANKS;
                    }
                    else
                    {
                        for(uint32 i=0;i<nb;i++) mRanks2[i] = mRanks[nb-i-1];
                    }

                    // Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
                    uint32* Tmp = mRanks;
                    mRanks = mRanks2;
                    mRanks2 = Tmp;
                }
            }
        }
    }
    return *this;
}
|
||||
|
||||
|
||||
/// Make the sorter use two caller-supplied rank buffers.
///
/// @param ranks0  Buffer installed as mRanks.
/// @param ranks1  Buffer installed as mRanks2.
/// @return false (and no state change) if either pointer is null, true otherwise.
///
/// mDeleteRanks is cleared on success.
/// NOTE(review): presumably this tells the sorter not to free the buffers
/// itself -- confirm against the destructor/resize code.
bool RadixSort::setRankBuffers(uint32* ranks0, uint32* ranks1)
{
    const bool bothProvided = ranks0 && ranks1;

    if (bothProvided)
    {
        mRanks       = ranks0;
        mRanks2      = ranks1;
        mDeleteRanks = false;
    }

    return bothProvided;
}
|
||||
|
||||
/// Sort an array of signed integers.
/// The storage is reinterpreted as uint32 and forwarded to the raw-pointer
/// overload with signedValues = true.
RadixSort & RadixSort::sort(const Array<int> & input)
{
    const uint32 * const values = (const uint32 *)input.buffer();
    return sort(values, input.count(), true);
}
|
||||
|
||||
/// Sort an array of unsigned integers.
/// Forwards to the raw-pointer overload with signedValues = false.
RadixSort & RadixSort::sort(const Array<uint> & input)
{
    const bool signedValues = false;
    return sort(input.buffer(), input.count(), signedValues);
}
|
||||
|
||||
/// Sort an array of floats by forwarding to the raw float-pointer overload.
RadixSort & RadixSort::sort(const Array<float> & input)
{
    const float * const values = input.buffer();
    return sort(values, input.count());
}
|
@ -1,73 +0,0 @@
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
/**
|
||||
* Contains source code from the article "Radix Sort Revisited".
|
||||
* \file Radix.h
|
||||
* \author Pierre Terdiman
|
||||
* \date April, 4, 2000
|
||||
*/
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
|
||||
// Include Guard
|
||||
#ifndef NV_CORE_RADIXSORT_H
|
||||
#define NV_CORE_RADIXSORT_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
#include <nvcore/Containers.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
class NVCORE_CLASS RadixSort
|
||||
{
|
||||
NV_FORBID_COPY(RadixSort);
|
||||
public:
|
||||
// Constructor/Destructor
|
||||
RadixSort();
|
||||
~RadixSort();
|
||||
|
||||
// Sorting methods
|
||||
RadixSort & sort(const uint32* input, uint32 nb, bool signedValues=true);
|
||||
RadixSort & sort(const float* input, uint32 nb);
|
||||
|
||||
// Helpers
|
||||
RadixSort & sort(const Array<int> & input);
|
||||
RadixSort & sort(const Array<uint> & input);
|
||||
RadixSort & sort(const Array<float> & input);
|
||||
|
||||
|
||||
//! Access to results. mRanks is a list of indices in sorted order, i.e. in the order you may further process your data
|
||||
inline /*const*/ uint32 * ranks() /*const*/ { return mRanks; }
|
||||
|
||||
//! mIndices2 gets trashed on calling the sort routine, but otherwise you can recycle it the way you want.
|
||||
inline uint32 * recyclable() const { return mRanks2; }
|
||||
|
||||
// Stats
|
||||
//! Returns the total number of calls to the radix sorter.
|
||||
inline uint32 totalCalls() const { return mTotalCalls; }
|
||||
|
||||
//! Returns the number of early exits due to temporal coherence.
|
||||
inline uint32 hits() const { return mNbHits; }
|
||||
|
||||
bool setRankBuffers(uint32* ranks0, uint32* ranks1);
|
||||
|
||||
private:
|
||||
uint32 mCurrentSize; //!< Current size of the indices list
|
||||
uint32 * mRanks; //!< Two lists, swapped each pass
|
||||
uint32 * mRanks2;
|
||||
|
||||
// Stats
|
||||
uint32 mTotalCalls; //!< Total number of calls to the sort routine
|
||||
uint32 mNbHits; //!< Number of early exits due to coherence
|
||||
|
||||
// Stack-radix
|
||||
bool mDeleteRanks; //!<
|
||||
|
||||
// Internal methods
|
||||
void checkResize(uint32 nb);
|
||||
bool resize(uint32 nb);
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_CORE_RADIXSORT_H
|
@ -1,9 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#include "RefCounted.h"
|
||||
|
||||
using namespace nv;
|
||||
|
||||
int nv::RefCounted::s_total_ref_count = 0;
|
||||
int nv::RefCounted::s_total_obj_count = 0;
|
||||
|
@ -1,119 +0,0 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#ifndef NV_CORE_REFCOUNTED_H
|
||||
#define NV_CORE_REFCOUNTED_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
#include <nvcore/Debug.h>
|
||||
|
||||
/// Forward-declares the SmartPtr template and, for the given class name,
/// introduces two typedefs: <Class>Ptr (SmartPtr<Class>) and
/// <Class>ConstPtr (SmartPtr<const Class>). The elaborated `class Class`
/// form also forward-declares Class itself. The expansion has no trailing
/// semicolon, so the use site supplies it.
#define NV_DECLARE_PTR(Class) \
|
||||
template <class T> class SmartPtr; \
|
||||
typedef SmartPtr<class Class> Class ## Ptr; \
|
||||
typedef SmartPtr<const class Class> Class ## ConstPtr
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
/// Reference counted base class to be used with SmartPtr and WeakPtr.
|
||||
class RefCounted
|
||||
{
|
||||
NV_FORBID_COPY(RefCounted);
|
||||
public:
|
||||
|
||||
/// Ctor.
|
||||
RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
|
||||
{
|
||||
s_total_obj_count++;
|
||||
}
|
||||
|
||||
/// Virtual dtor.
|
||||
virtual ~RefCounted()
|
||||
{
|
||||
nvCheck( m_count == 0 );
|
||||
nvCheck( s_total_obj_count > 0 );
|
||||
s_total_obj_count--;
|
||||
}
|
||||
|
||||
|
||||
/// Increase reference count.
|
||||
uint addRef() const
|
||||
{
|
||||
s_total_ref_count++;
|
||||
m_count++;
|
||||
return m_count;
|
||||
}
|
||||
|
||||
|
||||
/// Decrease reference count and remove when 0.
|
||||
uint release() const
|
||||
{
|
||||
nvCheck( m_count > 0 );
|
||||
|
||||
s_total_ref_count--;
|
||||
m_count--;
|
||||
if( m_count == 0 ) {
|
||||
// releaseWeakProxy();
|
||||
delete this;
|
||||
return 0;
|
||||
}
|
||||
return m_count;
|
||||
}
|
||||
/*
|
||||
/// Get weak proxy.
|
||||
WeakProxy * getWeakProxy() const
|
||||
{
|
||||
if (m_weak_proxy == NULL) {
|
||||
m_weak_proxy = new WeakProxy;
|
||||
m_weak_proxy->AddRef();
|
||||
}
|
||||
return m_weak_proxy;
|
||||
}
|
||||
|
||||
/// Release the weak proxy.
|
||||
void releaseWeakProxy() const
|
||||
{
|
||||
if (m_weak_proxy != NULL) {
|
||||
m_weak_proxy->NotifyObjectDied();
|
||||
m_weak_proxy->Release();
|
||||
m_weak_proxy = NULL;
|
||||
}
|
||||
}
|
||||
*/
|
||||
/** @name Debug methods: */
|
||||
//@{
|
||||
/// Get reference count.
|
||||
int refCount() const
|
||||
{
|
||||
return m_count;
|
||||
}
|
||||
|
||||
/// Get total number of objects.
|
||||
static int totalObjectCount()
|
||||
{
|
||||
return s_total_obj_count;
|
||||
}
|
||||
|
||||
/// Get total number of references.
|
||||
static int totalReferenceCount()
|
||||
{
|
||||
return s_total_ref_count;
|
||||
}
|
||||
//@}
|
||||
|
||||
|
||||
private:
|
||||
|
||||
NVCORE_API static int s_total_ref_count;
|
||||
NVCORE_API static int s_total_obj_count;
|
||||
|
||||
mutable int m_count;
|
||||
// mutable WeakProxy * weak_proxy;
|
||||
|
||||
};
|
||||
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_CORE_REFCOUNTED_H
|
@ -1,259 +0,0 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#include "Tokenizer.h"
|
||||
#include <nvcore/StrLib.h>
|
||||
|
||||
#include <stdio.h> // vsscanf
|
||||
#include <stdarg.h> // va_list
|
||||
#include <stdlib.h> // atof, atoi
|
||||
|
||||
#if NV_CC_MSVC
|
||||
#if defined NV_CPU_X86
|
||||
/* vsscanf for Win32
|
||||
* Written 5/2003 by <mgix@mgix.com>
|
||||
* This code is in the Public Domain
|
||||
*/
|
||||
|
||||
#include <malloc.h> // alloca
|
||||
//#include <string.h>
|
||||
|
||||
static int vsscanf(const char * buffer, const char * format, va_list argPtr)
|
||||
{
|
||||
// Get an upper bound for the # of args
|
||||
size_t count = 0;
|
||||
const char *p = format;
|
||||
while(1) {
|
||||
char c = *(p++);
|
||||
if(c==0) break;
|
||||
if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count;
|
||||
}
|
||||
|
||||
// Make a local stack
|
||||
size_t stackSize = (2+count)*sizeof(void*);
|
||||
void **newStack = (void**)alloca(stackSize);
|
||||
|
||||
// Fill local stack the way sscanf likes it
|
||||
newStack[0] = (void*)buffer;
|
||||
newStack[1] = (void*)format;
|
||||
memcpy(newStack+2, argPtr, count*sizeof(void*));
|
||||
|
||||
// @@ Use: CALL DWORD PTR [sscanf]
|
||||
|
||||
// Warp into system sscanf with new stack
|
||||
int result;
|
||||
void *savedESP;
|
||||
__asm
|
||||
{
|
||||
mov savedESP, esp
|
||||
mov esp, newStack
|
||||
#if _MSC_VER >= 1400
|
||||
call DWORD PTR [sscanf_s]
|
||||
#else
|
||||
call DWORD PTR [sscanf]
|
||||
#endif
|
||||
mov esp, savedESP
|
||||
mov result, eax
|
||||
}
|
||||
return result;
|
||||
}
|
||||
#elif defined NV_CPU_X86_64
|
||||
|
||||
/* Prototype of the helper assembly function */
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
int vsscanf_proxy_win64(const char * buffer, const char * format, va_list argPtr, __int64 count);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
/* MASM64 version of the above vsscanf */
|
||||
static int vsscanf(const char * buffer, const char * format, va_list argPtr)
|
||||
{
|
||||
// Get an upper bound for the # of args
|
||||
__int64 count = 0;
|
||||
const char *p = format;
|
||||
while(1) {
|
||||
char c = *(p++);
|
||||
if(c==0) break;
|
||||
if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count;
|
||||
}
|
||||
return vsscanf_proxy_win64(buffer, format, argPtr, count);
|
||||
}
|
||||
|
||||
/*#error vsscanf doesn't work on MSVC for x64*/
|
||||
#else
|
||||
#error Unknown processor for MSVC
|
||||
#endif
|
||||
#endif // NV_CC_MSVC
|
||||
|
||||
using namespace nv;
|
||||
|
||||
/// Default token: points at an empty string literal and has length 0.
Token::Token()
    : m_str("")
    , m_len(0)
{
}

/// Copy constructor: shares the same character pointer and length.
Token::Token(const Token & token)
    : m_str(token.m_str)
    , m_len(token.m_len)
{
}

/// Token over `len` characters starting at `str`.
/// Only the pointer is stored; the characters are not copied.
Token::Token(const char * str, int len)
    : m_str(str)
    , m_len(len)
{
}
|
||||
|
||||
bool Token::operator==(const char * str) const
|
||||
{
|
||||
return strncmp(m_str, str, m_len) == 0;
|
||||
}
|
||||
bool Token::operator!=(const char * str) const
|
||||
{
|
||||
return strncmp(m_str, str, m_len) != 0;
|
||||
}
|
||||
|
||||
bool Token::isNull()
|
||||
{
|
||||
return m_len != 0;
|
||||
}
|
||||
|
||||
float Token::toFloat() const
|
||||
{
|
||||
return float(atof(m_str));
|
||||
}
|
||||
|
||||
int Token::toInt() const
|
||||
{
|
||||
return atoi(m_str);
|
||||
}
|
||||
|
||||
uint Token::toUnsignedInt() const
|
||||
{
|
||||
// @@ TBD
|
||||
return uint(atoi(m_str));
|
||||
}
|
||||
|
||||
/// Build a String from the token's m_len characters.
String Token::toString() const
{
    String text(m_str, m_len);
    return text;
}
|
||||
|
||||
bool Token::parse(const char * format, int count, ...) const
|
||||
{
|
||||
va_list arg;
|
||||
va_start(arg, count);
|
||||
|
||||
int readCount = vsscanf(m_str, format, arg);
|
||||
|
||||
va_end(arg);
|
||||
|
||||
return readCount == count;
|
||||
}
|
||||
|
||||
|
||||
/// Create a tokenizer reading from `stream`.
///
/// Defaults: delimiters "{}()=", spaces " \t", line and column numbers 0.
///
/// m_line starts out as an empty string instead of being left uninitialized,
/// so calling nextToken() before the first nextLine() finds an empty line and
/// returns false rather than reading through an indeterminate pointer.
Tokenizer::Tokenizer(Stream * stream) :
    m_reader(stream),
    m_line(""),
    m_lineNumber(0),
    m_columnNumber(0),
    m_delimiters("{}()="),
    m_spaces(" \t")
{
}
|
||||
|
||||
bool Tokenizer::nextLine(bool skipEmptyLines /*= true*/)
|
||||
{
|
||||
do {
|
||||
if (!readLine()) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
while (!readToken() && skipEmptyLines);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Tokenizer::nextToken(bool skipEndOfLine /*= false*/)
|
||||
{
|
||||
if (!readToken()) {
|
||||
if (!skipEndOfLine) {
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
return nextLine(true);
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
bool Tokenizer::readToken()
|
||||
{
|
||||
skipSpaces();
|
||||
|
||||
const char * begin = m_line + m_columnNumber;
|
||||
|
||||
if (*begin == '\0') {
|
||||
return false;
|
||||
}
|
||||
|
||||
char c = readChar();
|
||||
if (isDelimiter(c)) {
|
||||
m_token = Token(begin, 1);
|
||||
return true;
|
||||
}
|
||||
|
||||
// @@ Add support for quoted tokens "", ''
|
||||
|
||||
int len = 0;
|
||||
while (!isDelimiter(c) && !isSpace(c) && c != '\0') {
|
||||
c = readChar();
|
||||
len++;
|
||||
}
|
||||
m_columnNumber--;
|
||||
|
||||
m_token = Token(begin, len);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
char Tokenizer::readChar()
|
||||
{
|
||||
return m_line[m_columnNumber++];
|
||||
}
|
||||
|
||||
bool Tokenizer::readLine()
|
||||
{
|
||||
m_lineNumber++;
|
||||
m_columnNumber = 0;
|
||||
m_line = m_reader.readLine();
|
||||
return m_line != NULL;
|
||||
}
|
||||
|
||||
void Tokenizer::skipSpaces()
|
||||
{
|
||||
while (isSpace(readChar())) {}
|
||||
m_columnNumber--;
|
||||
}
|
||||
|
||||
bool Tokenizer::isSpace(char c)
|
||||
{
|
||||
uint i = 0;
|
||||
while (m_spaces[i] != '\0') {
|
||||
if (c == m_spaces[i]) {
|
||||
return true;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
bool Tokenizer::isDelimiter(char c)
|
||||
{
|
||||
uint i = 0;
|
||||
while (m_delimiters[i] != '\0') {
|
||||
if (c == m_delimiters[i]) {
|
||||
return true;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
@ -1,98 +0,0 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#ifndef NV_CORE_TOKENIZER_H
|
||||
#define NV_CORE_TOKENIZER_H
|
||||
|
||||
#include <nvcore/StrLib.h>
|
||||
#include <nvcore/Stream.h>
|
||||
#include <nvcore/TextReader.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
/// A token produced by the Tokenizer.
|
||||
class NVCORE_CLASS Token
|
||||
{
|
||||
public:
|
||||
Token();
|
||||
Token(const Token & token);
|
||||
Token(const char * str, int len);
|
||||
|
||||
bool operator==(const char * str) const;
|
||||
bool operator!=(const char * str) const;
|
||||
|
||||
bool isNull();
|
||||
|
||||
float toFloat() const;
|
||||
int toInt() const;
|
||||
uint toUnsignedInt() const;
|
||||
String toString() const;
|
||||
|
||||
bool parse(const char * format, int count, ...) const __attribute__((format (scanf, 2, 4)));
|
||||
|
||||
private:
|
||||
const char * m_str;
|
||||
int m_len;
|
||||
};
|
||||
|
||||
/// Exception thrown by the tokenizer.
|
||||
/// Exception thrown by the tokenizer; carries a line and column position.
class TokenizerException
{
public:
    /// @param line    line number to report.
    /// @param column  column number to report.
    TokenizerException(int line, int column)
        : m_line(line)
        , m_column(column)
    {
    }

    /// Line number given at construction.
    int line() const
    {
        return m_line;
    }

    /// Column number given at construction.
    int column() const
    {
        return m_column;
    }

private:
    int m_line;
    int m_column;
};
|
||||
|
||||
// @@ Use enums instead of bools for clarity!
|
||||
//enum SkipEmptyLines { skipEmptyLines, noSkipEmptyLines };
|
||||
//enum SkipEndOfLine { skipEndOfLine, noSkipEndOfLine };
|
||||
|
||||
/// A simple stream tokenizer.
|
||||
class NVCORE_CLASS Tokenizer
|
||||
{
|
||||
public:
|
||||
Tokenizer(Stream * stream);
|
||||
|
||||
bool nextLine(bool skipEmptyLines = true);
|
||||
bool nextToken(bool skipEndOfLine = false);
|
||||
|
||||
const Token & token() const { return m_token; }
|
||||
|
||||
int lineNumber() const { return m_lineNumber; }
|
||||
int columnNumber() const { return m_columnNumber; }
|
||||
|
||||
void setDelimiters(const char * str) { m_delimiters = str; }
|
||||
const char * delimiters() const { return m_delimiters; }
|
||||
|
||||
void setSpaces(const char * str) { m_spaces = str; }
|
||||
const char * spaces() const { return m_spaces; }
|
||||
|
||||
private:
|
||||
char readChar();
|
||||
bool readLine();
|
||||
bool readToken();
|
||||
void skipSpaces();
|
||||
bool isSpace(char c);
|
||||
bool isDelimiter(char c);
|
||||
|
||||
private:
|
||||
TextReader m_reader;
|
||||
const char * m_line;
|
||||
Token m_token;
|
||||
|
||||
int m_lineNumber;
|
||||
int m_columnNumber;
|
||||
|
||||
const char * m_delimiters;
|
||||
const char * m_spaces;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_CORE_TOKENIZER_H
|
@ -1,124 +0,0 @@
|
||||
; MASM x64 version of
|
||||
; vsscanf for Win32
|
||||
; originally written 5/2003 by <mgix@mgix.com>
|
||||
;
|
||||
; This was done because MSVC does not accept inline assembly code
|
||||
; for the x64 platform, so this file implements almost the whole
|
||||
; module in assembly using the amd64 ABI
|
||||
;
|
||||
; 06/17/2008 by edgarv [at] nvidia com
|
||||
|
||||
; Definition of memcpy
|
||||
memcpy PROTO dest:Ptr, src:Ptr, numbytes:QWORD
|
||||
|
||||
; Definition of sscanf
|
||||
sscanf PROTO buffer:Ptr Byte, format:Ptr Byte, args:VARARG
|
||||
|
||||
|
||||
|
||||
; Start a code segment named "_TEXT" by default
|
||||
.CODE
|
||||
|
||||
; Entry point of our function: at this point we can use
|
||||
; named parameters
|
||||
ALIGN 16
|
||||
PUBLIC vsscanf_proxy_win64
|
||||
|
||||
; Because the x64 code uses the fast call convention, only
|
||||
; the arguments beyond the 4th one are available from the stack.
|
||||
; The first four parameters are in RCX, RDX, R8 and R9
|
||||
|
||||
; Parameters:
|
||||
; const char* buffer
|
||||
; const char* format
|
||||
; va_list argPtr
|
||||
; size_t count
|
||||
; vsscanf_proxy_win64(buffer, format, argPtr, count)
;
; Builds an argument block on the stack holding buffer, format and `count`
; 8-byte values copied from argPtr, loads the first two of those values into
; r8/r9 when present, and calls sscanf. The value sscanf leaves in rax is
; returned unchanged.
vsscanf_proxy_win64 PROC, \
|
||||
buffer:PTR Byte, format:PTR Byte, argPtr:PTR, count:QWORD
|
||||
|
||||
; Allocates space for our local variable, the saved RSP
|
||||
sub rsp, 08h
|
||||
|
||||
; Copies the parameters from the registers to the memory: before warping to
|
||||
; sscanf we will call memcpy, and those registers can just disappear!
|
||||
mov buffer, rcx
|
||||
mov format, rdx
|
||||
mov argPtr, r8
|
||||
mov count, r9
|
||||
|
||||
|
||||
; Allocate extra space in the stack for (2+count)*sizeof(void*),
|
||||
; this is (2+count)*(8)
|
||||
mov r10, r9
|
||||
add r10, 2 ; count += 2
|
||||
sal r10, 3 ; count *= 8
|
||||
add r10, 0fh ; To force alignment to 16bytes
|
||||
and r10, 0fffffffffffffff0h
|
||||
sub rsp, r10 ; Actual stack allocation
|
||||
|
||||
|
||||
; Continues by copying all the arguments in the "alloca" space
|
||||
mov [rsp], rcx ; newStack[0] = (void*)buffer;
|
||||
mov [rsp + 08h], rdx ; newStack[1] = (void*)format;
|
||||
|
||||
; Calls memcpy(newStack+2, argPtr, count*sizeof(void*));
|
||||
mov rcx, rsp
|
||||
add rcx, 010h ; newStack+2
|
||||
mov rdx, r8 ; argPtr
|
||||
mov r8, r9
|
||||
sal r8, 3 ; count*sizeof(void*)
|
||||
|
||||
; Prepares extra stack space as required by the ABI for 4 arguments, and calls memcpy
|
||||
sub rsp, 020h
|
||||
call memcpy
|
||||
|
||||
; Restore the stack
|
||||
add rsp, 020h
|
||||
|
||||
; Saves rsp in memory
; NOTE(review): addressing through rbp assumes a frame pointer has been set up
; for this PROC; no explicit prologue is visible here -- confirm.
|
||||
mov qword ptr [rbp - 8], rsp
|
||||
|
||||
; Does exactly the same trick as before: warp into system sscanf with the new stack,
|
||||
; but this time we also setup the arguments in the registers according to the amd64 ABI
|
||||
|
||||
; If there was at least one argument (after buffer and format), we need to copy that
|
||||
; to r8, and if there was a second one we must copy that to r9
|
||||
; (the first arguments to sscanf are always the buffer and the format)
; NOTE(review): this overwrites r10, which until now held the aligned size of
; the stack allocation made above.
|
||||
mov r10, count
|
||||
|
||||
; Copy the first argument to r8 (if it exists)
|
||||
cmp r10, 0
|
||||
je args_memcpy
|
||||
mov r8, [rsp + 10h]
|
||||
|
||||
; Copy the second argument to r9 (if it exists)
|
||||
cmp r10, 1
|
||||
je args_memcpy
|
||||
mov r9, [rsp + 18h]
|
||||
|
||||
args_memcpy:
|
||||
|
||||
; Copies the buffer and format to rcx and rdx
|
||||
mov rdx, [rsp + 08h]
|
||||
mov rcx, [rsp]
|
||||
|
||||
; Finally, calls sscanf using the current stack
|
||||
call sscanf
|
||||
|
||||
; At this point the return value is already in rax
|
||||
|
||||
; Restores rsp
|
||||
mov rsp, qword ptr [rbp - 8]
|
||||
|
||||
; Undoes the alloca
; NOTE(review): r10 was reloaded with `count` before the sscanf call, and r10
; is presumably not preserved across calls, so it no longer holds the
; allocation size here -- verify that rsp is restored correctly.
|
||||
add rsp, r10
|
||||
|
||||
; Restores the space for local variables
|
||||
add rsp, 08h
|
||||
|
||||
; Remember, the return value is already in rax since the sscanf call
|
||||
ret
|
||||
|
||||
vsscanf_proxy_win64 ENDP
|
||||
|
||||
END
|
Reference in New Issue
Block a user