More cleanup. Remove files that are not strictly required.

2009-03-01 02:38:24 +00:00 · 2009-03-01 02:38:24 +00:00 · 03c9ec0f62
commit 03c9ec0f62
parent 88fc5ca18e
26 changed files with 0 additions and 3939 deletions
--- a/src/nvcore/BitArray.h
+++ b/src/nvcore/BitArray.h
@ -1,168 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_CORE_BITARRAY_H
 #define NV_CORE_BITARRAY_H
 #include <nvcore/nvcore.h>
 #include <nvcore/Containers.h>
 namespace nv
 {
 /// Return how many bits of the byte @a x are set to one.
 inline uint bitsSet(uint8 x) {
 	uint total = 0;
 	// Peel off the lowest bit until nothing is left.
 	while (x != 0) {
 		if (x & 1) {
 			total++;
 		}
 		x >>= 1;
 	}
 	return total;
 }
 /// Return how many bits of @a x are set to one, examining at most @a bits positions
 /// starting from the least significant one. Scanning also stops once @a x has no set bit left.
 inline uint bitsSet(uint32 x, int bits) {
 	uint total = 0;
 	while (x != 0 && bits != 0) {
 		total += (x & 1);
 		x >>= 1;
 		bits--;
 	}
 	return total;
 }
 /// Simple bit array.
 class BitArray
 {
 public:
 	/// Default ctor.
 	BitArray() {}
 	/// Ctor with initial m_size.
 	BitArray(uint sz)
 	{
 		resize(sz);
 	}
 	/// Get array m_size.
 	uint size() const { return m_size; }
 	/// Clear array m_size.
 	void clear() { resize(0); }
 	/// Set array m_size.
 	void resize(uint sz)
 	{ 
 		m_size = sz;
 		m_bitArray.resize( (m_size + 7) >> 3 );
 	}
 	/// Get bit.
 	bool bitAt(uint b) const
 	{
 		nvDebugCheck( b < m_size );
 		return (m_bitArray[b >> 3] & (1 << (b & 7))) != 0;
 	}
 	/// Set a bit.
 	void setBitAt(uint b)
 	{
 		nvDebugCheck( b < m_size );
 		m_bitArray[b >> 3] |=  (1 << (b & 7));
 	}
 	/// Clear a bit.
 	void clearBitAt( uint b )
 	{
 		nvDebugCheck( b < m_size );
 		m_bitArray[b >> 3] &= ~(1 << (b & 7));
 	}
 	/// Clear all the bits.
 	void clearAll()
 	{
 		memset(m_bitArray.mutableBuffer(), 0, m_bitArray.size());
 	}
 	/// Set all the bits.
 	void setAll()
 	{
 		memset(m_bitArray.mutableBuffer(), 0xFF, m_bitArray.size());
 	}
 	/// Toggle all the bits.
 	void toggleAll()
 	{
 		const uint byte_num = m_bitArray.size();
 		for(uint b = 0; b < byte_num; b++) {
 			m_bitArray[b] ^= 0xFF;
 		}
 	}
 	/// Get a byte of the bit array.
 	const uint8 & byteAt(uint index) const
 	{
 		return m_bitArray[index];
 	}
 	/// Set the given byte of the byte array.
 	void setByteAt(uint index, uint8 b)
 	{
 		m_bitArray[index] = b;
 	}
 	/// Count the number of bits set.
 	uint countSetBits() const
 	{
 		const uint num = m_bitArray.size();
 		if( num == 0 ) {
 			return 0;
 		}
 		uint count = 0;				
 		for(uint i = 0; i < num - 1; i++) {
 			count += bitsSet(m_bitArray[i]);
 		}
 		count += bitsSet(m_bitArray[num-1], m_size & 0x7);
 		//piDebugCheck(count + countClearBits() == m_size);
 		return count;
 	}
 	/// Count the number of bits clear.
 	uint countClearBits() const {
 		const uint num = m_bitArray.size();
 		if( num == 0 ) {
 			return 0;
 		}
 		uint count = 0;
 		for(uint i = 0; i < num - 1; i++) {
 			count += bitsSet(~m_bitArray[i]);
 		}
 		count += bitsSet(~m_bitArray[num-1], m_size & 0x7);
 		//piDebugCheck(count + countSetBits() == m_size);
 		return count;
 	}
 	friend void swap(BitArray & a, BitArray & b)
 	{
 		swap(a.m_size, b.m_size);
 		swap(a.m_bitArray, b.m_bitArray);
 	}
 private:
 	/// Number of bits stored.
 	uint m_size;
 	/// Array of bits.
 	Array<uint8> m_bitArray;
 };
 } // nv namespace
 #endif // NV_CORE_BITARRAY_H
--- a/src/nvcore/CMakeLists.txt
+++ b/src/nvcore/CMakeLists.txt
@ -7,9 +7,6 @@ SET(CORE_SRCS
 	DefsGnucWin32.h
 	DefsVcWin32.h
 	Ptr.h
 	RefCounted.h
 	RefCounted.cpp
 	BitArray.h
 	Memory.h
 	Memory.cpp
 	Debug.h
@ -17,10 +14,6 @@ SET(CORE_SRCS
 	Containers.h
 	StrLib.h
 	StrLib.cpp
 	Radix.h
 	Radix.cpp
 	CpuInfo.h
 	CpuInfo.cpp
 	Algorithms.h
 	Timer.h
 	Library.h
@ -31,41 +24,11 @@ SET(CORE_SRCS
 	TextReader.cpp
 	TextWriter.h
 	TextWriter.cpp
 	Tokenizer.h
 	Tokenizer.cpp
 	FileSystem.h
 	FileSystem.cpp)
 INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
 # For Windows64 in MSVC we need to add the assembly version of vsscanf
 # This section computes the path of the object file that ml64 produces from
 # vsscanf_proxy_win64.masm, appends that object to CORE_SRCS and registers the
 # custom command that assembles it.
 IF(MSVC AND NV_SYSTEM_PROCESSOR STREQUAL "AMD64")
  SET(VSSCANF_ASM_NAME "vsscanf_proxy_win64")
  IF(MSVC_IDE)
    # $(IntDir) is a macro expanded to the intermediate directory of the selected solution configuration
    SET(VSSCANF_ASM_INTDIR "$(IntDir)")
  ELSE(MSVC_IDE)
    # For some reason the NMake generator doesn't work properly with the generated .obj source:
 	# it requires the absolute path. So this is a hack which worked as of cmake 2.6.0 patch 0
 	GET_FILENAME_COMPONENT(VSSCANF_ASM_INTDIR 
 	    "${nvcore_BINARY_DIR}/CMakeFiles/nvcore.dir" ABSOLUTE)
  ENDIF(MSVC_IDE)
  # Assembly input (next to this CMakeLists.txt) and the object file generated from it.
  SET(VSSCANF_ASM_SRC "${CMAKE_CURRENT_SOURCE_DIR}/${VSSCANF_ASM_NAME}.masm")
  SET(VSSCANF_ASM_OBJ "${VSSCANF_ASM_INTDIR}/${VSSCANF_ASM_NAME}.obj")
  # Adds the assembly output to the sources and adds the custom command to generate it
  SET(CORE_SRCS
 	${CORE_SRCS}
 	${VSSCANF_ASM_OBJ}
  )
  # Runs ml64 on the .masm file, writing the result to ${VSSCANF_ASM_OBJ}.
  ADD_CUSTOM_COMMAND(OUTPUT ${VSSCANF_ASM_OBJ}
 					 MAIN_DEPENDENCY ${VSSCANF_ASM_SRC}
 					 COMMAND ml64
 					 ARGS  /nologo /Fo ${VSSCANF_ASM_OBJ} /c /Cx ${VSSCANF_ASM_SRC}
  )
 ENDIF(MSVC AND NV_SYSTEM_PROCESSOR STREQUAL "AMD64")
 # targets
 ADD_DEFINITIONS(-DNVCORE_EXPORTS)
--- a/src/nvcore/Constraints.h
+++ b/src/nvcore/Constraints.h
@ -1,59 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 // Include guard. It used to be NV_CORE_ALGORITHMS_H, a name that matches Algorithms.h
 // rather than this file; two headers sharing one guard make whichever is included
 // second expand to nothing.
 #ifndef NV_CORE_CONSTRAINTS_H
 #define NV_CORE_CONSTRAINTS_H
 #include <nvcore/nvcore.h>
 namespace nv
 {
 	// Cool constraints from "Imperfect C++"
 	// must_be_pod: instantiating must_be_pod<T>::constraints() places a T inside an
 	// anonymous union, which the compiler rejects for types that may not be union members.
 	// The member name doubles as the diagnostic text.
 	template <class T>
 	struct must_be_pod
 	{
 		static void constraints() {
 			union {
 				T T_is_not_POD_type;
 			};
 		}
 	};
 	// must_be_pod_or_void: same check as must_be_pod, except that void is accepted
 	// through an explicit specialization that has no constraints() at all.
 	template <class T>
 	struct must_be_pod_or_void
 	{
 		static void constraints() {
 			union {
 				T T_is_not_POD_type;
 			};
 		}
 	};
 	template <>
 	struct must_be_pod_or_void<void>
 	{
 	};
 	// size_of: sizeof(T) as a compile-time constant, with size_of<void> defined as 0.
 	template <typename T>
 	struct size_of
 	{
 		enum { value = sizeof(T) };
 	};
 	template <> 
 	struct size_of<void>
 	{
 		enum { value = 0 };
 	};
 	// must_be_same_size: instantiating must_be_same_size<T1, T2>::constraints() only
 	// compiles when size_of<T1>::value equals size_of<T2>::value.
 	template <typename T1, typename T2>
 	struct must_be_same_size
 	{
 		static void constraints()
 		{
 			// A mismatch produces a negative array bound, which every compiler rejects.
 			// A bound of zero (the plain result of the comparison) is not enough:
 			// compilers that support zero-length arrays as an extension accept it silently.
 			typedef int T1_not_same_size_as_T2[(int(size_of<T1>::value) == int(size_of<T2>::value)) ? 1 : -1];
 			T1_not_same_size_as_T2 i;
 			(void)i; // only the declaration matters
 		}
 	};
 } // nv namespace
 #endif // Constraints.h include guard
--- a/src/nvcore/CpuInfo.cpp
+++ b/src/nvcore/CpuInfo.cpp
@ -1,162 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #include <nvcore/CpuInfo.h>
 #include <nvcore/Debug.h>
 using namespace nv;
 #if NV_OS_WIN32
 #define _WIN32_WINNT 0x0501
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
 static bool isWow64()
 {
 	LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
 	BOOL bIsWow64 = FALSE;
 	if (NULL != fnIsWow64Process)
 	{
 		if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
 		{
 			return false;
 		}
 	}
 	return bIsWow64 == TRUE;
 }
 #endif // NV_OS_WIN32
 #if NV_OS_LINUX
 #include <string.h>
 #include <sched.h>
 #endif // NV_OS_LINUX
 #if NV_OS_DARWIN
 #include <sys/types.h>
 #include <sys/sysctl.h>
 #endif // NV_OS_DARWIN
 // Initialize the data and the local defines, which are designed
 // to match the positions in cpuid
 // m_cpu starts as all ones, the value cpu() treats as "not probed yet".
 uint CpuInfo::m_cpu = ~0x0;
 // m_procCount starts at zero, the value processorCount() treats as "not queried yet".
 uint CpuInfo::m_procCount = 0;
 // cpu() tests the MMX, SSE and SSE2 masks against the fourth cpuid output word
 // and the SSE3 mask against the third one.
 #define NV_CPUINFO_MMX_MASK  (1<<23)
 #define NV_CPUINFO_SSE_MASK  (1<<25)
 #define NV_CPUINFO_SSE2_MASK (1<<26)
 #define NV_CPUINFO_SSE3_MASK (1)
 /// Number of processors available, queried from the operating system on the
 /// first call and cached in m_procCount afterwards.
 /// - Win32:  dwNumberOfProcessors of the (native, when under WOW64) system info.
 /// - Linux:  number of CPUs set in the affinity mask of the calling process.
 /// - Darwin: the "hw.ncpu" sysctl value.
 /// - Other:  1.
 /// @return The processor count; never 0, a failed or empty query is reported as 1.
 uint CpuInfo::processorCount()
 {
 	if (m_procCount == 0) {
 #if NV_OS_WIN32
 		SYSTEM_INFO sysInfo;
 		if (isWow64())
 		{
 			GetNativeSystemInfo(&sysInfo);
 		}
 		else
 		{
 			GetSystemInfo(&sysInfo);
 		}
 		m_procCount = (uint)sysInfo.dwNumberOfProcessors;
 #elif NV_OS_LINUX
 		// Based on code from x264 (July 6 snapshot) cpu.c:271
 		uint np = 0;
 		cpu_set_t p_aff;
 		memset( &p_aff, 0, sizeof(p_aff) );
 		if( sched_getaffinity( 0, sizeof(p_aff), &p_aff ) == 0 )
 		{
 			// sizeof() is in bytes, so the mask holds sizeof(p_aff) * 8 bits; stopping
 			// at sizeof(p_aff) bits would ignore most of the mask.
 			const uint8 * mask = (const uint8 *)&p_aff;
 			for( uint bit = 0; bit < sizeof(p_aff) * 8; bit++ )
 				np += (mask[bit / 8] >> (bit % 8)) & 1;
 		}
 		m_procCount = np;
 #elif NV_OS_DARWIN
 		// Code from x264 (July 6 snapshot) cpu.c:286
 		uint numberOfCPUs;
 		size_t length = sizeof( numberOfCPUs );
 		if( sysctlbyname("hw.ncpu", &numberOfCPUs, &length, NULL, 0) )
 		{
 			numberOfCPUs = 1;
 		}
 		m_procCount = numberOfCPUs;
 #else
 		m_procCount = 1;
 #endif
 		// A query that failed or reported nothing must not leave the cache at zero:
 		// callers divide work by this value and zero also means "not queried yet".
 		if (m_procCount == 0) {
 			m_procCount = 1;
 		}
 	}
 	nvDebugCheck(m_procCount > 0);
 	return m_procCount;
 }
 /// Number of cores. No detection is performed: the answer is always 1.
 uint CpuInfo::coreCount()
 {
 	const uint cores = 1;
 	return cores;
 }
 /// True when the MMX bit is present in the cached feature mask.
 bool CpuInfo::hasMMX()
 {
 	const int features = cpu();
 	return (features & NV_CPUINFO_MMX_MASK) != 0;
 }
 /// True when the SSE bit is present in the cached feature mask.
 bool CpuInfo::hasSSE()
 {
 	const int features = cpu();
 	return (features & NV_CPUINFO_SSE_MASK) != 0;
 }
 /// True when the SSE2 bit is present in the cached feature mask.
 bool CpuInfo::hasSSE2()
 {
 	const int features = cpu();
 	return (features & NV_CPUINFO_SSE2_MASK) != 0;
 }
 /// True when the SSE3 bit is present in the cached feature mask.
 bool CpuInfo::hasSSE3()
 {
 	const int features = cpu();
 	return (features & NV_CPUINFO_SSE3_MASK) != 0;
 }
 /// Return the cached feature mask, filling it on the first call.
 /// With MSVC the mask is built from cpuid function 1; with GCC nothing is detected
 /// yet, so the mask stays empty.
 inline int CpuInfo::cpu() {
 	const bool probed = (m_cpu != ~0x0);
 	if (!probed) {
 		m_cpu = 0;
 #if NV_CC_MSVC
 		int regs[4] = {-1};
 		__cpuid(regs, /*InfoType*/ 1);
 		// Third output word: SSE3. Fourth output word: MMX, SSE and SSE2.
 		m_cpu |= (regs[2] & NV_CPUINFO_SSE3_MASK);
 		m_cpu |= (regs[3] & (NV_CPUINFO_MMX_MASK | NV_CPUINFO_SSE_MASK | NV_CPUINFO_SSE2_MASK));
 #elif NV_CC_GNUC
 		// TODO: add the proper inline assembly
 #if NV_CPU_X86
 #elif NV_CPU_X86_64
 #endif	// NV_CPU_X86_64
 #endif	// NV_CC_GNUC
 	}
 	return m_cpu;
 }
--- a/src/nvcore/CpuInfo.h
+++ b/src/nvcore/CpuInfo.h
@ -1,109 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_CORE_CPUINFO_H
 #define NV_CORE_CPUINFO_H
 #include <nvcore/nvcore.h>
 #if NV_CC_MSVC
 #if _MSC_VER >= 1400
 #	include <intrin.h> // __rdtsc
 #endif
 #endif
 namespace nv
 {
 	// Static queries about the processor(s) of the host machine.
 	class CpuInfo
 	{
 	public:
 		// Processor and core counts.
 		static uint processorCount();
 		static uint coreCount();
 		// Instruction set queries.
 		static bool hasMMX();
 		static bool hasSSE();
 		static bool hasSSE2();
 		static bool hasSSE3();
 	protected:
 		// Feature mask the has*() queries are answered from.
 		static int cpu();
 	private:
 		// Cached feature mask and processor count.
 		static uint m_cpu;
 		static uint m_procCount;
 	};
 // rdtsc() for MSVC: returns the 64 bit value produced by the rdtsc instruction.
 #if NV_CC_MSVC
 #if _MSC_VER < 1400
       // No intrinsic is used before _MSC_VER 1400: issue the instruction with inline assembly
       // and store eax/edx into the low/high halves of the result.
       inline uint64 rdtsc()
        {
 		uint64 t;
 		__asm rdtsc 
 		__asm mov DWORD PTR [t], eax 
 		__asm mov DWORD PTR [t+4], edx
 		return t;
        }	
 #else
 	// Newer compilers: forward to the __rdtsc intrinsic.
 	#pragma intrinsic(__rdtsc)
 	inline uint64 rdtsc()
 	{
 		return __rdtsc();
 	}
 #endif
 #endif
 // rdtsc() for GCC. One variant per architecture; nothing is defined for other targets.
 #if NV_CC_GNUC
 #if defined(__i386__)
 	// 32 bit x86: the instruction is emitted as raw opcode bytes (0x0f 0x31) and the
 	// "=A" constraint hands back its result as one 64 bit value.
 	inline /*volatile*/ uint64 rdtsc()
 	{
 		uint64 x;
 		//__asm__ volatile ("rdtsc" : "=A" (x));
 		__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
 		return x;
 	} 
 #elif defined(__x86_64__)
 	// x86-64: the two 32 bit halves are read separately and combined.
 	static __inline__ uint64 rdtsc(void)
 	{
 		unsigned int hi, lo;
 		__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
 		return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
 	}
 #elif defined(__powerpc__)
 	// PowerPC: reads the upper half (mftbu), the lower half (mftb) and the upper half
 	// again, and loops back to label 0 while the two upper reads differ, so both halves
 	// belong to the same reading.
 	static __inline__ uint64 rdtsc(void)
 	{
 		uint64 result=0;
 		unsigned long int upper, lower, tmp;
 		__asm__ volatile(
 					"0:                  \n"
 					"\tmftbu   %0           \n"
 					"\tmftb    %1           \n"
 					"\tmftbu   %2           \n"
 					"\tcmpw    %2,%0        \n"
 					"\tbne     0b         \n"
 					: "=r"(upper),"=r"(lower),"=r"(tmp)
 					);
 		result = upper;
 		result = result<<32;
 		result = result|lower;
 		return(result);
 	}
 #endif
 #endif // NV_CC_GNUC
 } // nv namespace
 #endif // NV_CORE_CPUINFO_H
--- a/src/nvcore/Prefetch.h
+++ b/src/nvcore/Prefetch.h
@ -1,30 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_CORE_PREFETCH_H
 #define NV_CORE_PREFETCH_H
 #include <nvcore/nvcore.h>
 // nvPrefetch(ptr): hint that the memory at ptr is about to be used.
 // GCC maps it to __builtin_prefetch, MSVC to _mm_prefetch with the T0 hint, and on
 // any other compiler the macro expands to nothing (its argument is not evaluated).
 #if NV_CC_GNUC
 #define nvPrefetch(ptr)	__builtin_prefetch(ptr)
 #elif NV_CC_MSVC 
 // Uses SSE Intrinsics for both x86 and x86_64
 #include <xmmintrin.h>
 __forceinline void nvPrefetch(const void * mem)
 {
 	_mm_prefetch(static_cast<const char*>(mem), _MM_HINT_T0);	/* prefetcht0  */
 //	_mm_prefetch(static_cast<const char*>(mem), _MM_HINT_NTA);	/* prefetchnta */
 }
 #else
 // do nothing in other case.
 #define nvPrefetch(ptr)
 #endif // NV_CC_MSVC
 #endif // NV_CORE_PREFETCH_H
--- a/src/nvcore/Radix.cpp
+++ b/src/nvcore/Radix.cpp
@ -1,484 +0,0 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /**
 *	Contains source code from the article "Radix Sort Revisited".
 *	\file		Radix.cpp
 *	\author		Pierre Terdiman
 *	\date		April, 4, 2000
 */
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // References:
 // http://www.codercorner.com/RadixSortRevisited.htm
 // http://www.stereopsis.com/radix.html
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /**
 *	Revisited Radix Sort.
 *	This is my new radix routine:
 *  - it uses indices and doesn't recopy the values anymore, hence wasting less ram
 *  - it creates all the histograms in one run instead of four
 *  - it sorts words faster than dwords and bytes faster than words
 *  - it correctly sorts negative floating-point values by patching the offsets
 *  - it automatically takes advantage of temporal coherence
 *  - multiple keys support is a side effect of temporal coherence
 *  - it may be worth recoding in asm... (mainly to use FCOMI, FCMOV, etc) [it's probably memory-bound anyway]
 *
 *	History:
 *	- 08.15.98: very first version
 *	- 04.04.00: recoded for the radix article
 *	- 12.xx.00: code lifting
 *	- 09.18.01: faster CHECK_PASS_VALIDITY thanks to Mark D. Shattuck (who provided other tips, not included here)
 *	- 10.11.01: added local ram support
 *	- 01.20.02: bugfix! In very particular cases the last pass was skipped in the float code-path, leading to incorrect sorting......
 *	- 01.02.02:	- "mIndices" renamed => "mRanks". That's a rank sorter after all.
 *				- ranks are not "reset" anymore, but implicit on first calls
 *	- 07.05.02:	offsets rewritten with one less indirection.
 *	- 11.03.02:	"bool" replaced with RadixHint enum
 *	- 07.15.04:	stack-based radix added
 *				- we want to use the radix sort but without making it static, and without allocating anything.
 *				- we internally allocate two arrays of ranks. Each of them has N uint32s to sort N values.
 *				- 1Mb/2/sizeof(uint32) = 131072 values max, at the same time.
 *	- 09.22.04:	- adapted to MacOS by Chris Lamb
 *	- 01.12.06:	- added optimizations suggested by Kyle Hubert
 *	- 04.06.08:	- fixed negative zero sorting bug (Ignacio Castaño)
 *
 *	\class		RadixSort
 *	\author		Pierre Terdiman
 *	\version	1.5
 *	\date		August, 15, 1998
 */
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Header
 #include <nvcore/Radix.h>
 #include <string.h> // memset
 //using namespace IceCore;
 // The top bit of mCurrentSize is a flag meaning "the ranks are not valid";
 // the low 31 bits hold the size itself.
 #define INVALIDATE_RANKS	mCurrentSize|=0x80000000	/* set the flag */
 #define VALIDATE_RANKS		mCurrentSize&=0x7fffffff	/* clear the flag */
 #define CURRENT_SIZE		(mCurrentSize&0x7fffffff)	/* size without the flag */
 #define INVALID_RANKS		(mCurrentSize&0x80000000)	/* non-zero when the flag is set */
 // Hn_OFFSET: index, inside the 4*256 entry histogram array, of the table that
 // CREATE_HISTOGRAMS fills from the n-th byte of each key in memory order.
 // BYTES_INC: byte offset added to the input pointer for pass j (j must be in scope).
 // Both depend on the byte order of the target.
 #if NV_BIG_ENDIAN
 	#define H0_OFFSET	768
 	#define H1_OFFSET	512
 	#define H2_OFFSET	256
 	#define H3_OFFSET	0
 	#define BYTES_INC	(3-j)
 #else 
 	#define H0_OFFSET	0
 	#define H1_OFFSET	256
 	#define H2_OFFSET	512
 	#define H3_OFFSET	768
 	#define BYTES_INC	j
 #endif
 // CREATE_HISTOGRAMS(type, buffer)
 // Expands inside a sort() method and relies on names of the expansion site:
 // `input` (key bytes), `nb` (key count), `mHistogram`, `mRanks` and `mNbHits`.
 // It zeroes the four 256-entry histograms, then counts the four bytes of every key in
 // one sweep while checking that the keys, read as `type` from `buffer`, never decrease.
 // Keys are visited in memory order when the ranks are invalid, in mRanks order otherwise.
 // When the whole sequence turns out to be ordered the macro executes `return *this`
 // in the enclosing function (after writing 0..nb-1 into mRanks in the invalid-ranks case).
 // Otherwise the order check stops at the first decrease and the rest is only counted.
 #define CREATE_HISTOGRAMS(type, buffer)														\
 	/* Clear counters/histograms */															\
 	memset(mHistogram, 0, 256*4*sizeof(uint32));											\
 																							\
 	/* Prepare to count */																	\
 	const uint8* p = (const uint8*)input;													\
 	const uint8* pe = &p[nb*4];																\
 	uint32* h0= &mHistogram[H0_OFFSET];	/* Histogram for first pass (LSB)	*/				\
 	uint32* h1= &mHistogram[H1_OFFSET];	/* Histogram for second pass		*/				\
 	uint32* h2= &mHistogram[H2_OFFSET];	/* Histogram for third pass			*/				\
 	uint32* h3= &mHistogram[H3_OFFSET];	/* Histogram for last pass (MSB)	*/				\
 																							\
 	bool AlreadySorted = true;	/* Optimism... */											\
 																							\
 	if(INVALID_RANKS)																		\
 	{																						\
 		/* Prepare for temporal coherence */												\
 		type* Running = (type*)buffer;														\
 		type PrevVal = *Running;															\
 																							\
 		while(p!=pe)																		\
 		{																					\
 			/* Read input buffer in previous sorted order */								\
 			type Val = *Running++;															\
 			/* Check whether already sorted or not */										\
 			if(Val<PrevVal)	{ AlreadySorted = false; break; } /* Early out */				\
 			/* Update for next iteration */													\
 			PrevVal = Val;																	\
 																							\
 			/* Create histograms */															\
 			h0[*p++]++;	h1[*p++]++;	h2[*p++]++;	h3[*p++]++;									\
 		}																					\
 																							\
 		/* If all input values are already sorted, we just have to return and leave the */	\
 		/* previous list unchanged. That way the routine may take advantage of temporal */	\
 		/* coherence, for example when used to sort transparent faces.					*/	\
 		if(AlreadySorted)																	\
 		{																					\
 			mNbHits++;																		\
 			for(uint32 i=0;i<nb;i++)	mRanks[i] = i;										\
 			return *this;																	\
 		}																					\
 	}																						\
 	else																					\
 	{																						\
 		/* Prepare for temporal coherence */												\
 		const uint32* Indices = mRanks;														\
 		type PrevVal = (type)buffer[*Indices];												\
 																							\
 		while(p!=pe)																		\
 		{																					\
 			/* Read input buffer in previous sorted order */								\
 			type Val = (type)buffer[*Indices++];											\
 			/* Check whether already sorted or not */										\
 			if(Val<PrevVal)	{ AlreadySorted = false; break; } /* Early out */				\
 			/* Update for next iteration */													\
 			PrevVal = Val;																	\
 																							\
 			/* Create histograms */															\
 			h0[*p++]++;	h1[*p++]++;	h2[*p++]++;	h3[*p++]++;									\
 		}																					\
 																							\
 		/* If all input values are already sorted, we just have to return and leave the */	\
 		/* previous list unchanged. That way the routine may take advantage of temporal */	\
 		/* coherence, for example when used to sort transparent faces.					*/	\
 		if(AlreadySorted)	{ mNbHits++; return *this;	}									\
 	}																						\
 																							\
 	/* Else there has been an early out and we must finish computing the histograms */		\
 	while(p!=pe)																			\
 	{																						\
 		/* Create histograms without the previous overhead */								\
 		h0[*p++]++;	h1[*p++]++;	h2[*p++]++;	h3[*p++]++;										\
 	}
 // CHECK_PASS_VALIDITY(pass)
 // Expands inside the pass loop of a sort() method and uses `mHistogram`, `input` and `nb`
 // from the expansion site. It declares three locals for the code that follows:
 //   CurCount    - the 256 counters of this pass,
 //   UniqueVal   - the byte found at offset `pass` inside the first key,
 //   PerformPass - false when the counter of UniqueVal equals nb, i.e. every key has
 //                 that same byte and the pass cannot change the order.
 #define CHECK_PASS_VALIDITY(pass)															\
 	/* Shortcut to current counters */														\
 	const uint32* CurCount = &mHistogram[pass<<8];											\
 																							\
 	/* Reset flag. The sorting pass is supposed to be performed. (default) */				\
 	bool PerformPass = true;																\
 																							\
 	/* Check pass validity */																\
 																							\
 	/* If all values have the same byte, sorting is useless. */								\
 	/* It may happen when sorting bytes or words instead of dwords. */						\
 	/* This routine actually sorts words faster than dwords, and bytes */					\
 	/* faster than words. Standard running time (O(4*n))is reduced to O(2*n) */				\
 	/* for words and O(n) for bytes. Running time for floats depends on actual values... */	\
 																							\
 	/* Get first byte */																	\
 	uint8 UniqueVal = *(((uint8*)input)+pass);												\
 																							\
 	/* Check that byte's counter */															\
 	if(CurCount[UniqueVal]==nb)	PerformPass=false;
 using namespace nv;
 /// Constructor. The sorter starts without rank buffers, with zeroed statistics, owning
 /// whatever buffers it allocates later, and with its ranks flagged as invalid.
 /// Members are listed in declaration order, which is the order they are initialized in.
 RadixSort::RadixSort() : mCurrentSize(0), mRanks(NULL), mRanks2(NULL), mTotalCalls(0), mNbHits(0), mDeleteRanks(true)
 {
 	// Initialize indices
 	INVALIDATE_RANKS;
 }
 /// Destructor. Frees both rank lists, unless they were supplied through setRankBuffers().
 RadixSort::~RadixSort()
 {
 	// Buffers handed in by the caller are not ours to release.
 	if(!mDeleteRanks)	return;
 	delete [] mRanks2;
 	delete [] mRanks;
 }
 /// Resizes the inner lists.
 /// Nothing happens when the rank buffers were supplied through setRankBuffers();
 /// those must already be large enough.
 /// \param		nb				[in] new size (number of dwords)
 /// \return		true if success
 bool RadixSort::resize(uint32 nb)
 {
 	if(mDeleteRanks)
 	{
 		// Free previously used ram. The pointers are cleared right away so that the
 		// destructor does not free them a second time if an allocation below throws.
 		delete [] mRanks2;
 		mRanks2 = NULL;
 		delete [] mRanks;
 		mRanks = NULL;
 		// Get some fresh one
 		mRanks	= new uint32[nb];
 		mRanks2	= new uint32[nb];
 	}
 	return true;
 }
 /// Adapt the sorter to a request for @a nb keys. When @a nb differs from the current
 /// size the lists are grown if needed, the new size is recorded and the ranks are
 /// flagged invalid; an unchanged size leaves everything as it is.
 inline void RadixSort::checkResize(uint32 nb)
 {
 	const uint32 previous = CURRENT_SIZE;
 	if(nb==previous)	return;
 	if(previous<nb)	resize(nb);
 	mCurrentSize = nb;
 	INVALIDATE_RANKS;
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /**
 *	Main sort routine.
 *	This one is for integer values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data.
 *	\param		input			[in] a list of integer values to sort
 *	\param		nb				[in] number of values to sort
 *	\param		signedvalues	[in] true to handle negative values, false if you know your input buffer only contains positive values
 *	\return		Self-Reference
 */
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 RadixSort& RadixSort::sort(const uint32* input, uint32 nb, bool signedValues/*=true*/)
 {
 	// Checkings
 	// (a null input, an empty input, or a count using the top bit - which mCurrentSize reserves as a flag - is ignored)
 	if(!input || !nb || nb&0x80000000)	return *this;
 	// Stats
 	mTotalCalls++;
 	// Resize lists if needed
 	checkResize(nb);
 	// Allocate histograms & offsets on the stack
 	uint32 mHistogram[256*4];
 	uint32* mLink[256];
 	// Create histograms (counters). Counters for all passes are created in one run.
 	// Pros:	read input buffer once instead of four times
 	// Cons:	mHistogram is 4Kb instead of 1Kb
 	// We must take care of signed/unsigned values for temporal coherence.... I just
 	// have 2 code paths even if just a single opcode changes. Self-modifying code, someone?
 	// NOTE: CREATE_HISTOGRAMS returns from this function when the keys are already in order.
 	if(!signedValues)	{ CREATE_HISTOGRAMS(uint32, input);	}
 	else				{ CREATE_HISTOGRAMS(int32, input);	}
 	// Radix sort, j is the pass number (0=LSB, 3=MSB)
 	for(uint32 j=0;j<4;j++)
 	{
 		// Declares CurCount, UniqueVal and PerformPass for this pass.
 		CHECK_PASS_VALIDITY(j);
 		// Sometimes the fourth (negative) pass is skipped because all numbers are negative and the MSB is 0xFF (for example). This is
 		// not a problem, numbers are correctly sorted anyway.
 		if(PerformPass)
 		{
 			// Should we care about negative values?
 			if(j!=3 || !signedValues)
 			{
 				// Here we deal with positive values only
 				// Create offsets
 				mLink[0] = mRanks2;
 				for(uint32 i=1;i<256;i++)		mLink[i] = mLink[i-1] + CurCount[i-1];
 			}
 			else
 			{
 				// This is a special case to correctly handle negative integers. They're sorted in the right order but at the wrong place.
 				// Buckets 128..255 (sign bit set) are laid out first, buckets 0..127 after them.
 				mLink[128] = mRanks2;
 				for(uint32 i=129;i<256;i++)	mLink[i] = mLink[i-1] + CurCount[i-1];
 				mLink[0] = mLink[255] + CurCount[255];
 				for(uint32 i=1;i<128;i++)	mLink[i] = mLink[i-1] + CurCount[i-1];
 			}
 			// Perform Radix Sort
 			const uint8* InputBytes	= (const uint8*)input;
 			InputBytes += BYTES_INC;
 			if(INVALID_RANKS)
 			{
 				// No usable previous order: scatter the indices 0..nb-1 directly.
 				for(uint32 i=0;i<nb;i++)	*mLink[InputBytes[i<<2]]++ = i;
 				VALIDATE_RANKS;
 			}
 			else
 			{
 				// Scatter the indices in the order left by the previous pass (or previous call).
 				const uint32* Indices		= mRanks;
 				const uint32* IndicesEnd	= &mRanks[nb];
 				while(Indices!=IndicesEnd)
 				{
 					uint32 id = *Indices++;
 					*mLink[InputBytes[id<<2]]++ = id;
 				}
 			}
 			// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
 			uint32* Tmp = mRanks;
 			mRanks = mRanks2;
 			mRanks2 = Tmp;
 		}
 	}
 	return *this;
 }
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /**
 *	Main sort routine.
 *	This one is for floating-point values. After the call, mRanks contains a list of indices in sorted order, i.e. in the order you may process your data.
 *	\param		input			[in] a list of floating-point values to sort
 *	\param		nb				[in] number of values to sort
 *	\return		Self-Reference
 *	\warning	only sorts IEEE floating-point values
 */
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 RadixSort& RadixSort::sort(const float* input2, uint32 nb)
 {
 	// Checkings
 	// (a null input, an empty input, or a count using the top bit - which mCurrentSize reserves as a flag - is ignored)
 	if(!input2 || !nb || nb&0x80000000)	return *this;
 	// Stats
 	mTotalCalls++;
 	// The radix passes work on the raw bit patterns of the floats.
 	const uint32* input = (const uint32*)input2;
 	// Resize lists if needed
 	checkResize(nb);
 	// Allocate histograms & offsets on the stack
 	uint32 mHistogram[256*4];
 	uint32* mLink[256];
 	// Create histograms (counters). Counters for all passes are created in one run.
 	// Pros:	read input buffer once instead of four times
 	// Cons:	mHistogram is 4Kb instead of 1Kb
 	// Floating-point values are always supposed to be signed values, so there's only one code path there.
 	// Please note the floating point comparison needed for temporal coherence! Although the resulting asm code
 	// is dreadful, this is surprisingly not such a performance hit - well, I suppose that's a big one on first
 	// generation Pentiums....We can't make comparison on integer representations because, as Chris said, it just
 	// wouldn't work with mixed positive/negative values....
 	// NOTE: CREATE_HISTOGRAMS returns from this function when the keys are already in order.
 	{ CREATE_HISTOGRAMS(float, input2); }
 	// Radix sort, j is the pass number (0=LSB, 3=MSB)
 	for(uint32 j=0;j<4;j++)
 	{
 		// Should we care about negative values?
 		if(j!=3)
 		{
 			// Here we deal with positive values only
 			CHECK_PASS_VALIDITY(j);
 			if(PerformPass)
 			{
 				// Create offsets
 				mLink[0] = mRanks2;
 				for(uint32 i=1;i<256;i++)		mLink[i] = mLink[i-1] + CurCount[i-1];
 				// Perform Radix Sort
 				const uint8* InputBytes = (const uint8*)input;
 				InputBytes += BYTES_INC;
 				if(INVALID_RANKS)
 				{
 					for(uint32 i=0;i<nb;i++)	*mLink[InputBytes[i<<2]]++ = i;
 					VALIDATE_RANKS;
 				}
 				else
 				{
 					const uint32* Indices		= mRanks;
 					const uint32* IndicesEnd	= &mRanks[nb];
 					while(Indices!=IndicesEnd)
 					{
 						uint32 id = *Indices++;
 						*mLink[InputBytes[id<<2]]++ = id;
 					}
 				}
 				// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
 				uint32* Tmp = mRanks;
 				mRanks = mRanks2;
 				mRanks2 = Tmp;
 			}
 		}
 		else
 		{
 			// This is a special case to correctly handle negative values
 			CHECK_PASS_VALIDITY(j);
 			if(PerformPass)
 			{
 				// Negative keys (radix 128..255) go first, bucket 255 leading. Those buckets are
 				// filled backwards, so mLink[i] points one past the END of bucket i.
 				mLink[255] = mRanks2 + CurCount[255];
 				for(uint32 i = 254; i > 127; i--) mLink[i] = mLink[i+1] + CurCount[i];
 				// Positive keys (radix 0..127) follow the negatives and are filled forwards, so
 				// mLink[i] points at the START of bucket i. mLink[128] is the end of the last
 				// negative bucket, i.e. the total number of negative keys.
 				// Bucket 127 must be treated like every other positive bucket: counting it
 				// among the negatives shifts all positive buckets by CurCount[127] and writes
 				// past the end of mRanks2 as soon as one key has 0x7F as its top byte.
 				mLink[0] = mLink[128];
 				for(uint32 i = 1; i < 128; i++) mLink[i] = mLink[i-1] + CurCount[i-1];
 				// Perform Radix Sort
 				if(INVALID_RANKS)
 				{
 					for(uint32 i=0;i<nb;i++)
 					{
 						uint32 Radix = input[i]>>24;							// Radix byte, same as above. AND is useless here (uint32).
 						// ### cmp to be killed. Not good. Later.
 						if(Radix<128)		*mLink[Radix]++ = i;		// Number is positive, same as above
 						else				*(--mLink[Radix]) = i;		// Number is negative, flip the sorting order
 					}
 					VALIDATE_RANKS;
 				}
 				else
 				{
 					for(uint32 i=0;i<nb;i++)
 					{
 						uint32 Radix = input[mRanks[i]]>>24;							// Radix byte, same as above. AND is useless here (uint32).
 						// ### cmp to be killed. Not good. Later.
 						if(Radix<128)		*mLink[Radix]++ = mRanks[i];		// Number is positive, same as above
 						else				*(--mLink[Radix]) = mRanks[i];		// Number is negative, flip the sorting order
 					}
 				}
 				// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
 				uint32* Tmp = mRanks;
 				mRanks = mRanks2;
 				mRanks2 = Tmp;
 			}
 			else
 			{
 				// The pass is useless, yet we still have to reverse the order of current list if all values are negative.
 				if(UniqueVal>=128)
 				{
 					if(INVALID_RANKS)
 					{
 						// ###Possible?
 						for(uint32 i=0;i<nb;i++)	mRanks2[i] = nb-i-1;
 						VALIDATE_RANKS;
 					}
 					else
 					{
 						for(uint32 i=0;i<nb;i++)	mRanks2[i] = mRanks[nb-i-1];
 					}
 					// Swap pointers for next pass. Valid indices - the most recent ones - are in mRanks after the swap.
 					uint32* Tmp = mRanks;
 					mRanks = mRanks2;
 					mRanks2 = Tmp;
 				}
 			}
 		}
 	}
 	return *this;
 }
 /// Make the sorter work in two caller-provided rank lists instead of allocating its own.
 /// The sorter never frees these buffers; each must hold at least as many uint32 as the
 /// largest number of keys that will be sorted.
 /// \param		ranks0			[in] first list, becomes mRanks
 /// \param		ranks1			[in] second list, becomes mRanks2
 /// \return		false (and no change) if either pointer is null, true otherwise
 bool RadixSort::setRankBuffers(uint32* ranks0, uint32* ranks1)
 {
 	if(!ranks0 || !ranks1)	return false;
 	// Release the lists allocated by resize(), otherwise they would leak once the
 	// ownership flag is cleared. A list the caller is handing back to us is kept.
 	if(mDeleteRanks)
 	{
 		if(mRanks!=ranks0 && mRanks!=ranks1)	delete [] mRanks;
 		if(mRanks2!=ranks0 && mRanks2!=ranks1)	delete [] mRanks2;
 	}
 	mRanks			= ranks0;
 	mRanks2			= ranks1;
 	mDeleteRanks	= false;
 	// The content of the new lists is unknown: do not let the next sort read it as ranks.
 	INVALIDATE_RANKS;
 	return true;
 }
 /// Sort an array of signed integers; forwards to the raw-pointer overload with signed handling enabled.
 RadixSort & RadixSort::sort(const Array<int> & input)
 {
 	const uint32 * keys = (const uint32 *)input.buffer();
 	return sort(keys, input.count(), true);
 }
 /// Sort an array of unsigned integers; forwards to the raw-pointer overload with signed handling disabled.
 RadixSort & RadixSort::sort(const Array<uint> & input)
 {
 	const bool signedKeys = false;
 	return sort(input.buffer(), input.count(), signedKeys);
 }
 /// Sort an array of floats; forwards to the raw-pointer float overload.
 RadixSort &	RadixSort::sort(const Array<float> & input)
 {
 	const float * keys = input.buffer();
 	return sort(keys, input.count());
 }
--- a/src/nvcore/Radix.h
+++ b/src/nvcore/Radix.h
@ -1,73 +0,0 @@
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 /**
 *	Contains source code from the article "Radix Sort Revisited".
 *	\file		Radix.h
 *	\author		Pierre Terdiman
 *	\date		April, 4, 2000
 */
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 ///////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
 // Include Guard
 #ifndef NV_CORE_RADIXSORT_H
 #define NV_CORE_RADIXSORT_H
 #include <nvcore/nvcore.h>
 #include <nvcore/Containers.h>
 namespace nv
 {
 	/// Radix sorter producing ranks: the keys themselves are never moved, sort() fills a
 	/// list of indices (see ranks()) that enumerates the keys in sorted order.
 	class NVCORE_CLASS RadixSort
 	{
 		NV_FORBID_COPY(RadixSort);
 	public:
 		// Constructor/Destructor
 		RadixSort();
 		~RadixSort();
 		// Sorting methods
 		// Both take nb keys starting at input and return *this; signedValues selects
 		// whether the 32 bit keys are ordered as signed or as unsigned integers.
 		RadixSort & sort(const uint32* input, uint32 nb, bool signedValues=true);
 		RadixSort &	sort(const float* input, uint32 nb);
 		// Helpers
 		// Same as above for whole arrays (int keys are sorted as signed, uint keys as unsigned).
 		RadixSort & sort(const Array<int> & input);
 		RadixSort & sort(const Array<uint> & input);
 		RadixSort & sort(const Array<float> & input);
 		//! Access to results. mRanks is a list of indices in sorted order, i.e. in the order you may further process your data
 		inline /*const*/ uint32 * ranks() /*const*/ { return mRanks; }
 		//! mRanks2 gets trashed on calling the sort routine, but otherwise you can recycle it the way you want.
 		inline uint32 * recyclable() const { return mRanks2; }
 		// Stats
 		//! Returns the total number of calls to the radix sorter.
 		inline uint32 totalCalls() const { return mTotalCalls; }
 		//! Returns the number of early exits due to temporal coherence.
 		inline uint32 hits() const { return mNbHits; }
 		//! Use two caller-owned lists as rank buffers; returns false if either pointer is null.
 		bool setRankBuffers(uint32* ranks0, uint32* ranks1);
 	private:
 		uint32 mCurrentSize;    //!< Current size of the indices list
 		uint32 * mRanks;        //!< Two lists, swapped each pass
 		uint32 * mRanks2;
 		// Stats
 		uint32 mTotalCalls;     //!< Total number of calls to the sort routine
 		uint32 mNbHits;         //!< Number of early exits due to coherence
 		// Stack-radix
 		bool mDeleteRanks;      //!< True while the sorter owns (and must delete) the two lists
 		// Internal methods
 		void checkResize(uint32 nb);
 		bool resize(uint32 nb);
 	};
 } // nv namespace
 #endif // NV_CORE_RADIXSORT_H
--- a/src/nvcore/RefCounted.cpp
+++ b/src/nvcore/RefCounted.cpp
@ -1,9 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #include "RefCounted.h"
 using namespace nv;
 // Storage for the global debug counters declared in RefCounted.h; both start at zero.
 // Running tally of references, updated by addRef()/release().
 int nv::RefCounted::s_total_ref_count = 0;
 // Running tally of live RefCounted objects, updated by the ctor/dtor.
 int nv::RefCounted::s_total_obj_count = 0;
--- a/src/nvcore/RefCounted.h
+++ b/src/nvcore/RefCounted.h
@ -1,119 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_CORE_REFCOUNTED_H
 #define NV_CORE_REFCOUNTED_H
 #include <nvcore/nvcore.h>
 #include <nvcore/Debug.h>
 // Forward declares SmartPtr and introduces the typedefs ClassPtr (SmartPtr<Class>) and
 // ClassConstPtr (SmartPtr<const Class>) for @a Class. The elaborated `class Class` specifier
 // also forward declares Class itself, so the macro can be used before Class is defined.
 // No trailing semicolon: the user supplies it.
 #define NV_DECLARE_PTR(Class) \
 	template <class T> class SmartPtr; \
 	typedef SmartPtr<class Class> Class ## Ptr; \
 	typedef SmartPtr<const class Class> Class ## ConstPtr
 namespace nv
 {
 	/// Intrusive reference counting base class, meant to be used through SmartPtr and WeakPtr.
 	/// The per-object count starts at zero; two class-wide counters track live objects and
 	/// outstanding references for debugging.
 	class RefCounted
 	{
 		NV_FORBID_COPY(RefCounted);
 	public:
 		/// Ctor. Starts unreferenced and registers the object in the global tally.
 		RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
 		{
 			++s_total_obj_count;
 		}
 		/// Virtual dtor. The object must not be referenced anymore.
 		virtual ~RefCounted()
 		{
 			nvCheck( m_count == 0 );
 			nvCheck( s_total_obj_count > 0 );
 			--s_total_obj_count;
 		}
 		/// Take a reference.
 		/// @return the reference count after the increment.
 		uint addRef() const
 		{
 			++s_total_ref_count;
 			return ++m_count;
 		}
 		/// Drop a reference and destroy the object once nobody holds it.
 		/// @return the remaining reference count (0 when the object was deleted).
 		uint release() const
 		{
 			nvCheck( m_count > 0 );
 			--s_total_ref_count;
 			if( --m_count != 0 ) {
 				return m_count;
 			}
 			// Last reference gone: the object destroys itself.
 		//	releaseWeakProxy();
 			delete this;
 			return 0;
 		}
 	/*
 		/// Get weak proxy.
 		WeakProxy * getWeakProxy() const
 		{
 			if (m_weak_proxy == NULL) {
 				m_weak_proxy = new WeakProxy;
 				m_weak_proxy->AddRef();
 			}
 			return m_weak_proxy;
 		}
 		/// Release the weak proxy.	
 		void releaseWeakProxy() const
 		{
 			if (m_weak_proxy != NULL) {
 				m_weak_proxy->NotifyObjectDied();
 				m_weak_proxy->Release();
 				m_weak_proxy = NULL;
 			}
 		}
 	*/
 		/** @name Debug methods: */
 		//@{
 			/// Number of references currently held on this object.
 			int refCount() const
 			{
 				return m_count;
 			}
 			/// Number of RefCounted objects currently alive.
 			static int totalObjectCount()
 			{
 				return s_total_obj_count;
 			}
 			/// Number of references currently held across all objects.
 			static int totalReferenceCount()
 			{
 				return s_total_ref_count;
 			}
 		//@}
 	private:
 		NVCORE_API static int s_total_ref_count;
 		NVCORE_API static int s_total_obj_count;
 		// Mutable so that const objects can be shared too.
 		mutable int m_count;
 	//	mutable WeakProxy * m_weak_proxy;
 	};
 } // nv namespace
 #endif // NV_CORE_REFCOUNTED_H
--- a/src/nvcore/Tokenizer.cpp
+++ b/src/nvcore/Tokenizer.cpp
@ -1,259 +0,0 @@
 // This code is in the public domain -- castano@gmail.com
 #include "Tokenizer.h"
 #include <nvcore/StrLib.h>
 #include <stdio.h> // vsscanf
 #include <stdarg.h>	// va_list
 #include <stdlib.h>	// atof, atoi
 #if NV_CC_MSVC
 #if defined NV_CPU_X86
 /* vsscanf for Win32
 * Written 5/2003 by <mgix@mgix.com>
 * This code is in the Public Domain
 */
 #include <malloc.h> // alloca
 //#include <string.h>
 // vsscanf replacement for 32 bit MSVC. It rebuilds an sscanf-style argument area on the stack
 // (buffer, format, then the variadic pointers) and calls the CRT sscanf with the stack pointer
 // redirected to that area.
 // @param buffer  text to parse.
 // @param format  scanf format string.
 // @param argPtr  variadic arguments; read as a contiguous array of pointer-sized slots.
 // @return whatever the CRT call leaves in eax (the sscanf conversion count).
 // NOTE(review): memcpy is used below while the <string.h> include above is commented out;
 // presumably another header provides it -- confirm.
 static int vsscanf(const char * buffer, const char * format, va_list argPtr)
 {
 	// Get an upper bound for the # of args
 	// Every '%' not followed by '*' or '%' is counted as one output pointer.
 	size_t count = 0;
 	const char *p = format;
 	while(1) {
 		char c = *(p++);
 		if(c==0) break;
 		if(c=='%' && (p[0]!='*' && p[0]!='%')) ++count;
 	}
 	// Make a local stack
 	// alloca memory lives until this function returns, i.e. past the call below.
 	size_t stackSize = (2+count)*sizeof(void*);
 	void **newStack = (void**)alloca(stackSize);
 	// Fill local stack the way sscanf likes it
 	newStack[0] = (void*)buffer;
 	newStack[1] = (void*)format;
 	memcpy(newStack+2, argPtr, count*sizeof(void*));
 	// @@ Use: CALL DWORD PTR [sscanf]
 	// Warp into system sscanf with new stack
 	int result;
 	void *savedESP;
 	__asm
 	{
 		mov     savedESP, esp
 		mov     esp, newStack
 #if _MSC_VER >= 1400
 		// NOTE(review): sscanf_s expects an extra size argument for %s/%c/%[ conversions,
 		// which the counting loop above does not account for -- confirm callers avoid them.
 		call	DWORD PTR [sscanf_s]
 #else
 		call	DWORD PTR [sscanf]
 #endif
 		mov     esp, savedESP
 		mov     result, eax
 	}
 	return result;
 }
 #elif defined NV_CPU_X86_64
 /* Prototype of the helper assembly function */
 #ifdef __cplusplus
 extern "C" {
 #endif
 int vsscanf_proxy_win64(const char * buffer, const char * format, va_list argPtr, __int64 count);
 #ifdef __cplusplus
 }
 #endif
 /* x64 flavour of vsscanf: inline assembly is not available, so the call is delegated to the
  * external assembly helper vsscanf_proxy_win64 together with an upper bound of the number of
  * variadic arguments. */
 static int vsscanf(const char * buffer, const char * format, va_list argPtr)
 {
 	// Upper bound for the # of args: each '%' that is not followed by '*' or '%'.
 	__int64 count = 0;
 	for (const char * cursor = format; cursor[0] != 0; ++cursor) {
 		if (cursor[0] != '%') {
 			continue;
 		}
 		if (cursor[1] != '*' && cursor[1] != '%') {
 			++count;
 		}
 	}
 	return vsscanf_proxy_win64(buffer, format, argPtr, count);
 }
 /*#error vsscanf doesn't work on MSVC for x64*/
 #else
 #error Unknown processor for MSVC
 #endif
 #endif // NV_CC_MSVC
 using namespace nv;
 /// Default ctor: an empty token that points at a static empty string.
 Token::Token()
 	: m_str("")
 	, m_len(0)
 {
 }
 /// Copy ctor: shares the source token's character pointer and length (no text is copied).
 Token::Token(const Token & token)
 	: m_str(token.m_str)
 	, m_len(token.m_len)
 {
 }
 /// Build a token that refers to @a len characters starting at @a str. The text is not copied,
 /// so @a str has to outlive the token.
 Token::Token(const char * str, int len)
 	: m_str(str)
 	, m_len(len)
 {
 }
 /// Compare the token text with the null terminated string @a str.
 /// The token is not null terminated, so both the length and the first m_len characters have
 /// to match; comparing only m_len characters would make the token "v" equal to "vt" and an
 /// empty token equal to everything.
 bool Token::operator==(const char * str) const
 {
 	return strlen(str) == size_t(m_len) && strncmp(m_str, str, m_len) == 0;
 }
 /// Negation of the equality test: true unless @a str has exactly the token's length and the
 /// same characters. A plain strncmp over m_len characters would report the token "v" as
 /// equal to "vt".
 bool Token::operator!=(const char * str) const
 {
 	return strlen(str) != size_t(m_len) || strncmp(m_str, str, m_len) != 0;
 }
 /// True for a null (empty) token, i.e. one with zero length such as a default constructed
 /// Token. The previous test was inverted and reported every non-empty token as null.
 bool Token::isNull()
 {
 	return m_len == 0;
 }
 /// Parse the token as a floating point number with atof.
 /// The conversion starts at the token's first character and is not limited to m_len; atof
 /// stops at the first character that cannot be part of a number.
 float Token::toFloat() const
 {
 	const double value = atof(m_str);
 	return float(value);
 }
 /// Parse the token as a signed decimal integer with atoi (conversion is not limited to m_len).
 int Token::toInt() const
 {
 	const int value = atoi(m_str);
 	return value;
 }
 /// Parse the token as an unsigned decimal integer.
 /// strtoul is used instead of atoi so that values above INT_MAX convert correctly; atoi has
 /// no defined result for them. Like the other conversions, parsing starts at the token's
 /// first character and stops at the first non-digit.
 uint Token::toUnsignedInt() const
 {
 	return uint(strtoul(m_str, NULL, 10));
 }
 }
 /// Copy the token's m_len characters into a String.
 String Token::toString() const
 {
 	String text(m_str, m_len);
 	return text;
 }
 /// scanf-style parsing of the token text.
 /// @param format  scanf format string.
 /// @param count   number of conversions that must succeed.
 /// @return true when vsscanf converted exactly @a count items.
 bool Token::parse(const char * format, int count, ...) const
 {
 	va_list args;
 	va_start(args, count);
 	const int converted = vsscanf(m_str, format, args);
 	va_end(args);
 	return count == converted;
 }
 /// Create a tokenizer that reads lines from @a stream.
 /// Defaults: delimiters "{}()=" and spaces " \t". No line is loaded yet; m_line is set to NULL
 /// explicitly (it used to be left uninitialized), so nextLine() must be called before
 /// nextToken().
 Tokenizer::Tokenizer(Stream * stream) : 
 	m_reader(stream), m_line(NULL), m_lineNumber(0), m_columnNumber(0), m_delimiters("{}()="), m_spaces(" \t")
 {
 }
 /// Advance to the next line and read its first token.
 /// @param skipEmptyLines  keep reading lines until one yields a token.
 /// @return false when the input is exhausted, true otherwise (even when the line has no
 ///         token and @a skipEmptyLines is false).
 bool Tokenizer::nextLine(bool skipEmptyLines /*= true*/)
 {
 	for (;;) {
 		if (!readLine()) {
 			return false;
 		}
 		if (readToken() || !skipEmptyLines) {
 			return true;
 		}
 	}
 }
 /// Read the next token of the current line.
 /// @param skipEndOfLine  when the line is exhausted, continue with the following non-empty line.
 /// @return true when a token is available.
 bool Tokenizer::nextToken(bool skipEndOfLine /*= false*/)
 {
 	if (readToken()) {
 		return true;
 	}
 	// End of line reached: either give up or move on to the next line that has a token.
 	return skipEndOfLine ? nextLine(true) : false;
 }
 /// Extract the token that starts at the current column into m_token.
 /// A delimiter is a one character token; any other token runs until the next delimiter,
 /// space or end of line.
 /// @return false when only the end of the line is left.
 bool Tokenizer::readToken()
 {
 	skipSpaces();
 	const char * const start = m_line + m_columnNumber;
 	if (start[0] == '\0') {
 		return false;
 	}
 	char current = readChar();
 	if (isDelimiter(current)) {
 		m_token = Token(start, 1);
 		return true;
 	}
 	// @@ Add support for quoted tokens "", ''
 	int length = 0;
 	for (; !isDelimiter(current) && !isSpace(current) && current != '\0'; length++) {
 		current = readChar();
 	}
 	// The terminating character was consumed by the loop; put it back.
 	m_columnNumber--;
 	m_token = Token(start, length);
 	return true;
 }
 /// Return the character at the current column and advance the column by one.
 char Tokenizer::readChar()
 {
 	const char current = m_line[m_columnNumber];
 	m_columnNumber++;
 	return current;
 }
 /// Fetch the next line from the reader and rewind the column.
 /// @return false when the reader has no more lines.
 bool Tokenizer::readLine()
 {
 	m_columnNumber = 0;
 	++m_lineNumber;
 	m_line = m_reader.readLine();
 	if (m_line == NULL) {
 		return false;
 	}
 	return true;
 }
 void Tokenizer::skipSpaces()
 {
 	while (isSpace(readChar())) {}
 	m_columnNumber--;
 }
 /// True when @a c is one of the characters of m_spaces (the terminator never matches).
 bool Tokenizer::isSpace(char c)
 {
 	for (const char * candidate = m_spaces; *candidate != '\0'; ++candidate) {
 		if (*candidate == c) {
 			return true;
 		}
 	}
 	return false;
 }
 /// True when @a c is one of the characters of m_delimiters (the terminator never matches).
 bool Tokenizer::isDelimiter(char c)
 {
 	for (const char * candidate = m_delimiters; *candidate != '\0'; ++candidate) {
 		if (*candidate == c) {
 			return true;
 		}
 	}
 	return false;
 }
--- a/src/nvcore/Tokenizer.h
+++ b/src/nvcore/Tokenizer.h
@ -1,98 +0,0 @@
 // This code is in the public domain -- castano@gmail.com
 #ifndef NV_CORE_TOKENIZER_H
 #define NV_CORE_TOKENIZER_H
 #include <nvcore/StrLib.h>
 #include <nvcore/Stream.h>
 #include <nvcore/TextReader.h>
 namespace nv
 {
 	/// A token produced by the Tokenizer.
 	/// Holds a pointer and a length only; the characters themselves are not owned or copied.
 	class NVCORE_CLASS Token
 	{
 	public:
 		Token();
 		Token(const Token & token);
 		/// Token referring to @a len characters starting at @a str.
 		Token(const char * str, int len);		
 		/// Comparisons against a null terminated string.
 		bool operator==(const char * str) const;
 		bool operator!=(const char * str) const;
 		/// Null-token query. NOTE(review): not const qualified, so it cannot be called on the
 		/// const reference returned by Tokenizer::token().
 		bool isNull();
 		/// Conversions of the token text.
 		float toFloat() const;
 		int toInt() const;
 		uint toUnsignedInt() const;
 		String toString() const;
 		/// scanf-style parsing; @a count is the number of conversions expected to succeed.
 		/// The format attribute indices count the implicit `this` as argument 1.
 		/// NOTE(review): __attribute__ is a GCC extension -- presumably neutralized for other
 		/// compilers by a project header; confirm.
 		bool parse(const char * format, int count, ...) const __attribute__((format (scanf, 2, 4)));
 	private:
 		const char * m_str;	// First character of the token (not null terminated at m_len).
 		int m_len;	// Number of characters in the token.
 	};
 	/// Exception type of the tokenizer: carries the position (line and column) of the problem.
 	class TokenizerException
 	{
 	public:
 		/// Record the position at which tokenizing failed.
 		TokenizerException(int line, int column)
 			: m_line(line)
 			, m_column(column)
 		{
 		}
 		/// Line of the failure.
 		int line() const
 		{
 			return m_line;
 		}
 		/// Column of the failure.
 		int column() const
 		{
 			return m_column;
 		}
 	private:
 		int m_line;
 		int m_column;
 	};
 	// @@ Use enums instead of bools for clarity!
 	//enum SkipEmptyLines { skipEmptyLines, noSkipEmptyLines };
 	//enum SkipEndOfLine { skipEndOfLine, noSkipEndOfLine };
 	/// A simple stream tokenizer.
 	/// Reads the stream line by line through a TextReader and splits each line into tokens
 	/// separated by "space" characters; every "delimiter" character is a token of its own.
 	class NVCORE_CLASS Tokenizer
 	{
 	public:
 		Tokenizer(Stream * stream);
 		/// Load the next line and read its first token; returns false at end of input.
 		bool nextLine(bool skipEmptyLines = true);
 		/// Read the next token of the current line, optionally continuing on the next line.
 		bool nextToken(bool skipEndOfLine = false);
 		/// Last token read. It points into the current line buffer.
 		const Token & token() const { return m_token; }
 		int lineNumber() const { return m_lineNumber; }
 		int columnNumber() const { return m_columnNumber; }
 		/// The string is not copied; @a str must stay valid while the tokenizer uses it.
 		void setDelimiters(const char * str) { m_delimiters = str; }
 		const char * delimiters() const { return m_delimiters; }
 		/// The string is not copied; @a str must stay valid while the tokenizer uses it.
 		void setSpaces(const char * str) { m_spaces = str; }
 		const char * spaces() const { return m_spaces; }
 	private:
 		char readChar();
 		bool readLine();
 		bool readToken(); 
 		void skipSpaces();
 		bool isSpace(char c);
 		bool isDelimiter(char c);
 	private:
 		TextReader m_reader;
 		const char * m_line;	// Current line, as returned by the reader.
 		Token m_token;	// Last token read.
 		int m_lineNumber;	// Number of lines read so far.
 		int m_columnNumber;	// Index of the next character to read in m_line.
 		const char * m_delimiters;	// Characters that form single character tokens.
 		const char * m_spaces;	// Characters that separate tokens.
 	};
 } // nv namespace
 #endif // NV_CORE_TOKENIZER_H
--- a/src/nvcore/vsscanf_proxy_win64.masm
+++ b/src/nvcore/vsscanf_proxy_win64.masm
@ -1,124 +0,0 @@
 ; MASM x64 version of
 ; vsscanf for Win32
 ; originally written 5/2003 by <mgix@mgix.com>
 ;
 ; This was done because MSVC does not accept inline assembly code
 ; for the x64 platform, so this file implements almost the whole
 ; module in assembly using the amd64 ABI
 ;
 ; 06/17/2008 by edgarv [at] nvidia com
 ; Definition of memcpy
 memcpy	PROTO dest:Ptr, src:Ptr, numbytes:QWORD
 ; Definition of sscanf
 sscanf PROTO buffer:Ptr Byte, format:Ptr Byte, args:VARARG
 ; Start a code segment named "_TEXT" by default
 .CODE
 ; Entry point of our function: at this point we can use
 ; named parameters
 ALIGN 16
 PUBLIC vsscanf_proxy_win64
 ; Because the x64 code uses the fast call convention, only
 ; the arguments beyond the 4th one are available from the stack.
 ; The first four parameters are in RCX, RDX, R8 and R9
 ; Parameters:
 ;    const char* buffer
 ;    const char* format
 ;    va_list argPtr
 ;    size_t  count
 vsscanf_proxy_win64 PROC, \
 	buffer:PTR Byte, format:PTR Byte, argPtr:PTR, count:QWORD
  ; Allocates space for our local variable, savedRSP
  ; NOTE(review): the local is later addressed as [rbp - 8]; no frame setup is visible here,
  ; so this presumably relies on a PROC prologue generated by the assembler -- confirm.
  sub rsp, 08h
  ; Copies the parameters from the registers to the memory: before warping to
  ; sscanf we will call memcpy, and those registers can just disappear!
  mov buffer, rcx
  mov format, rdx
  mov argPtr, r8
  mov count,  r9
  ; Allocate extra space in the stack for (2+count)*sizeof(void*),
  ; this is (2+count)*(8)
  mov r10, r9
  add r10, 2		; count += 2
  sal r10, 3		; count *= 8
  add r10, 0fh	; To force alignment to 16bytes
  and r10, 0fffffffffffffff0h
  sub rsp, r10	; Actual stack allocation
  ; Continues by copying all the arguments in the "alloca" space  
  mov [rsp], rcx		    ; newStack[0] = (void*)buffer;
  mov [rsp + 08h], rdx		; newStack[1] = (void*)format;
  ; Calls memcpy(newStack+2, argPtr, count*sizeof(void*));
  mov rcx, rsp
  add rcx, 010h		; newStack+2
  mov rdx, r8		; argPtr
  mov r8, r9
  sal r8, 3			; count*sizeof(void*)
  ; Prepares extra stack space as required by the ABI for 4 arguments, and calls memcpy
  sub rsp, 020h
  call memcpy
  ; Restore the stack
  add rsp, 020h
  ; Saves rsp in memory
  mov qword ptr [rbp - 8], rsp
  ; Does exactly the same trick as before: warp into system sscanf with the new stack,
  ; but this time we also setup the arguments in the registers according to the amd64 ABI
  ; If there was at least one argument (after buffer and format), we need to copy that
  ; to r8, and if there was a second one we must copy that to r9
  ; (the first arguments to sscanf are always the buffer and the format)
  ; The new stack doubles as the callee's register home area: slots 0..3 hold buffer, format
  ; and the first two variadic pointers, and slot 4 onwards holds the remaining ones.
  mov r10, count
  ; Copy the first argument to r8 (if it exists)
  cmp r10, 0
  je args_memcpy
  mov r8, [rsp + 10h]
  ; Copy the second argument to r9 (if it exists)
  cmp r10, 1
  je args_memcpy
  mov r9, [rsp + 18h]
 args_memcpy:
  ; Copies the buffer and format to rcx and rdx
  mov rdx, [rsp + 08h]
  mov rcx, [rsp]
  ; Finally, calls sscanf using the current stack
  call sscanf
  ; At this point the return value is already in rax
  ; Restores rsp
  mov rsp, qword ptr [rbp - 8]
  ; Undoes the alloca
  ; NOTE(review): r10 was reloaded with `count` above (it no longer holds the aligned byte
  ; size) and it is a volatile register in the Microsoft x64 convention, so sscanf may have
  ; clobbered it; this add does not look like it restores the original rsp -- confirm.
  add rsp, r10
  ; Restores the space for local variables
  add rsp, 08h
  ; Remember, the return value is already in rax since the sscanf call
  ret
 vsscanf_proxy_win64 ENDP
 END
--- a/src/nvimage/CMakeLists.txt
+++ b/src/nvimage/CMakeLists.txt
@ -14,15 +14,12 @@ SET(IMAGE_SRCS
 	ColorBlock.cpp
 	BlockDXT.h
 	BlockDXT.cpp
 	HoleFilling.h
 	HoleFilling.cpp
 	DirectDrawSurface.h
 	DirectDrawSurface.cpp
 	Quantize.h
 	Quantize.cpp
 	NormalMap.h
 	NormalMap.cpp
 	NormalMipmap.h
 	PsdFile.h
 	TgaFile.h
 	ColorSpace.h
--- a/src/nvimage/ConeMap.cpp
+++ b/src/nvimage/ConeMap.cpp
@ -1,122 +0,0 @@
 // Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
 // 
 // Permission is hereby granted, free of charge, to any person
 // obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without
 // restriction, including without limitation the rights to use,
 // copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the
 // Software is furnished to do so, subject to the following
 // conditions:
 // 
 // The above copyright notice and this permission notice shall be
 // included in all copies or substantial portions of the Software.
 // 
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
 #include <nvcore/Ptr.h>
 #include <nvmath/Color.h>
 #include <nvimage/NormalMap.h>
 #include <nvimage/Filter.h>
 #include <nvimage/FloatImage.h>
 #include <nvimage/Image.h>
 using namespace nv;
 /// Compute the cone value of the texel (x, y) from the height stored in channel 0 of @a img.
 /// Every texel that lies below the reference texel is a candidate; the one with the smallest
 /// squared-distance / height-difference ratio is selected, and the result is
 /// sqrt(normalized distance / height difference), clamped to 1.
 /// @return the cone value, or 1 when no texel is lower than the reference texel.
 /// NOTE(review): the selection ratio uses squared texel distances while the returned value
 /// uses the normalized distance; kept as in the original -- confirm that this is intended.
 static float processPixel(const FloatImage * img, uint x, uint y)
 {
 	nvDebugCheck(img != NULL);
 	const uint w = img->width();
 	const uint h = img->height();
 	const float d = img->pixel(x, y, 0);
 	float best_ratio = INF;
 	uint best_x = w;
 	uint best_y = h;
 	for (uint yy = 0; yy < h; yy++)
 	{
 		for (uint xx = 0; xx < w; xx++)
 		{
 			const float ch = d - img->pixel(xx, yy, 0);
 			if (ch > 0)
 			{
 				// Subtract as floats: the coordinates are unsigned and xx - x would wrap
 				// around for texels to the left of / above the reference texel.
 				const float dx = float(xx) - float(x);
 				const float dy = float(yy) - float(y);
 				const float ratio = (dx * dx + dy * dy) / ch;
 				if (ratio < best_ratio)
 				{
 					// Remember the ratio too, otherwise the last candidate always wins.
 					best_ratio = ratio;
 					best_x = xx;
 					best_y = yy;
 				}
 			}
 		}
 	}
 	if (best_x != w)
 	{
 		nvDebugCheck(best_y != h);
 		const float dx = (float(best_x) - float(x)) / w;
 		const float dy = (float(best_y) - float(y)) / h;
 		const float cw = sqrtf(dx*dx + dy*dy);
 		// Height difference to the selected texel; positive, because only lower texels are candidates.
 		const float ch = d - img->pixel(best_x, best_y, 0);
 		return min(1.0f, sqrtf(cw / ch));
 	}
 	return 1;
 }
 // Create a cone map from the given image.
 // Channel 0 of the result receives the height (dot product of each pixel's color with
 // @a heightWeights) and channel 1 receives the cone value computed by processPixel; the
 // remaining two channels are allocated but not written. The caller owns the returned image.
 // NOTE(review): storing the cone value in channel 1 is an assumption -- the original computed
 // it and discarded it. The definition also lives at global scope although ConeMap.h declares
 // nv::createConeMap -- confirm.
 FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights)
 {
 	nvCheck(img != NULL);
 	const uint w = img->width();
 	const uint h = img->height();
 	// Keep a plain pointer for processPixel; fimage owns the image until release().
 	FloatImage * const image = new FloatImage();
 	AutoPtr<FloatImage> fimage(image);
 	//fimage->allocate(2, w, h);
 	fimage->allocate(4, w, h);
 	// Compute height and store in red channel:
 	float * heightChannel = fimage->channel(0);
 	for(uint i = 0; i < w*h; i++)
 	{
 		Vector4 color = toVector4(img->pixel(i));
 		heightChannel[i] = dot(color, heightWeights);
 	}
 	// Compute cones and store in green channel:
 	for(uint y = 0; y < h; y++)
 	{
 		for(uint x = 0; x < w; x++)
 		{
 			fimage->setPixel(processPixel(image, x, y), x, y, 1);
 		}
 	}
 	return fimage.release();
 }
--- a/src/nvimage/ConeMap.h
+++ b/src/nvimage/ConeMap.h
@ -1,39 +0,0 @@
 // Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
 // 
 // Permission is hereby granted, free of charge, to any person
 // obtaining a copy of this software and associated documentation
 // files (the "Software"), to deal in the Software without
 // restriction, including without limitation the rights to use,
 // copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the
 // Software is furnished to do so, subject to the following
 // conditions:
 // 
 // The above copyright notice and this permission notice shall be
 // included in all copies or substantial portions of the Software.
 // 
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
 // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.
 #ifndef NV_IMAGE_CONEMAP_H
 #define NV_IMAGE_CONEMAP_H
 #include <nvmath/Vector.h>
 #include <nvimage/nvimage.h>
 namespace nv
 {
 	class Image;
 	class FloatImage;
 	FloatImage * createConeMap(const Image * img, Vector4::Arg heightWeights);
 } // nv namespace
 #endif // NV_IMAGE_CONEMAP_H
--- a/src/nvimage/HoleFilling.cpp
+++ b/src/nvimage/HoleFilling.cpp
@ -1,849 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #include <nvimage/HoleFilling.h>
 #include <nvimage/FloatImage.h>
 #include <nvmath/nvmath.h>
 #include <nvcore/Containers.h>
 #include <nvcore/Ptr.h>
 using namespace nv;
 /// Bilinearly interpolated coverage at the texel coordinates (x, y), with the four taps
 /// clamped to the bitmap. Each bit contributes 0 or 1.
 /// The integer coordinates are clamped as signed values before they are converted to
 /// unsigned, so negative coordinates clamp to 0 instead of going through an out-of-range
 /// float to unsigned conversion.
 float BitMap::sampleLinearClamp(float x, float y) const
 {
 	const float floorX = float(floor(x));
 	const float floorY = float(floor(y));
 	const float fracX = x - floorX;
 	const float fracY = y - floorY;
 	const int maxX = int(m_width) - 1;
 	const int maxY = int(m_height) - 1;
 	const uint ix0 = uint(::clamp(int(floorX), 0, maxX));
 	const uint iy0 = uint(::clamp(int(floorY), 0, maxY));
 	const uint ix1 = uint(::clamp(int(floorX) + 1, 0, maxX));
 	const uint iy1 = uint(::clamp(int(floorY) + 1, 0, maxY));
 	float f1 = bitAt(ix0, iy0);
 	float f2 = bitAt(ix1, iy0);
 	float f3 = bitAt(ix0, iy1);
 	float f4 = bitAt(ix1, iy1);
 	// Interpolate along x on both rows, then along y.
 	float i1 = lerp(f1, f2, fracX);
 	float i2 = lerp(f3, f4, fracX);
 	return lerp(i1, i2, fracY);
 }
 // This is a variation of Sapiro's inpainting method.
 void nv::fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap)
 {
 	nvCheck(img != NULL);
 	nvCheck(bmap != NULL);
 	const int w = img->width();
 	const int h = img->height();
 	const int count = img->componentNum();
 	nvCheck(bmap->width() == uint(w));
 	nvCheck(bmap->height() == uint(h));
 	AutoPtr<BitMap> newbmap(new BitMap(w, h));
 	newbmap->clearAll();
 	for(int p = 0; p < passCount; p++)
 	{
 		for(int c = 0; c < count; c++)
 		{
 			float * channel = img->channel(c);
 			for(int y = 0; y < h; y++) {
 				for(int x = 0; x < w; x++) {
 					if (bmap->bitAt(x, y)) {
 						// Not a hole.
 						newbmap->setBitAt(x, y);
 						continue;
 					}
 					const bool west = bmap->bitAt(img->indexClamp(x-1, y));
 					const bool east = bmap->bitAt(img->indexClamp(x+1, y));
 					const bool north = bmap->bitAt(img->indexClamp(x, y-1));
 					const bool south = bmap->bitAt(img->indexClamp(x, y+1));
 					const bool northwest = bmap->bitAt(img->indexClamp(x-1, y-1));
 					const bool northeast = bmap->bitAt(img->indexClamp(x+1, y-1));
 					const bool southwest = bmap->bitAt(img->indexClamp(x-1, y+1));
 					const bool southeast = bmap->bitAt(img->indexClamp(x+1, y+1));
 					int num = west + east + north + south + northwest + northeast + southwest + southeast;
 					if (num != 0) {
 						float average = 0.0f;
 						if (num == 3 && west && northwest && southwest) {
 							average = channel[img->indexClamp(x-1, y)];
 						}
 						else if (num == 3 && east && northeast && southeast) {
 							average = channel[img->indexClamp(x+1, y)];
 						}
 						else if (num == 3 && north && northwest && northeast) {
 							average = channel[img->indexClamp(x, y-1)];
 						}
 						else if (num == 3 && south && southwest && southeast) {
 							average = channel[img->indexClamp(x, y+1)];
 						}
 						else {
 							float total = 0.0f;
 							if (west) { average += 1 * channel[img->indexClamp(x-1, y)]; total += 1; }
 							if (east) { average += 1 * channel[img->indexClamp(x+1, y)]; total += 1; }
 							if (north) { average += 1 * channel[img->indexClamp(x, y-1)]; total += 1; }
 							if (south) { average += 1 * channel[img->indexClamp(x, y+1)]; total += 1; }
 							if (northwest) { average += channel[img->indexClamp(x-1, y-1)]; ++total; }
 							if (northeast) { average += channel[img->indexClamp(x+1, y-1)]; ++total; }
 							if (southwest) { average += channel[img->indexClamp(x-1, y+1)]; ++total; }
 							if (southeast) { average += channel[img->indexClamp(x+1, y+1)]; ++total; }
 							average /= total;
 						}
 						channel[img->indexClamp(x, y)] = average;
 						newbmap->setBitAt(x, y);
 					}
 				}
 			}
 		}
 		// Update the bit mask.
 		swap(*newbmap, *bmap);
 	}
 }
 namespace {
 	struct Neighbor {
 		uint16 x;
 		uint16 y;
 		uint32 d;
 	};
 	// Compute euclidean squared distance.
 	static uint dist( uint16 ax, uint16 ay, uint16 bx, uint16 by ) {
 		int dx = bx - ax;
 		int dy = by - ay;
 		return uint(dx*dx + dy*dy);
 	}
 	// Check neighbour, this is the core of the EDT algorithm.
 	static void checkNeighbour( int x, int y, Neighbor * e, const Neighbor & n ) {
 		nvDebugCheck(e != NULL);
 		uint d = dist( x, y, n.x, n.y );
 		if( d < e->d ) {
 			e->x = n.x;
 			e->y = n.y;
 			e->d = d;
 		}
 	}
 } // namespace
 // Voronoi filling using EDT-4
 // This implementation is based on Danielsson's algorithm published in:
 // "Euclidean Distance Mapping" Per-Erik Danielsson, Computer Graphics and Image Processing, 14, 1980
 void nv::fillVoronoi(FloatImage * img, const BitMap * bmap)
 {
 	nvCheck(img != NULL);
 	const int w = img->width();
 	const int h = img->height();
 	const int count = img->componentNum();
 	nvCheck(bmap->width() == uint(w));
 	nvCheck(bmap->height() == uint(h));
 	Array<Neighbor> edm;
 	edm.resize(w * h);
 	int x, y;
 	int x0, x1, y0, y1;
 	// Init edm.
 	for( y = 0; y < h; y++ ) {
 		for( x = 0; x < w; x++ ) {
 			if( bmap->bitAt(x, y) ) {
 				edm[y * w + x].x = x;
 				edm[y * w + x].y = y;
 				edm[y * w + x].d = 0;
 			}
 			else {
 				edm[y * w + x].x = w;
 				edm[y * w + x].y = h;
 				edm[y * w + x].d = w*w + h*h;
 			}
 		}
 	}
 	// First pass.
 	for( y = 0; y < h; y++ ) {
 		for( x = 0; x < w; x++ ) {
 			x0 = clamp(x-1, 0, w-1);	// @@ Wrap?
 			x1 = clamp(x+1, 0, w-1);
 			y0 = clamp(y-1, 0, h-1);
 			Neighbor & e = edm[y * w + x];
 			checkNeighbour(x, y, &e, edm[y0 * w + x0]);
 			checkNeighbour(x, y, &e, edm[y0 * w + x]);
 			checkNeighbour(x, y, &e, edm[y0 * w + x1]);
 			checkNeighbour(x, y, &e, edm[y * w + x0]);
 		}
 		for( x = w-1; x >= 0; x-- ) {
 			x1 = clamp(x+1, 0, w-1);
 			Neighbor & e = edm[y * w + x];
 			checkNeighbour(x, y, &e, edm[y * w + x1]);
 		}
 	}
 	// Third pass.
 	for( y = h-1; y >= 0; y-- ) {
 		for( x = w-1; x >= 0; x-- ) {
 			x0 = clamp(x-1, 0, w-1);
 			x1 = clamp(x+1, 0, w-1);
 			y1 = clamp(y+1, 0, h-1);
 			Neighbor & e = edm[y * w + x];
 			checkNeighbour(x, y, &e, edm[y * w + x1]);
 			checkNeighbour(x, y, &e, edm[y1 * w + x0]);
 			checkNeighbour(x, y, &e, edm[y1 * w + x]);
 			checkNeighbour(x, y, &e, edm[y1 * w + x1]);
 		}
 		for( x = 0; x < w; x++ ) {
 			x0 = clamp(x-1, 0, w-1);
 			Neighbor & e = edm[y * w + x];
 			checkNeighbour(x, y, &e, edm[y * w + x0]);
 		}
 	}
 	// Fill empty holes.
 	for( y = 0; y < h; y++ ) {
 		for( x = 0; x < w; x++ ) {
 			const int sx = edm[y * w + x].x;
 			const int sy = edm[y * w + x].y;
 			if (sx < w && sy < h)
 			{
 				if (sx != x || sy != y)	{
 					for(int c = 0; c < count; c++ ) {
 						img->setPixel(img->pixel(sx, sy, c), x, y, c);
 					}
 				}
 			}
 		}
 	}
 }
 static bool downsample(const FloatImage * src, const BitMap * srcMask, const FloatImage ** _dst, const BitMap ** _dstMask)
 {
 	const uint w = src->width();
 	const uint h = src->height();
 	const uint count = src->componentNum();
 	// count holes in srcMask, return false if fully filled.
 	uint holes = 0;
 	for(uint y = 0; y < h; y++) {
 		for(uint x = 0; x < w; x++) {
 			holes += srcMask->bitAt(x, y) == 0;
 		}
 	}
 	if (holes == 0 || (w == 2 || h == 2)) {
 		// Stop when no holes or when the texture is very small.
 		return false;
 	}
 	// Apply box filter to image and mask and return true.
 	const uint nw = w / 2;
 	const uint nh = h / 2;
 	FloatImage * dst = new FloatImage();
 	dst->allocate(count, nw, nh);
 	BitMap * dstMask = new BitMap(nw, nh);
 	dstMask->clearAll();
 	for(uint c = 0; c < count; c++) {
 		for(uint y = 0; y < nh; y++) {
 			for(uint x = 0; x < nw; x++) {
 				const uint x0 = 2 * x + 0;
 				const uint x1 = 2 * x + 1;
 				const uint y0 = 2 * y + 0;
 				const uint y1 = 2 * y + 1;
 				const float f0 = src->pixel(x0, y0, c);
 				const float f1 = src->pixel(x1, y0, c);
 				const float f2 = src->pixel(x0, y1, c);
 				const float f3 = src->pixel(x1, y1, c);
 				const bool b0 = srcMask->bitAt(x0, y0);
 				const bool b1 = srcMask->bitAt(x1, y0);
 				const bool b2 = srcMask->bitAt(x0, y1);
 				const bool b3 = srcMask->bitAt(x1, y1);
 				if (b0 || b1 || b2 || b3) {
 					// Set bit mask.
 					dstMask->setBitAt(x, y);
 					// Set pixel.
 					float value = 0.0f;
 					int total = 0;
 					if (b0) { value += f0; total++; }
 					if (b1) { value += f1; total++; }
 					if (b2) { value += f2; total++; }
 					if (b3) { value += f3; total++; }
 					dst->setPixel(value / total, x, y, c);
 				}
 			}
 		}
 	}
 	*_dst = dst;
 	*_dstMask = dstMask;
 	return true;
 }
 // This is the filter used in the Lumigraph paper.
 // Pull: build a chain of half resolution levels in which every texel averages the valid
 // texels below it. Push: every texel of @a img is replaced by the first valid texel found
 // when walking up that chain (valid texels of level 0 are rewritten with their own value).
 // @a bmap marks the valid texels of @a img (set bit = valid) and is not modified.
 void nv::fillPullPush(FloatImage * img, const BitMap * bmap)
 {
 	nvCheck(img != NULL);
 	nvCheck(bmap != NULL);
 	const uint count = img->componentNum();
 	const uint w = img->width();
 	const uint h = img->height();
 	const uint num = log2(max(w,h));
 	// Build mipmap chain.
 	Array<const FloatImage *> mipmaps(num);
 	Array<const BitMap *> mipmapMasks(num);
 	mipmaps.append(img);
 	mipmapMasks.append(bmap);
 	const FloatImage * current;
 	const BitMap * currentMask;
 	// Compute mipmap chain.
 	while(downsample(mipmaps.back(), mipmapMasks.back(), &current, &currentMask))
 	{
 		mipmaps.append(current);
 		mipmapMasks.append(currentMask);
 	}
 	const uint levelCount = mipmaps.count();
 	// Sample mipmaps until non-hole is found.
 	for(uint y = 0; y < h; y++) {
 		for(uint x = 0; x < w; x++) {
 			int sx = x;
 			int sy = y;
 			//float sx = x;
 			//float sy = y;
 			for (uint l = 0; l < levelCount; l++)
 			{
 				//const float fx = sx / mipmaps[l]->width();
 				//const float fy = sy / mipmaps[l]->height();
 				const int levelWidth = int(mipmaps[l]->width());
 				const int levelHeight = int(mipmaps[l]->height());
 				if (levelWidth == 0 || levelHeight == 0)
 				{
 					// Degenerate level, nothing to sample.
 					break;
 				}
 				// Halving the coordinate of the last texel of an odd sized level lands one
 				// past the end of the next level; clamp it back inside.
 				if (sx >= levelWidth) sx = levelWidth - 1;
 				if (sy >= levelHeight) sy = levelHeight - 1;
 				if (mipmapMasks[l]->bitAt(sx, sy))
 				{
 					// Sample mipmaps[l](sx, sy) and copy to img(x, y)
 					for(uint c = 0; c < count; c++) {
 						//img->setPixel(mipmaps[l]->linear_clamp(fx, fy, c), x, y, c);
 						img->setPixel(mipmaps[l]->pixel(sx, sy, c), x, y, c);
 					}
 					break;
 				}
 				sx /= 2;
 				sy /= 2;
 			}
 		}
 	}
 	// Don't delete the original image and mask.
 	mipmaps[0] = NULL;
 	mipmapMasks[0] = NULL;
 	// Delete the mipmaps.
 	deleteAll(mipmaps);
 	deleteAll(mipmapMasks);
 }
 // It looks much cooler with trilinear filtering
 // Same pull-push scheme as fillPullPush, but every level is sampled with linear filtering
 // and the samples are accumulated into @a img until the filtered coverage reaches 1.
 // @a bmap marks the valid texels of @a img (set bit = valid) and is not modified.
 // NOTE(review): the samples are added on top of the current contents of img(x, y), including
 // texels that are already valid at level 0, and each sample is weighted by the coverage
 // still missing rather than by its own coverage -- confirm that this is intended.
 void nv::fillPullPushLinear(FloatImage * img, const BitMap * bmap)
 {
 	nvCheck(img != NULL);
 	nvCheck(bmap != NULL);
 	const uint count = img->componentNum();
 	const uint w = img->width();
 	const uint h = img->height();
 	const uint num = log2(max(w,h));
 	// Build mipmap chain.
 	Array<const FloatImage *> mipmaps(num);
 	Array<const BitMap *> mipmapMasks(num);
 	mipmaps.append(img);
 	mipmapMasks.append(bmap);
 	const FloatImage * current;
 	const BitMap * currentMask;
 	// Compute mipmap chain.
 	while(downsample(mipmaps.back(), mipmapMasks.back(), &current, &currentMask))
 	{
 		mipmaps.append(current);
 		mipmapMasks.append(currentMask);
 	}
 	const uint levelCount = mipmaps.count();
 	// Sample mipmaps until non-hole is found.
 	for(uint y = 0; y < h; y++) {
 		for(uint x = 0; x < w; x++) {
 			float sx = x;
 			float sy = y;
 			float coverageSum = 0.0f;
 			for (uint l = 0; l < levelCount; l++)
 			{
 				// The image sampler is called with coordinates divided by the level size,
 				// the mask sampler with texel coordinates.
 				const float fx = sx / mipmaps[l]->width();
 				const float fy = sy / mipmaps[l]->height();
 				float coverage = mipmapMasks[l]->sampleLinearClamp(sx, sy);
 				if (coverage > 0.0f)
 				{
 					// Sample mipmaps[l](sx, sy) and copy to img(x, y)
 					for(uint c = 0; c < count; c++) {
 						img->addPixel((1 - coverageSum) * mipmaps[l]->sampleLinearClamp(fx, fy, c), x, y, c);
 					}
 					coverageSum += coverage;
 					if (coverageSum >= 1.0f)
 					{
 						break;
 					}
 				}
 				sx /= 2;
 				sy /= 2;
 			}
 		}
 	}
 	// Don't delete the original image and mask.
 	mipmaps[0] = NULL;
 	mipmapMasks[0] = NULL;
 	// Delete the mipmaps.
 	deleteAll(mipmaps);
 	deleteAll(mipmapMasks);
 }
 /*
 This Code is from Charles Bloom:
 DoPixelSeamFix
 10-20-02
 Looks in the 5x5 local neighborhood (LocalPixels) of the desired pixel to fill.
 It tries to build a quadratic model of the neighborhood surface to use in
 extrapolating.  You need 5 pixels to establish a 2d quadratic curve.
 This is really just a nice generic way to extrapolate pixels.  It also happens
 to work great for seam-fixing.
 Note that I'm working on normals, but I treat them just as 3 scalars and normalize
 at the end.  To be more correct, I would work on the surface of a sphere, but that
 just seems like way too much work.
 */
 struct LocalPixels
 {
 	// 5x5 neighborhood
 	// the center is at result
 	// index [y][x]
 	bool fill[5][5];
 	float data[5][5];
 	mutable float result;
 	mutable float weight;
 	bool Quad3SubH(float * pQ, int row) const
 	{
 		const bool * pFill = fill[row];
 		const float * pDat = data[row];
 		if ( pFill[1] && pFill[2] && pFill[3] )
 		{
 			// good row
 			*pQ = pDat[1] - 2.f * pDat[2] + pDat[3];
 			return true;
 		}
 		else if ( pFill[0] && pFill[1] && pFill[2] )
 		{
 			// good row
 			*pQ = pDat[0] - 2.f * pDat[1] + pDat[2];
 			return true;
 		}
 		else if ( pFill[2] && pFill[3] && pFill[4] )
 		{
 			// good row
 			*pQ = pDat[2] - 2.f * pDat[3] + pDat[4];
 			return true;
 		}
 		return false;
 	}
 	// improve result with a horizontal quad in row 1 and/or 
 	bool Quad3SubV(float * pQ, int col) const
 	{
 		if ( fill[1][col] && fill[2][col] && fill[3][col] )
 		{
 			// good row
 			*pQ = data[1][col] - 2.f * data[2][col] + data[3][col];
 			return true;
 		}
 		else if ( fill[0][col] && fill[1][col] && fill[2][col] )
 		{
 			// good row
 			*pQ = data[0][col] - 2.f * data[1][col] + data[2][col];
 			return true;
 		}
 		else if ( fill[2][col] && fill[3][col] && fill[4][col] )
 		{
 			// good row
 			*pQ = data[2][col] - 2.f * data[3][col] + data[4][col];
 			return true;
 		}
 		return false;
 	}
 	bool Quad3H(float * pQ) const
 	{
 		if (!Quad3SubH(pQ,1))
 		{
 			return Quad3SubH(pQ,3);	
 		}
 		float q = 0.0f; // initializer not needed, just make it shut up
 		if (Quad3SubH(&q, 3))
 		{
 			// got q and pQ
 			*pQ = (*pQ+q)*0.5f;
 		}
 		return true;
 	}
 	bool Quad3V(float * pQ) const
 	{
 		if (!Quad3SubV(pQ, 1))
 		{
 			return Quad3SubV(pQ, 3);	
 		}
 		float q = 0.0f; // initializer not needed, just make it shut up
 		if (Quad3SubV(&q, 3))
 		{
 			// got q and pQ
 			*pQ = (*pQ + q) * 0.5f;
 		}
 		return true;
 	}
 	// Quad returns ([0]+[2] - 2.f*[1])
 	//	a common want is [1] - ([0]+[2])*0.5f ;
 	// so use -0.5f*Quad
 	/// Quadratic estimates of the centre sample [2][2].
 	/// Every available neighbour pair on the middle row/column that can be combined with a
 	/// curvature value (Quad3H / Quad3V) adds one estimate to `result` and 1 to `weight`.
 	/// @return true if at least one estimate was accumulated.
 	bool tryQuads() const
 	{
 		// Neighbour pairs on the middle row and the middle column.
 		const bool straddleH  = fill[2][1] && fill[2][3];
 		const bool straddleV  = fill[1][2] && fill[3][2];
 		const bool pairLeft   = fill[2][0] && fill[2][1];
 		const bool pairRight  = fill[2][3] && fill[2][4];
 		const bool pairTop    = fill[0][2] && fill[1][2];
 		const bool pairBottom = fill[3][2] && fill[4][2];
 		// The curvature does not depend on which pair consumes it, so evaluate each axis at most once.
 		float qH = 0.0f;
 		float qV = 0.0f;
 		const bool haveH = (straddleH || pairLeft || pairRight) && Quad3H(&qH);
 		const bool haveV = (straddleV || pairTop || pairBottom) && Quad3V(&qV);
 		bool res = false;
 		// pairs that straddle the middle: mean of the two neighbours minus half the curvature
 		if ( haveH && straddleH )
 		{
 			result += (data[2][1] + data[2][3] - qH) * 0.5f;
 			weight += 1.f;
 			res = true;
 		}
 		if ( haveV && straddleV )
 		{
 			result += (data[1][2] + data[3][2] - qV) * 0.5f;
 			weight += 1.f;
 			res = true;
 		}
 		// pairs that lead into the middle: linear extrapolation plus the curvature
 		if ( haveH && pairLeft )
 		{
 			result += data[2][1]*2.f - data[2][0] + qH;
 			weight += 1.f;
 			res = true;
 		}
 		if ( haveH && pairRight )
 		{
 			result += data[2][3]*2.f - data[2][4] + qH;
 			weight += 1.f;
 			res = true;
 		}
 		if ( haveV && pairTop )
 		{
 			result += data[1][2]*2.f - data[0][2] + qV;
 			weight += 1.f;
 			res = true;
 		}
 		if ( haveV && pairBottom )
 		{
 			result += data[3][2]*2.f - data[4][2] + qV;
 			weight += 1.f;
 			res = true;
 		}
 		return res;
 	}
 	/// Planar estimates of the centre sample [2][2].
 	/// For each of the four corners of the inner 3x3 ring, the two edge neighbours adjacent to
 	/// that corner and the corner itself give the estimate edgeA + edgeB - corner.
 	/// @return true if at least one corner contributed to `result` / `weight`.
 	bool tryPlanar() const
 	{
 		bool res = false;
 		// Visit the corners in the order (1,1), (3,1), (1,3), (3,3) as (row, col).
 		for (int col = 1; col <= 3; col += 2)
 		{
 			for (int row = 1; row <= 3; row += 2)
 			{
 				const bool usable = fill[2][col] && fill[row][2] && fill[row][col];
 				if (usable)
 				{
 					result += data[2][col] + data[row][2] - data[row][col];
 					weight += 1.0f;
 					res = true;
 				}
 			}
 		}
 		return res;
 	}
 	/// Two-sample estimates of the centre sample [2][2].
 	/// A pair straddling the centre contributes its mean; a pair leading into the centre
 	/// contributes the linear extrapolation 2*near - far.
 	/// @return true if at least one estimate was accumulated into `result` / `weight`.
 	bool tryTwos() const
 	{
 		bool res = false;
 		// straddling pairs: horizontal first, then vertical
 		if (fill[2][1] && fill[2][3])
 		{
 			result += (data[2][1] + data[2][3]) * 0.5f;
 			weight += 1.0f;
 			res = true;
 		}
 		if (fill[1][2] && fill[3][2])
 		{
 			result += (data[1][2] + data[3][2]) * 0.5f;
 			weight += 1.0f;
 			res = true;
 		}
 		// one-sided pairs, walking away from the centre: left, right, up, down
 		const int stepRow[4] = { 0, 0, -1, 1 };
 		const int stepCol[4] = { -1, 1, 0, 0 };
 		for (int side = 0; side < 4; side++)
 		{
 			const int nearRow = 2 + stepRow[side];
 			const int nearCol = 2 + stepCol[side];
 			const int farRow = 2 + 2 * stepRow[side];
 			const int farCol = 2 + 2 * stepCol[side];
 			if (fill[nearRow][nearCol] && fill[farRow][farCol])
 			{
 				result += data[nearRow][nearCol]*2.0f - data[farRow][farCol];
 				weight += 1.0f;
 				res = true;
 			}
 		}
 		return res;
 	}
 	/// Estimate the centre sample from its filled neighbours.
 	/// Resets `result` and `weight`, then tries the quadratic, planar and two-sample estimators
 	/// in that order and stops at the first one that contributes anything.
 	/// @return true if `result` / `weight` hold at least one estimate.
 	bool doLocalPixelFill() const
 	{
 		result = 0.0f;
 		weight = 0.0f;
 		// short-circuit evaluation keeps the "first estimator that succeeds wins" order
 		return tryQuads() || tryPlanar() || tryTwos();
 	}
 }; // struct LocalPixels
 // This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission.
 /// Fill holes of @a img by extrapolating from the covered pixels in a 5x5 neighbourhood.
 /// @param passCount      Maximum number of dilation passes (must be > 0). Iteration stops early
 ///                       once a pass cannot fill any further pixel.
 /// @param img            Image whose channels are filled in place.
 /// @param bmap           Coverage mask with the same size as @a img; a set bit marks a valid pixel.
 ///                       On return it also has the bits of the pixels that were filled.
 /// @param coverageIndex  Channel to leave untouched, or -1 to process every channel.
 void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
 {
 	nvCheck(passCount > 0);
 	nvCheck(img != NULL);
 	nvCheck(bmap != NULL);
 	const int w = img->width();
 	const int h = img->height();
 	const int count = img->componentNum();
 	nvCheck(bmap->width() == uint(w));
 	nvCheck(bmap->height() == uint(h));
 	// Mask for the next pass; bmap itself stays fixed during a pass so that every channel
 	// sees the same set of source pixels.
 	AutoPtr<BitMap> newbmap( new BitMap(w, h) );
 	newbmap->clearAll();
 	// First channel that is actually processed; it is the one that updates the new mask.
 	int firstChannel = -1;
 	for (int p = 0; p < passCount; p++)
 	{
 		bool filledAny = false;
 		for (int c = 0; c < count; c++)
 		{
 			if (c == coverageIndex) continue;
 			if (firstChannel == -1) firstChannel = c;
 			float * channel = img->channel(c);
 			for (int yb = 0; yb < h; yb++) {
 				for (int xb = 0; xb < w; xb++) {
 					if (bmap->bitAt(xb, yb)) {
 						// Not a hole.
 						newbmap->setBitAt(xb, yb);
 						continue;
 					}
 					// Gather the 5x5 neighbourhood centred on the hole.
 					int numFill = 0;
 					LocalPixels lp;
 					for (int ny = 0; ny < 5; ny++)
 					{
 						int y = (yb + ny - 2);
 						if ( y < 0 || y >= h )
 						{
 							// out of range
 							for(int i = 0; i < 5; i++) 
 							{
 								lp.fill[ny][i] = false;
 							}
 							continue;
 						}
 						for (int nx = 0; nx < 5; nx++)
 						{
 							int x = (xb + nx - 2);
 							if (x < 0 || x >= w)
 							{
 								lp.fill[ny][nx] = false;
 							}
 							else
 							{
 								int idx = img->index(x, y);
 								if (!bmap->bitAt(idx))
 								{
 									lp.fill[ny][nx] = false;
 								}
 								else
 								{
 									lp.fill[ny][nx] = true;
 									lp.data[ny][nx] = channel[idx];
 									numFill++;
 								}
 							}
 						}
 					}
 					// The cheapest estimator (tryTwos) needs two covered neighbours.
 					if (numFill < 2)
 						continue;
 					nvDebugCheck(lp.fill[2][2] == false);
 					if (lp.doLocalPixelFill())
 					{
 						const int idx = img->index(xb, yb);
 						channel[idx] = lp.result / lp.weight;
 						if (c == firstChannel)
 						{
 							// @@ The coverage channel used to be divided by lp.weight here; that was
 							// disabled because the coverage of a hole is always zero.
 							newbmap->setBitAt(xb, yb);
 							filledAny = true;
 						}
 					}
 				}
 			}
 		}
 		// Nothing was filled: the new mask carries no extra information and further passes
 		// would repeat the same work. Leaving bmap alone also keeps the caller's mask intact
 		// when there is no channel to process at all.
 		if (!filledAny)
 			break;
 		// Update the bit mask.
 		swap(*newbmap, *bmap);
 	}
 }
--- a/src/nvimage/HoleFilling.h
+++ b/src/nvimage/HoleFilling.h
@ -1,98 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_IMAGE_HOLEFILLING_H
 #define NV_IMAGE_HOLEFILLING_H
 #include <nvcore/BitArray.h>
 #include <nvimage/nvimage.h>
 namespace nv 
 {
 	class FloatImage;
 	/// Bit mask.
 	class BitMap
 	{
 	public:
 		BitMap(uint w, uint h) : 
 			m_width(w), m_height(h), m_bitArray(w*h) 
 		{
 		}
 		uint width() const { return m_width; }
 		uint height() const { return m_height; }
 		float sampleLinearClamp(float x, float y) const;
 		bool bitAt(uint x, uint y) const
 		{
 			nvDebugCheck(x < m_width && y < m_height);
 			return m_bitArray.bitAt(y * m_width + x);
 		}
 		bool bitAt(uint idx) const
 		{
 			return m_bitArray.bitAt(idx);
 		}
 		void setBitAt(uint x, uint y)
 		{
 			nvDebugCheck(x < m_width && y < m_height);
 			m_bitArray.setBitAt(y * m_width + x);
 		}
 		void setBitAt(uint idx)
 		{
 			m_bitArray.setBitAt(idx);
 		}
 		void clearBitAt(uint x, uint y)
 		{
 			nvDebugCheck(x < m_width && y < m_height);
 			m_bitArray.clearBitAt(y * m_width + x);
 		}
 		void clearBitAt(uint idx)
 		{
 			m_bitArray.clearBitAt(idx);
 		}
 		void clearAll()
 		{
 			m_bitArray.clearAll();
 		}
 		void setAll()
 		{
 			m_bitArray.setAll();
 		}
 		void toggleAll()
 		{
 			m_bitArray.toggleAll();
 		}
 		friend void swap(BitMap & a, BitMap & b)
 		{
 			nvCheck(a.m_width == b.m_width);
 			nvCheck(a.m_height == b.m_height);
 			//swap(const_cast<uint &>(a.m_width), const_cast<uint &>(b.m_width));
 			//swap(const_cast<uint &>(a.m_height), const_cast<uint &>(b.m_height));
 			swap(a.m_bitArray, b.m_bitArray);
 		}
 	private:
 		const uint m_width;
 		const uint m_height;
 		BitArray m_bitArray;
 	};
 	NVIMAGE_API void fillVoronoi(FloatImage * img, const BitMap * bmap);
 	NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap);
 	NVIMAGE_API void fillPullPushLinear(FloatImage * img, const BitMap * bmap);
 	NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap);
 	NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
 } // nv namespace
 #endif // NV_IMAGE_HOLEFILLING_H
--- a/src/nvimage/ImageIO.cpp
+++ b/src/nvimage/ImageIO.cpp
@ -12,8 +12,6 @@
 #include <nvcore/Containers.h>
 #include <nvcore/StrLib.h>
 #include <nvcore/StdStream.h>
 #include <nvcore/Tokenizer.h>
 #include <nvcore/TextWriter.h>
 // Extern
 #if defined(HAVE_JPEG)
@ -186,13 +184,6 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
 	}
 #endif
 	if (strCaseCmp(extension, ".pfm") == 0) {
 		return loadFloatPFM(fileName, s);
 	}
 	if (strCaseCmp(extension, ".hdr") == 0) {
 		return loadGridFloat(fileName, s);
 	}
 	return NULL;
 }
@ -215,12 +206,6 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui
 	}
 #endif
 	// @@ Disable Temporarily
 	if (strCaseCmp(extension, ".pfm") == 0)
 	{
 //		return ImageIO::saveFloatPFM(fileName, fimage, base_component, num_components);
 	}
 	//if (num_components == 3 || num_components == 4)
 	if (num_components <= 4)
 	{
@ -1532,399 +1517,3 @@ bool nv::ImageIO::saveFloatEXR(const char * fileName, const FloatImage * fimage,
 #endif // defined(HAVE_OPENEXR)
 /// Load a Portable FloatMap ("PF" colour / "Pf" grayscale) image from @a s.
 /// @param fileName  Not used by the loader itself.
 /// @param s         Loading stream positioned at the start of the file.
 /// @return A newly allocated image owned by the caller, or NULL if the header is invalid or
 ///         the pixel data could not be read completely.
 FloatImage * nv::ImageIO::loadFloatPFM(const char * fileName, Stream & s)
 {
 	nvCheck(s.isLoading());
 	nvCheck(!s.isError());
 	Tokenizer parser(&s);
 	parser.nextToken();
 	bool grayscale;
 	if (parser.token() == "PF")
 	{
 		grayscale = false;
 	}
 	else if (parser.token() == "Pf")
 	{
 		grayscale = true;
 	}
 	else
 	{
 		// Invalid file.
 		return NULL;
 	}
 	parser.nextLine();
 	const int width = parser.token().toInt(); parser.nextToken();
 	const int height = parser.token().toInt();
 	// The header comes from the file: reject empty or negative sizes and sizes whose sample
 	// count does not fit in an int before they reach the allocation and the read loops.
 	const int componentCount = grayscale ? 1 : 3;
 	if (width <= 0 || height <= 0 || width > (0x7FFFFFFF / componentCount) / height)
 	{
 		return NULL;
 	}
 	parser.nextLine();
 	const float scaleFactor = parser.token().toFloat();
 	// Only the sign of the scale factor is used (it selects the byte order); its magnitude
 	// is not applied to the samples.
 	if (scaleFactor >= 0)
 	{
 		s.setByteOrder(Stream::BigEndian);
 	}
 	else
 	{
 		s.setByteOrder(Stream::LittleEndian);
 	}
 	// Allocate image.
 	AutoPtr<FloatImage> fimage(new FloatImage());
 	const int pixelCount = width * height;
 	// NOTE(review): rows are stored in file order; confirm whether the PFM bottom-to-top
 	// scanline convention should be undone here.
 	if (grayscale)
 	{
 		fimage->allocate(1, width, height);
 		float * channel = fimage->channel(0);
 		for (int i = 0; i < pixelCount; i++)
 		{
 			s << channel[i];
 		}
 	}
 	else
 	{
 		// Colour samples are interleaved in the file and planar in the image.
 		fimage->allocate(3, width, height);
 		float * rchannel = fimage->channel(0);
 		float * gchannel = fimage->channel(1);
 		float * bchannel = fimage->channel(2);
 		for (int i = 0; i < pixelCount; i++)
 		{
 			s << rchannel[i] << gchannel[i] << bchannel[i];
 		}
 	}
 	// A stream error while reading means the payload is incomplete; do not hand back a partly
 	// filled image (the AutoPtr releases it).
 	if (s.isError())
 	{
 		return NULL;
 	}
 	return fimage.release();
 }
 bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components)
 {
 	nvCheck(fileName != NULL);
 	nvCheck(fimage != NULL);
 	nvCheck(fimage->componentNum() <= base_component + num_components);
 	nvCheck(num_components == 1 || num_components == 3);
 	StdOutputStream stream(fileName);
 	TextWriter writer(&stream);
 	if (num_components == 1) writer.write("Pf\n");
 	else /*if (num_components == 3)*/ writer.write("PF\n");
 	int w = fimage->width();
 	int h = fimage->height();
 	writer.write("%d %d\n", w, h);
 	writer.write("%f\n", -1.0f);	// little endian with 1.0 scale.
 	if (num_components == 1)
 	{
 		float * channel = const_cast<float *>(fimage->channel(0));
 		for (int i = 0; i < w * h; i++)
 		{
 			stream << channel[i];
 		}
 	}
 	else
 	{
 		float * rchannel = const_cast<float *>(fimage->channel(0));
 		float * gchannel = const_cast<float *>(fimage->channel(1));
 		float * bchannel = const_cast<float *>(fimage->channel(2));
 		for (int i = 0; i < w * h; i++)
 		{
 			stream << rchannel[i] << gchannel[i] << bchannel[i];
 		}
 	}
 	return true;
 }
 //#pragma warning(disable : 4996)
 /// Load a GridFloat data set as a single-channel image.
 /// @param fileName  Path of the ASCII ".hdr" header; the samples are read from the file with
 ///                  the same name and a ".flt" extension.
 /// @param s         Loading stream positioned at the start of the header.
 /// @return A newly allocated image owned by the caller, or NULL if the header is malformed,
 ///         the ".flt" file cannot be opened, or it holds fewer samples than announced.
 NVIMAGE_API FloatImage * nv::ImageIO::loadGridFloat(const char * fileName, Stream & s)
 {
 	nvCheck(s.isLoading());
 	nvCheck(!s.isError());
 	Tokenizer parser(&s);
 	parser.nextLine();
 	if (parser.token() != "ncols")
 	{
 		nvDebug("Failed to find 'ncols' token in file '%s'.\n", fileName);
 		return NULL;
 	}
 	parser.nextToken(true);
 	const int nCols = parser.token().toInt(); 
 	parser.nextToken(true);
 	if (parser.token() != "nrows")
 	{
 		nvDebug("Failed to find 'nrows' token in file '%s'.\n", fileName);
 		return NULL;
 	}
 	parser.nextToken(true);
 	const int nRows = parser.token().toInt();
 	// The dimensions come from the file: reject empty or negative grids and grids whose
 	// sample count does not fit in an int.
 	if (nCols <= 0 || nRows <= 0 || nCols > 0x7FFFFFFF / nRows)
 	{
 		nvDebug("Invalid grid size %d x %d in file '%s'.\n", nCols, nRows, fileName);
 		return NULL;
 	}
 	/* There's a byte order defined in the header.  We could read it.  However, here we 
 	   just assume that it matches the platform's byte order.
 	// There is then a bunch of data that we don't care about (lat, long definitions, etc).
 	for (int i=0; i!=9; ++i)
 		parser.nextToken(true);
 	if (parser.token() != "byteorder")
 		return NULL;
 	parser.nextToken(true);
 	const Stream::ByteOrder byteOrder = (parser.token() == "LSBFIRST")? Stream::LittleEndian: Stream::BigEndian;
 	*/
 	// GridFloat comes in two files: an ASCII header which was parsed above (.hdr) and a big blob
 	// of binary data in a .flt file.
 	Path dataPath(fileName);
 	dataPath.stripExtension();
 	dataPath.append(".flt");
 	// Open the binary data.
 	// NOTE(review): confirm that Path::fileName() yields the full path and not just the last
 	// path component; otherwise the blob is looked up relative to the working directory.
 	FILE* file = fopen(dataPath.fileName(), "rb");
 	if (!file)
 	{
 		nvDebug("Failed to find GridFloat blob file '%s' corresponding to '%s'.\n", dataPath.fileName(), fileName);
 		return NULL;
 	}
 	// Allocate image.
 	AutoPtr<FloatImage> fimage(new FloatImage());
 	fimage->allocate(1, nCols, nRows);
 	float * channel = fimage->channel(0);
 	// The binary blob is defined to be in row-major order, containing IEEE floats.
 	// So we can just slurp it in.  Theoretically, we ought to use the byte order.
 	const size_t sampleCount = size_t(nRows) * size_t(nCols);
 	const size_t nRead = fread((void*) channel, sizeof(float), sampleCount, file);
 	fclose(file);
 	// fread reports how many whole samples it delivered; anything short of the announced
 	// grid would leave part of the image unread.
 	if (nRead != sampleCount)
 	{
 		nvDebug("GridFloat blob file '%s' is truncated.\n", dataPath.fileName());
 		return NULL;
 	}
 	return fimage.release();
 }
 #if 0
 /** Save PNG.
  * Disabled (compiled out by the surrounding #if 0) and unfinished: the function validates
  * the image and opens the output, but no encoder follows. The block comment in the body is
  * an ActionScript PNG encoder kept as a reference for the chunk layout and the CRC table.
  * NOTE(review): there is no return statement on the success path.
  */
 static bool SavePNG(const PiImage * img, const char * name) {
 	nvCheck( img != NULL );
 	nvCheck( img->mem != NULL );
 	// Only handle names with a ".png" extension.
 	if( piStrCmp(piExtension(name), ".png" ) != 0 ) {
 		return false;
 	}
 	// Cube maps and DDS surfaces are rejected.
 	if( img->flags & PI_IT_CUBEMAP ) {
 		nvDebug("*** Cannot save cubemaps as PNG.");
 		return false;
 	}
 	if( img->flags & PI_IT_DDS ) {
 		nvDebug("*** Cannot save DDS surface as PNG.");
 		return false;
 	}
 	nvDebug( "--- Saving '%s'.\n", name );
 	PiAutoPtr<PiStream> ar( PiFileSystem::CreateFileWriter( name ) );
 	if( ar == NULL ) {
 		nvDebug( "*** SavePNG: Error, cannot save file '%s'.\n", name );
 		return false;
 	}
 /*
 public class PNGEnc {
    public static function encode(img:BitmapData):ByteArray {
        // Create output byte array
        var png:ByteArray = new ByteArray();
        // Write PNG signature
        png.writeUnsignedInt(0x89504e47);
        png.writeUnsignedInt(0x0D0A1A0A);
        // Build IHDR chunk
        var IHDR:ByteArray = new ByteArray();
        IHDR.writeInt(img.width);
        IHDR.writeInt(img.height);
        IHDR.writeUnsignedInt(0x08060000); // 32bit RGBA
        IHDR.writeByte(0);
        writeChunk(png,0x49484452,IHDR);
        // Build IDAT chunk
        var IDAT:ByteArray= new ByteArray();
        for(var i:int=0;i < img.height;i++) {
            // no filter
            IDAT.writeByte(0);
            var p:uint;
            if ( !img.transparent ) {
                for(var j:int=0;j < img.width;j++) {
                    p = img.getPixel(j,i);
                    IDAT.writeUnsignedInt(
                        uint(((p&0xFFFFFF) << 8)|0xFF));
                }
            } else {
                for(var j:int=0;j < img.width;j++) {
                    p = img.getPixel32(j,i);
                    IDAT.writeUnsignedInt(
                        uint(((p&0xFFFFFF) << 8)|
                        (shr(p,24))));
                }
            }
        }
        IDAT.compress();
        writeChunk(png,0x49444154,IDAT);
        // Build IEND chunk
        writeChunk(png,0x49454E44,null);
        // return PNG
        return png;
    }
    private static var crcTable:Array;
    private static var crcTableComputed:Boolean = false;
    private static function writeChunk(png:ByteArray, 
            type:uint, data:ByteArray) {
        if (!crcTableComputed) {
            crcTableComputed = true;
            crcTable = [];
            for (var n:uint = 0; n < 256; n++) {
                var c:uint = n;
                for (var k:uint = 0; k < 8; k++) {
                    if (c & 1) {
                        c = uint(uint(0xedb88320) ^ 
                            uint(c >>> 1));
                    } else {
                        c = uint(c >>> 1);
                    }
                }
                crcTable[n] = c;
            }
        }
        var len:uint = 0;
        if (data != null) {
            len = data.length;
        }
        png.writeUnsignedInt(len);
        var p:uint = png.position;
        png.writeUnsignedInt(type);
        if ( data != null ) {
            png.writeBytes(data);
        }
        var e:uint = png.position;
        png.position = p;
        var c:uint = 0xffffffff;
        for (var i:int = 0; i < (e-p); i++) {
            c = uint(crcTable[
                (c ^ png.readUnsignedByte()) & 
                uint(0xff)] ^ uint(c >>> 8));
        }
        c = uint(c^uint(0xffffffff));
        png.position = e;
        png.writeUnsignedInt(c);
    }
 }
 */
 }
 #endif // 0
 #if 0
 // Disabled (compiled out by the surrounding #if 0) plugin registry: image loaders and savers
 // are registered per file extension and looked up by the extension of the file name.
 namespace ImageIO {
 	/** Init ImageIO plugins. */
 	void InitPlugins() {
 	//	AddInputPlugin( "", LoadANY );
 		AddInputPlugin( "tga", LoadTGA );
 #if HAVE_PNG
 		AddInputPlugin( "png", LoadPNG );
 #endif
 #if HAVE_JPEG
 		AddInputPlugin( "jpg", LoadJPG );
 #endif
 		AddInputPlugin( "dds", LoadDDS );
 		// TGA is the only output format registered here.
 		AddOutputPlugin( "tga", SaveTGA );
 	}
 	/** Reset ImageIO plugins. */
 	void ResetPlugins() {
 		s_plugin_load_map.Clear();
 		s_plugin_save_map.Clear();
 	}
 	/** Add an input plugin. */
 	void AddInputPlugin( const char * ext, ImageInput_Plugin plugin ) {
 		s_plugin_load_map.Add(ext, plugin);
 	}
 	/** Add an output plugin. */
 	void AddOutputPlugin( const char * ext, ImageOutput_Plugin plugin ) {
 		s_plugin_save_map.Add(ext, plugin);
 	}
 	/** Load an image with the input plugin registered for the extension of @a name.
 	 *  Returns false when no plugin is registered for that extension. */
 	bool Load(PiImage * img, const char * name, PiStream & stream) {
 		// Get name extension.
 		const char * extension = piExtension(name);
 		// Skip the dot.
 		if( *extension == '.' ) {
 			extension++;
 		}
 		// Lookup plugin in the map.
 		ImageInput_Plugin plugin = NULL;
 		if( s_plugin_load_map.Get(extension, &plugin) ) {
 			return plugin(img, stream);
 		}
 		/*foreach(i, s_plugin_load_map) {
 			nvDebug("%s %s %d\n", s_plugin_load_map[i].key.GetStr(), extension, 0 == strcmp(extension, s_plugin_load_map[i].key));
 		}
 		nvDebug("No plugin found for '%s' %d.\n", extension, s_plugin_load_map.Size());*/
 		return false;
 	}
 	/** Save an image with the output plugin registered for the extension of @a name.
 	 *  Returns false when no plugin is registered for that extension. */
 	bool Save(const PiImage * img, const char * name, PiStream & stream) {
 		// Get name extension.
 		const char * extension = piExtension(name);
 		// Skip the dot.
 		if( *extension == '.' ) {
 			extension++;
 		}
 		// Lookup plugin in the map.
 		ImageOutput_Plugin plugin = NULL;
 		if( s_plugin_save_map.Get(extension, &plugin) ) {
 			return plugin(img, stream);
 		}
 		return false;
 	}
 } // ImageIO
 #endif // 0
--- a/src/nvimage/ImageIO.h
+++ b/src/nvimage/ImageIO.h
@ -53,14 +53,6 @@ namespace nv
 		NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
 #endif
 		NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
 		NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
 		// GridFloat is a simple, open format for terrain elevation data.  See http://ned.usgs.gov/Ned/faq.asp.
 		// Expects: 1) fileName will be an ".hdr" header file, 2) there will also exist a corresponding float data
 		// blob in a ".flt" file.  (This is what USGS gives you.)
 		NVIMAGE_API FloatImage * loadGridFloat(const char * fileName, Stream & s);
 	} // ImageIO namespace
 } // nv namespace
--- a/src/nvimage/NormalMipmap.h
+++ b/src/nvimage/NormalMipmap.h
@ -1,17 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_IMAGE_NORMALMIPMAP_H
 #define NV_IMAGE_NORMALMIPMAP_H
 #include <nvimage/nvimage.h>
 namespace nv
 {
 	class FloatImage;
 	FloatImage * createNormalMipmapMap(const FloatImage * img);
 } // nv namespace
 #endif // NV_IMAGE_NORMALMIPMAP_H
--- a/src/nvmath/CMakeLists.txt
+++ b/src/nvmath/CMakeLists.txt
@ -7,7 +7,6 @@ SET(MATH_SRCS
 	Plane.h Plane.cpp
 	Box.h
 	Color.h
 	Random.h Random.cpp
 	Half.h Half.cpp
 	Fitting.h Fitting.cpp)
--- a/src/nvmath/Montecarlo.cpp
+++ b/src/nvmath/Montecarlo.cpp
@ -1,135 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #include <nvmath/Montecarlo.h>
 using namespace nv;
 /// Regenerate all samples with the given sampling @a method and target distribution @a dist.
 /// A method value that is not one of the three enumerators leaves the samples untouched.
 void SampleDistribution::redistribute(Method method/*=Method_NRook*/, Distribution dist/*=Distribution_CosineHemisphere*/)
 {
 	if (method == Method_Random)
 	{
 		redistributeRandom(dist);
 	}
 	else if (method == Method_Stratified)
 	{
 		redistributeStratified(dist);
 	}
 	else if (method == Method_NRook)
 	{
 		redistributeNRook(dist);
 	}
 }
 void SampleDistribution::redistributeRandom(const Distribution dist)
 {
 	const uint sampleCount = m_sampleArray.count();
 	// This is the worst method possible!
 	for(uint i = 0; i < sampleCount; i++)
 	{
 		float x = m_rand.getFloat();
 		float y = m_rand.getFloat();
 		setSample(i, dist, x, y);
 	}
 }
 /// Jittered grid sampling: the unit square is split into side x side cells, with
 /// side = sqrt(sample count), and one random point is placed in each cell before mapping
 /// it to @a dist. The sample count must be a perfect square (checked in debug builds).
 void SampleDistribution::redistributeStratified(const Distribution dist)
 {
 	const uint total = m_sampleArray.count();
 	const uint side = uint(sqrtf(float(total)));
 	nvDebugCheck(side*side == total);	// Must be a perfect square!
 	// Walk the cells row by row; sample i lands in cell (u, v) = (i % side, i / side).
 	const uint cellCount = side * side;
 	for (uint i = 0; i < cellCount; i++)
 	{
 		const uint u = i % side;
 		const uint v = i / side;
 		const float x = (u + m_rand.getFloat()) / float(side);
 		const float y = (v + m_rand.getFloat()) / float(side);
 		setSample(i, dist, x, y);
 	}
 }
 /** Multi-Stage N-rooks Sampling Method.
 * See: http://www.acm.org/jgt/papers/WangSung9/9
 *
 * Recursively shuffles @a cells in place: each adjacent pair is split at random between an
 * upper and a lower half, both halves are shuffled the same way, and the result is the upper
 * half followed by the lower half.
 * @param size   Number of entries in @a cells. Zero or one entries need no shuffling.
 * @param cells  Array of @a size cell indices, permuted in place.
 */
 void SampleDistribution::multiStageNRooks(const int size, int* cells)
 {
 	// Also stop for size <= 0: a size of 0 splits into two halves of size 0 and would
 	// otherwise recurse without end.
 	if (size <= 1) {
 		return;
 	}
 	int size1 = size >> 1;
 	int size2 = size >> 1;
 	// For an odd size one half, chosen at random, gets the extra element.
 	if (size & 1) {
 		if (m_rand.getFloat() > 0.5) {
 			size1++;
 		}
 		else {
 			size2++;
 		}
 	}
 	int* upper_cells = new int[size1];
 	int* lower_cells = new int[size2];
 	// Send one element of every adjacent pair to each half; a random bit decides which.
 	int i, j;
 	for(i = 0, j = 0; i < size - 1; i += 2, j++) {
 		if (m_rand.get() & 1) {
 			upper_cells[j] = cells[i];
 			lower_cells[j] = cells[i + 1];
 		}
 		else {
 			upper_cells[j] = cells[i + 1];
 			lower_cells[j] = cells[i];
 		}
 	}
 	// Odd size: the unpaired last element goes to the larger half.
 	if (size1 != size2) {
 		if (size1 > size2) {
 			upper_cells[j] = cells[i];
 		}
 		else {
 			lower_cells[j] = cells[i];
 		}
 	}
 	multiStageNRooks(size1, upper_cells);
 	memcpy(cells, upper_cells, size1 * sizeof(int));
 	delete [] upper_cells;
 	multiStageNRooks(size2, lower_cells);
 	memcpy(cells + size1, lower_cells, size2 * sizeof(int));
 	delete [] lower_cells;
 }
 /// N-rooks sampling: the unit square is split into an N x N grid (N = sample count) and the
 /// samples are placed so that each row and each column of the grid holds exactly one of
 /// them, jittered inside its cell and then mapped to @a dist.
 void SampleDistribution::redistributeNRook(const Distribution dist)
 {
 	const uint sampleCount = m_sampleArray.count();
 	// Nothing to distribute. This also keeps multiStageNRooks from being entered with a size
 	// of 0, which its `size == 1` stop condition does not catch.
 	if (sampleCount == 0)
 	{
 		return;
 	}
 	// Generate nrook cells
 	int * cells = new int[sampleCount];
 	for(uint32 i = 0; i < sampleCount; i++)
 	{
 		cells[i] = i;
 	}
 	// Shuffle the column assigned to each row.
 	multiStageNRooks(sampleCount, cells);
 	for(uint i = 0; i < sampleCount; i++)
 	{
 		// row i, column cells[i], jittered inside the cell
 		float x = (i + m_rand.getFloat()) / sampleCount;
 		float y = (cells[i] + m_rand.getFloat()) / sampleCount;
 		setSample(i, dist, x, y);
 	}
 	delete [] cells;
 }
--- a/src/nvmath/Montecarlo.h
+++ b/src/nvmath/Montecarlo.h
@ -1,103 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_MATH_MONTECARLO_H
 #define NV_MATH_MONTECARLO_H
 #include <nvmath/Vector.h>
 #include <nvmath/Random.h>
 namespace nv
 {
 /// A set of sample directions on the sphere or hemisphere.
 /// The constructor only allocates the samples; call redistribute() to generate them.
 class SampleDistribution
 {
 public:
 	// Sampling method.
 	enum Method {
 		Method_Random,
 		Method_Stratified,
 		Method_NRook
 	};
 	// Distribution functions.
 	enum Distribution {
 		Distribution_UniformSphere,
 		Distribution_UniformHemisphere,
 		Distribution_CosineHemisphere
 	};
 	/// Allocate @a num samples.
 	SampleDistribution(uint num)
 	{
 		m_sampleArray.resize(num);
 	}
 	/// Get number of samples.
 	uint count() const { return m_sampleArray.count(); }
 	/// Regenerate the samples.
 	void redistribute(Method method=Method_NRook, Distribution dist=Distribution_CosineHemisphere);
 	/// Get parametric coordinates of the sample.
 	Vector2 sample(int i) const { return m_sampleArray[i].uv; }
 	/// Get sample direction.
 	Vector3 sampleDir(int i) const { return m_sampleArray[i].dir; }
 	/// Get number of samples (same value as count()).
 	uint sampleCount() const { return m_sampleArray.count(); }
 private:
 	void redistributeRandom(const Distribution dist);
 	void redistributeStratified(const Distribution dist);
 	void multiStageNRooks(const int size, int* cells);
 	void redistributeNRook(const Distribution dist);
 	/// One sample: a direction and its parametric (angular) coordinates.
 	struct Sample
 	{
 		/// Set sample given the 3d coordinates; uv becomes (acos(z), atan2(y, x)).
 		void setDir(float x, float y, float z) {
 			dir.set(x, y, z);
 			const float polar = acosf(z);
 			const float azimuth = atan2f(y, x);
 			uv.set(polar, azimuth);
 		}
 		/// Set sample given the 2d parametric coordinates (u from the +z axis, v around it).
 		void setUV(float u, float v) {
 			uv.set(u, v);
 			const float sinU = sinf(u);
 			dir.set(sinU * cosf(v), sinU * sinf(v), cosf(u));
 		}
 		Vector2 uv;
 		Vector3 dir;
 	};
 	/// Map the point (x, y) of the unit square to the domain selected by @a dist and store
 	/// it as sample @a i.
 	inline void setSample(uint i, Distribution dist, float x, float y)
 	{
 		float theta;
 		switch (dist)
 		{
 			case Distribution_UniformSphere:
 				theta = acosf(1 - 2 * x);
 				break;
 			case Distribution_UniformHemisphere:
 				theta = acosf(x);
 				break;
 			default:
 				// anything else is treated as the cosine-weighted hemisphere
 				nvDebugCheck(dist == Distribution_CosineHemisphere);
 				theta = acosf(sqrtf(x));
 				break;
 		}
 		m_sampleArray[i].setUV(theta, 2 * PI * y);
 	}
 	/// Random number generator used to place the samples.
 	MTRand m_rand;
 	/// Samples.
 	Array<Sample> m_sampleArray;
 };
 } // nv namespace
 #endif // NV_MATH_MONTECARLO_H
--- a/src/nvmath/Random.cpp
+++ b/src/nvmath/Random.cpp
@ -1,54 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #include <nvmath/Random.h>
 #include <time.h>
 using namespace nv;
 // Statics
 // Constants of the Rand48 generator, stored as 16-bit words.
 // NOTE(review): read as a2:a1:a0 these form 0x0005DEECE66D, which together with c0 = 0xB
 // looks like the multiplier and addend of the POSIX drand48 family — confirm against the
 // Rand48 class.
 const uint16 Rand48::a0 = 0xE66D; 
 const uint16 Rand48::a1 = 0xDEEC; 
 const uint16 Rand48::a2 = 0x0005;
 const uint16 Rand48::c0 = 0x000B;
 /// Derive a seed from the current calendar time as reported by time().
 uint Rand::randomSeed()
 {
 	const time_t now = time(NULL);
 	return uint(now);
 }
 /// Fill the N-word state vector from @a seed.
 /// state[0] is the seed; every further word is derived from its predecessor with the
 /// multiplier from Knuth TAOCP Vol 2, 3rd Ed, p.106. In previous versions the most
 /// significant bits of the seed affected only the MSBs of the state array
 /// (modified 9 Jan 2002 by Makoto Matsumoto).
 void MTRand::initialize( uint32 seed )
 {
 	state[0] = seed & 0xffffffffUL;
 	for( int i = 1; i < N; ++i )
 	{
 		const uint32 prev = state[i - 1];
 		state[i] = ( 1812433253UL * ( prev ^ (prev >> 30) ) + i ) & 0xffffffffUL;
 	}
 }
 /// Generate N new values in state and rewind the output cursor (left = N, next = state).
 /// Each word is recomputed in place from the word M positions ahead (wrapping around the
 /// end of the array), itself and its successor.
 void MTRand::reload()
 {
 	int i = 0;
 	// Words whose partner at i + M is still inside the array.
 	for( ; i < N - M; ++i )
 		state[i] = twist( state[i + M], state[i], state[i + 1] );
 	// Remaining words except the last: the partner wraps around to i + M - N.
 	for( ; i < N - 1; ++i )
 		state[i] = twist( state[i + M - N], state[i], state[i + 1] );
 	// Last word: its successor wraps around to state[0].
 	state[i] = twist( state[i + M - N], state[i], state[0] );
 	left = N;
 	next = state;
 }
--- a/src/nvmath/Random.h
+++ b/src/nvmath/Random.h
@ -1,368 +0,0 @@
 // This code is in the public domain -- castanyo@yahoo.es
 #ifndef NV_MATH_RANDOM_H
 #define NV_MATH_RANDOM_H
 #include <nvcore/Containers.h> // nextPowerOfTwo
 #include <nvmath/nvmath.h>
 namespace nv
 {
 /// Interface of the random number generators.
 /// Derived classes implement get(); the helpers here are built on top of it.
 class Rand
 {
 public:
 	virtual ~Rand() {}
 	/// Tag type: generators take Rand::Time in a constructor to seed from randomSeed().
 	enum time_e { Time };
 	/// Provide a new seed. The base implementation ignores it.
 	virtual void seed( uint s ) { /* empty */ }
 	/// Get an integer random number.
 	virtual uint get() = 0;
 	/// Get a random number on the closed interval [0, max].
 	/// Values from get() are masked and rejected until one is <= max.
 	uint getRange( uint max )
 	{
 		// Smallest all-ones mask that covers max. It is built directly from max
 		// so that max itself (including 0 and exact powers of two) always
 		// survives the mask. Assumes uint is 32 bits wide, consistent with max().
 		uint mask = max;
 		mask |= mask >> 1;
 		mask |= mask >> 2;
 		mask |= mask >> 4;
 		mask |= mask >> 8;
 		mask |= mask >> 16;
 		uint n;
 		do { n = get() & mask; } while( n > max );
 		return n;
 	}
 	/// Random number on the half-open interval [0.0, 1.0).
 	float getFloat()
 	{
 		union
 		{
 			uint32 i;
 			float f;
 		} pun;
 		// 0x3f800000 is the bit pattern of 1.0f; filling the 23 mantissa bits with
 		// random bits gives a float in [1.0, 2.0), which is then shifted down by one.
 		pun.i = 0x3f800000UL | (get() & 0x007fffffUL);
 		return pun.f - 1.0f;
 	}
 	/*
 	/// Random number on [0.0, 1.0] interval.
 	double getReal()
 	{
 		return double(get()) * (1.0/4294967295.0); // 2^32-1
 	}
 	/// Random number on [0.0, 1.0) interval.
 	double getRealExclusive()
 	{
 		return double(get()) * (1.0/4294967296.0); // 2^32
 	}
 	*/
 	/// Get the max value of the random number.
 	uint max() const { return 4294967295U; }
 	/// Get a seed based on the current time.
 	static uint randomSeed();
 };
 /// Linear congruential generator whose whole state is a single word.
 class SimpleRand : public Rand
 {
 public:
 	/// Start from a seed taken from randomSeed().
 	SimpleRand( time_e ) : current(randomSeed())
 	{
 	}
 	/// Start from the seed @a s.
 	SimpleRand( uint s = 0 ) : current(s)
 	{
 	}
 	/// Restart the sequence from @a s.
 	virtual void seed( uint s )
 	{
 		current = s;
 	}
 	/// Advance the state by one step and return the new state.
 	virtual uint get()
 	{
 		current = current * 1103515245 + 12345;
 		return current;
 	}
 private:
 	uint current;	// last value returned (or the seed)
 };
 /// Mersenne twister random number generator.
 class MTRand : public Rand
 {
 public:
 	enum { N = 624 };       // number of words in the state vector
 	enum { M = 397 };       // distance to the partner word used by reload()
 	/// Seed from randomSeed().
 	MTRand( time_e )
 	{
 		seed(randomSeed());
 	}
 	/// Seed from the single value @a s.
 	MTRand( uint s = 0 )
 	{
 		seed(s);
 	}
 	/// Seed from an array of @a length values. Defined out of line.
 	NVMATH_API MTRand( const uint * seed_array, uint length );
 	/// Rebuild the state from @a s and generate the first batch of words.
 	virtual void seed( uint s )
 	{
 		initialize(s);
 		reload();
 	}	
 	/// Get the next random word.
 	virtual uint get()
 	{
 		// Refill the state once every word of the current batch has been handed out.
 		if( left == 0 ) { 
 			reload(); 
 		}
 		--left;
 		// Take the next raw state word and scramble its bits before returning it.
 		uint y = *next;
 		++next;
 		y ^= y >> 11;
 		y ^= (y <<  7) & 0x9d2c5680U;
 		y ^= (y << 15) & 0xefc60000U;
 		y ^= y >> 18;
 		return y;
 	}
 private:
 	NVMATH_API void initialize( uint32 seed );
 	NVMATH_API void reload();
 	/// Top bit of @a u.
 	uint hiBit( uint u ) const { return u & 0x80000000U; }
 	/// Bottom bit of @a u.
 	uint loBit( uint u ) const { return u & 0x00000001U; }
 	/// Everything but the top bit of @a u.
 	uint loBits( uint u ) const { return u & 0x7fffffffU; }
 	/// Top bit of @a u joined with the low 31 bits of @a v.
 	uint mixBits( uint u, uint v ) const { return hiBit(u) | loBits(v); }
 	/// Combine the partner word @a m with two neighbouring state words.
 	uint twist( uint m, uint s0, uint s1 ) const
 	{
 		const uint shifted = mixBits(s0, s1) >> 1;
 		// The constant is mixed in only when the low bit of s1 is set.
 		const uint magic = loBit(s1) ? 0x9908b0dfU : 0U;
 		return m ^ shifted ^ magic;
 	}
 private:
 	uint state[N];	// internal state
 	uint * next;	// next value to get from state
 	int left;		// number of values left before reload needed		
 };
 /** George Marsaglia's random number generator. 
 * Code based on Thatcher Ulrich public domain source code:
 * http://cvs.sourceforge.net/viewcvs.py/tu-testbed/tu-testbed/base/tu_random.cpp?rev=1.7&view=auto
 *
 * PRNG code adapted from the complementary-multiply-with-carry
 * code in the article: George Marsaglia, "Seeds for Random Number
 * Generators", Communications of the ACM, May 2003, Vol 46 No 5,
 * pp90-93.
 * 
 * The article says:
 * 
 * "Any one of the choices for seed table size and multiplier will
 * provide a RNG that has passed extensive tests of randomness,
 * particularly those in [3], yet is simple and fast --
 * approximately 30 million random 32-bit integers per second on a
 * 850MHz PC.  The period is a*b^n, where a is the multiplier, n
 * the size of the seed table and b=2^32-1.  (a is chosen so that
 * b is a primitive root of the prime a*b^n + 1.)"
 * 
 * [3] Marsaglia, G., Zaman, A., and Tsang, W.  Toward a universal
 * random number generator.  _Statistics and Probability Letters
 * 8_ (1990), 35-39.
 */
 class GMRand : public Rand
 {
 public:
 	/// Number of entries in the seed table Q. get() masks the table index with
 	/// SEED_COUNT - 1, so this must stay a power of two.
 	enum { SEED_COUNT = 8 };
 //	const uint64 a = 123471786;		// for SEED_COUNT=1024
 //	const uint64 a = 123554632;		// for SEED_COUNT=512
 //	const uint64 a = 8001634;		// for SEED_COUNT=255
 //	const uint64 a = 8007626;		// for SEED_COUNT=128
 //	const uint64 a = 647535442;		// for SEED_COUNT=64
 //	const uint64 a = 547416522;		// for SEED_COUNT=32
 //	const uint64 a = 487198574;		// for SEED_COUNT=16
 //	const uint64 a = 716514398U;	// for SEED_COUNT=8
 	/// Multiplier matching SEED_COUNT (see the table above).
 	enum { a = 716514398U };
 	/// Seed from randomSeed().
 	GMRand( time_e )
 	{
 		seed(randomSeed());
 	}
 	/// Seed from the value @a s.
 	GMRand(uint s = 987654321)
 	{
 		seed(s);
 	}
 	/// Provide a new seed: resets the carry and table index, then fills the
 	/// seed table by repeatedly scrambling @a s with shifts and xors.
 	virtual void seed( uint s )
 	{
 		c = 362436;
 		i = SEED_COUNT - 1;
 		// The loop counter has its own name so it does not shadow the member i.
 		for(int k = 0; k < SEED_COUNT; k++) {
 			s = s ^ (s << 13);
 			s = s ^ (s >> 17);
 			s = s ^ (s << 5);
 			Q[k] = s;
 		}
 	}
 	/// Get the next 32-bit random number.
 	virtual uint get()
 	{
 		const uint32 r = 0xFFFFFFFE;		
 		uint64 t;
 		uint32 x;
 		i = (i + 1) & (SEED_COUNT - 1);
 		// Widen before multiplying: a is an enum constant and Q[i] is 32 bits, so
 		// without the cast the product would be truncated to 32 bits and the
 		// carry taken from the upper half of t would always be zero.
 		t = uint64(a) * Q[i] + c;
 		c = uint32(t >> 32);
 		x = uint32(t + c);
 		if( x < c ) {
 			// The 32-bit sum wrapped around; propagate the carry.
 			x++;
 			c++;
 		}
 		const uint32 val = r - x;
 		Q[i] = val;
 		return val;
 	}
 private:
 	uint32 c;				// current carry
 	uint32 i;				// index of the table entry used by the last get()
 	uint32 Q[SEED_COUNT];	// seed table
 };
 /** Random number implementation from the GNU Sci. Lib. (GSL).
 * Adapted from Nicholas Chapman version:
 * 
 * Copyright (C) 1996, 1997, 1998, 1999, 2000 James Theiler, Brian Gough
 * This is the Unix rand48() generator. The generator returns the
 * upper 32 bits from each term of the sequence,
 * 
 * x_{n+1} = (a x_n + c) mod m 
 * 
 * using 48-bit unsigned arithmetic, with a = 0x5DEECE66D , c = 0xB
 * and m = 2^48. The seed specifies the upper 32 bits of the initial
 * value, x_1, with the lower 16 bits set to 0x330E.
 * 
 * The theoretical value of x_{10001} is 244131582646046.
 * 
 * The period of this generator is ? FIXME (probably around 2^48). 
 */
 class Rand48 : public Rand
 {
 public:
 	Rand48( time_e )
 	{
 		seed(randomSeed());
 	}
 	Rand48( uint s = 0x1234ABCD )
 	{
 		seed(s);
 	}	
 	/** Set the given seed. */
 	virtual void seed( uint s ) {
 		vstate.x0 = 0x330E;
 		vstate.x1 = uint16(s & 0xFFFF);
 		vstate.x2 = uint16((s >> 16) & 0xFFFF);
 	}
 	/** Get a random number. */
 	virtual uint get() {
 		advance();
 		uint x1 = vstate.x1;
 		uint x2 = vstate.x2;
 		return (x2 << 16) + x1;
 	}
 private:
 	void advance()
 	{
 		/* work with unsigned long ints throughout to get correct integer
 		promotions of any unsigned short ints */
 		const uint32 x0 = vstate.x0;
 		const uint32 x1 = vstate.x1;
 		const uint32 x2 = vstate.x2;
 		uint32 a;
 		a = a0 * x0 + c0;
 		vstate.x0 = uint16(a & 0xFFFF);
 		a >>= 16;
 		/* although the next line may overflow we only need the top 16 bits
 		in the following stage, so it does not matter */
 		a += a0 * x1 + a1 * x0; 
 		vstate.x1 = uint16(a & 0xFFFF);
 		a >>= 16;
 		a += a0 * x2 + a1 * x1 + a2 * x0;
 		vstate.x2 = uint16(a & 0xFFFF);
 	}
 private:	
 	NVMATH_API static const uint16 a0, a1, a2, c0;
 	struct rand48_state_t { 
 		uint16 x0, x1, x2; 
 	} vstate;
 };
 } // nv namespace
 #endif // NV_MATH_RANDOM_H