diff --git a/src/nvcore/Utils.h b/src/nvcore/Utils.h index 6c11185..68167be 100644 --- a/src/nvcore/Utils.h +++ b/src/nvcore/Utils.h @@ -25,6 +25,12 @@ namespace nv inline uint32 asUnsigned(int32 x) { return (uint32) x; } inline uint64 asUnsigned(int64 x) { return (uint64) x; } + template inline uint32 toU32(T x) { + nvDebugCheck(x <= UINT32_MAX); + nvDebugCheck(x >= 0); + return (uint32) x; + } + /* template inline int8 toI8(T x) { nvDebugCheck(x <= INT8_MAX); diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h index fbc2694..8315d81 100644 --- a/src/nvthread/Atomic.h +++ b/src/nvthread/Atomic.h @@ -34,16 +34,16 @@ extern "C" namespace nv { // Load and stores. - inline uint32 loadRelaxed(const uint32 * ptr) { return *ptr; } - inline void storeRelaxed(uint32 * ptr, uint32 value) { *ptr = value; } + inline uint32 loadRelaxed(const uint32 * ptr) { return *ptr; } + inline void storeRelaxed(uint32 * ptr, uint32 value) { *ptr = value; } - inline uint32 loadAcquire(const volatile uint32 * ptr) + inline uint32 loadAcquire(const volatile uint32 * ptr) { nvDebugCheck((intptr_t(ptr) & 3) == 0); #if POSH_CPU_X86 || POSH_CPU_X86_64 nvCompilerReadBarrier(); - uint32 ret = *ptr; // on x86, loads are Acquire + uint32 ret = *ptr; // on x86, loads are Acquire nvCompilerReadBarrier(); return ret; #else @@ -51,17 +51,17 @@ namespace nv { #endif } - inline void storeRelease(volatile uint32 * ptr, uint32 value) + inline void storeRelease(volatile uint32 * ptr, uint32 value) { nvDebugCheck((intptr_t(ptr) & 3) == 0); nvDebugCheck((intptr_t(&value) & 3) == 0); #if POSH_CPU_X86 || POSH_CPU_X86_64 nvCompilerWriteBarrier(); - *ptr = value; // on x86, stores are Release - nvCompilerWriteBarrier(); + *ptr = value; // on x86, stores are Release + nvCompilerWriteBarrier(); #else -#error "Not implemented" +#error "Atomics not implemented." 
#endif } @@ -84,6 +84,25 @@ namespace nv { return (uint32)_InterlockedDecrement((long *)value); } +#elif NV_CC_GNUC + // Many alternative implementations at: + // http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html + + inline uint32 atomicIncrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + + return __sync_fetch_and_add(value, 1); + } + + inline uint32 atomicDecrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + + return __sync_fetch_and_sub(value, 1); + } +#else +#error "Atomics not implemented." #endif @@ -107,19 +126,19 @@ namespace nv { - template - class Atomic - { - public: - explicit Atomic() : m_value() { } - explicit Atomic( T val ) : m_value(val) { } - ~Atomic() { } - - T loadRelaxed() const { return m_value; } - void storeRelaxed(T val) { m_value = val; } + template + class Atomic + { + public: + explicit Atomic() : m_value() { } + explicit Atomic( T val ) : m_value(val) { } + ~Atomic() { } + + T loadRelaxed() const { return m_value; } + void storeRelaxed(T val) { m_value = val; } //T loadAcquire() const volatile { return nv::loadAcquire(&m_value); } - //void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); } + //void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); } void increment() /*volatile*/ { nv::atomicIncrement(m_value); } void decrement() /*volatile*/ { nv::atomicDecrement(m_value); } @@ -128,14 +147,14 @@ namespace nv { T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); } T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); } - private: - // don't provide operator = or == ; make the client write Store( Load() ) - NV_FORBID_COPY(Atomic); + private: + // don't provide operator = or == ; make the client write Store( Load() ) + NV_FORBID_COPY(Atomic); - NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64)); - - T m_value; - }; + NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) 
== sizeof(uint64)); + + T m_value; + }; #endif } // nv namespace diff --git a/src/nvthread/CMakeLists.txt b/src/nvthread/CMakeLists.txt index 435141c..53f9a67 100644 --- a/src/nvthread/CMakeLists.txt +++ b/src/nvthread/CMakeLists.txt @@ -1,11 +1,13 @@ PROJECT(nvthreads) SET(THREADS_SRCS - nvthreads.h + nvthread.h nvthread.cpp + Atomic.h + Event.h Event.cpp Mutex.h Mutex.cpp - SpinWaiter.h SpinWaiter.cpp + ParallelFor.h ParallelFor.cpp Thread.h Thread.cpp - ThreadLocalStorage.h ThreadLocalStorage.cpp) + ThreadPool.h ThreadPool.cpp) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/nvthread/Event.cpp b/src/nvthread/Event.cpp index d39f54c..a86893d 100644 --- a/src/nvthread/Event.cpp +++ b/src/nvthread/Event.cpp @@ -13,7 +13,7 @@ using namespace nv; #if NV_OS_WIN32 struct Event::Private { - HANDLE handle; + HANDLE handle; }; Event::Event() : m(new Private) { @@ -48,5 +48,6 @@ void Event::wait() { } #elif NV_OS_UNIX - // @@ + // @@ TODO +#pragma NV_MESSAGE("Implement event using pthreads!") #endif diff --git a/src/nvthread/Event.h b/src/nvthread/Event.h index c8ff1d0..1e738a8 100644 --- a/src/nvthread/Event.h +++ b/src/nvthread/Event.h @@ -11,23 +11,23 @@ namespace nv { // This is intended to be used by a single waiter thread. - class NVTHREAD_CLASS Event - { - NV_FORBID_COPY(Event); - public: - Event(); - ~Event(); + class NVTHREAD_CLASS Event + { + NV_FORBID_COPY(Event); + public: + Event(); + ~Event(); - void post(); - void wait(); // Wait resets the event. + void post(); + void wait(); // Wait resets the event. 
static void post(Event * events, uint count); static void wait(Event * events, uint count); - private: - struct Private; - AutoPtr m; - }; + private: + struct Private; + AutoPtr m; + }; } // nv namespace diff --git a/src/nvthread/Mutex.cpp b/src/nvthread/Mutex.cpp index 698c879..cb6ebfc 100644 --- a/src/nvthread/Mutex.cpp +++ b/src/nvthread/Mutex.cpp @@ -19,71 +19,71 @@ using namespace nv; #if NV_OS_WIN32 struct Mutex::Private { - CRITICAL_SECTION mutex; + CRITICAL_SECTION mutex; }; Mutex::Mutex () : m(new Private) { - InitializeCriticalSection(&m->mutex); + InitializeCriticalSection(&m->mutex); } Mutex::~Mutex () { - DeleteCriticalSection(&m->mutex); + DeleteCriticalSection(&m->mutex); } void Mutex::lock() { - EnterCriticalSection(&m->mutex); + EnterCriticalSection(&m->mutex); } bool Mutex::tryLock() { - return TryEnterCriticalSection(&m->mutex) != 0; + return TryEnterCriticalSection(&m->mutex) != 0; } void Mutex::unlock() { - LeaveCriticalSection(&m->mutex); + LeaveCriticalSection(&m->mutex); } #elif NV_OS_UNIX struct Mutex::Private { - pthread_mutex_t mutex; + pthread_mutex_t mutex; }; Mutex::Mutex () : m(new Private) { - int result = pthread_mutex_init(&m->mutex , NULL); - nvDebugCheck(result == 0); + int result = pthread_mutex_init(&m->mutex , NULL); + nvDebugCheck(result == 0); } Mutex::~Mutex () { - int result = pthread_mutex_destroy(&m->mutex); - nvDebugCheck(result == 0); + int result = pthread_mutex_destroy(&m->mutex); + nvDebugCheck(result == 0); } void Mutex::lock() { - int result = pthread_mutex_lock(&m->mutex); - nvDebugCheck(result == 0); + int result = pthread_mutex_lock(&m->mutex); + nvDebugCheck(result == 0); } bool Mutex::tryLock() { - int result = pthread_mutex_trylock(&m->mutex); - nvDebugCheck(result == 0 || result == EBUSY); - return result == 0; + int result = pthread_mutex_trylock(&m->mutex); + nvDebugCheck(result == 0 || result == EBUSY); + return result == 0; } void Mutex::unlock() { - int result = pthread_mutex_unlock(&m->mutex); - 
nvDebugCheck(result == 0); + int result = pthread_mutex_unlock(&m->mutex); + nvDebugCheck(result == 0); } -#endif // NV_OS \ No newline at end of file +#endif // NV_OS_UNIX diff --git a/src/nvthread/Mutex.h b/src/nvthread/Mutex.h index 841fc3d..13e34e0 100644 --- a/src/nvthread/Mutex.h +++ b/src/nvthread/Mutex.h @@ -11,36 +11,36 @@ namespace nv { - class NVTHREAD_CLASS Mutex - { - NV_FORBID_COPY(Mutex); - public: - Mutex (); - ~Mutex (); + class NVTHREAD_CLASS Mutex + { + NV_FORBID_COPY(Mutex); + public: + Mutex (); + ~Mutex (); - void lock(); - bool tryLock(); - void unlock(); + void lock(); + bool tryLock(); + void unlock(); - private: - struct Private; - AutoPtr m; - }; + private: + struct Private; + AutoPtr m; +}; // Templated lock that can be used with any mutex. template - class Lock - { - NV_FORBID_COPY(Lock); - public: - - Lock (M & m) : m_mutex (m) { m_mutex.lock(); } - ~Lock () { m_mutex.unlock(); } - - private: - M & m_mutex; - }; + class Lock + { + NV_FORBID_COPY(Lock); + public: + + Lock (M & m) : m_mutex (m) { m_mutex.lock(); } + ~Lock () { m_mutex.unlock(); } + + private: + M & m_mutex; + }; } // nv namespace diff --git a/src/nvthread/Thread.cpp b/src/nvthread/Thread.cpp index c8c39d8..68dc0c5 100644 --- a/src/nvthread/Thread.cpp +++ b/src/nvthread/Thread.cpp @@ -3,10 +3,10 @@ #include "Thread.h" #if NV_OS_WIN32 - #include "Win32.h" + #include "Win32.h" #elif NV_OS_UNIX - #include - #include // usleep + #include + #include // usleep #endif using namespace nv; @@ -14,15 +14,16 @@ using namespace nv; struct Thread::Private { #if NV_OS_WIN32 - HANDLE thread; + HANDLE thread; #elif NV_OS_UNIX - pthread_t thread; + pthread_t thread; #endif ThreadFunc * func; void * arg; }; + #if NV_OS_WIN32 unsigned long __stdcall threadFunc(void * arg) { @@ -32,11 +33,13 @@ unsigned long __stdcall threadFunc(void * arg) { } #elif NV_OS_UNIX + extern "C" void * threadFunc(void * arg) { Thread * thread = (Thread *)arg; - thread->func(thread->arg); - pthread_exit(0); + 
thread->func(thread->arg); + pthread_exit(0); } + #endif @@ -47,7 +50,7 @@ Thread::Thread() : p(new Private) Thread::~Thread() { - nvDebugCheck(p->thread == 0); + nvDebugCheck(p->thread == 0); } void Thread::start(ThreadFunc * func, void * arg) @@ -56,12 +59,12 @@ void Thread::start(ThreadFunc * func, void * arg) this->arg = arg; #if NV_OS_WIN32 - p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL); - //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions... - nvDebugCheck(p->thread != NULL); + p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL); + //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions... + nvDebugCheck(p->thread != NULL); #elif NV_OS_UNIX - int result = pthread_create(&p->thread, NULL, threadFunc, this); - nvDebugCheck(result == 0); + int result = pthread_create(&p->thread, NULL, threadFunc, this); + nvDebugCheck(result == 0); #endif } @@ -74,42 +77,42 @@ void Thread::wait() p->thread = NULL; nvCheck (ok); #elif NV_OS_UNIX - int result = pthread_join(p->thread, NULL); + int result = pthread_join(p->thread, NULL); p->thread = 0; - nvDebugCheck(result == 0); + nvDebugCheck(result == 0); #endif } bool Thread::isRunning () const { #if NV_OS_WIN32 - return p->thread != NULL; + return p->thread != NULL; #elif NV_OS_UNIX - return p->thread != 0; + return p->thread != 0; #endif } /*static*/ void Thread::spinWait(uint count) { - for (uint i = 0; i < count; i++) {} + for (uint i = 0; i < count; i++) {} } /*static*/ void Thread::yield() { #if NV_OS_WIN32 - SwitchToThread(); + SwitchToThread(); #elif NV_OS_UNIX - int result = sched_yield(); - nvDebugCheck(result == 0); + int result = sched_yield(); + nvDebugCheck(result == 0); #endif } /*static*/ void Thread::sleep(uint ms) { #if NV_OS_WIN32 - Sleep(ms); + Sleep(ms); #elif NV_OS_UNIX - usleep(1000 * ms); + usleep(1000 * ms); #endif } @@ -133,4 +136,5 @@ bool Thread::isRunning () 
const threads[i].wait(); } //#endif -} \ No newline at end of file +} + diff --git a/src/nvthread/Thread.h b/src/nvthread/Thread.h index cdd5b70..0e46564 100644 --- a/src/nvthread/Thread.h +++ b/src/nvthread/Thread.h @@ -12,34 +12,34 @@ namespace nv { typedef void ThreadFunc(void * arg); - class NVTHREAD_CLASS Thread - { - NV_FORBID_COPY(Thread); - public: - Thread(); - ~Thread(); + class NVTHREAD_CLASS Thread + { + NV_FORBID_COPY(Thread); + public: + Thread(); + ~Thread(); - void start(ThreadFunc * func, void * arg); - void wait(); + void start(ThreadFunc * func, void * arg); + void wait(); - bool isRunning() const; + bool isRunning() const; - static void spinWait(uint count); - static void yield(); - static void sleep(uint ms); + static void spinWait(uint count); + static void yield(); + static void sleep(uint ms); - static void wait(Thread * threads, uint count); - - private: + static void wait(Thread * threads, uint count); - struct Private; - AutoPtr p; - - public: + private: + + struct Private; + AutoPtr p; + + public: // @@ Why public? Also in private?! ThreadFunc * func; void * arg; - }; + }; } // nv namespace diff --git a/src/nvthread/ThreadPool.cpp b/src/nvthread/ThreadPool.cpp index a343fab..af111f1 100644 --- a/src/nvthread/ThreadPool.cpp +++ b/src/nvthread/ThreadPool.cpp @@ -4,6 +4,8 @@ #include "Mutex.h" #include "Thread.h" +#include "nvcore/Utils.h" + // Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it. 
#define PROTECT_THREAD_POOL 1 @@ -47,7 +49,7 @@ AutoPtr s_pool; /*static*/ void ThreadPool::workerFunc(void * arg) { - uint i = (uint)arg; + uint i = toU32((uintptr_t)arg); // This is OK, because workerCount should always be <<< 2^32 while(true) { @@ -118,4 +120,4 @@ void ThreadPool::wait() allIdle = true; } -} \ No newline at end of file +} diff --git a/src/nvthread/nvthread.cpp b/src/nvthread/nvthread.cpp index 0d40f86..db46927 100644 --- a/src/nvthread/nvthread.cpp +++ b/src/nvthread/nvthread.cpp @@ -1,11 +1,16 @@ +// This code is in the public domain -- Ignacio Castaño #include "nvthread.h" #include "Thread.h" -#define WIN32_LEAN_AND_MEAN -#define VC_EXTRALEAN -#include +#if NV_OS_WIN32 + #include "Win32.h" +#elif NV_OS_UNIX + #include + #include +#endif + using namespace nv; diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp index 9645238..fb31c47 100644 --- a/src/nvtt/tests/testsuite.cpp +++ b/src/nvtt/tests/testsuite.cpp @@ -269,6 +269,10 @@ struct MyOutputHandler : public nvtt::OutputHandler m_ptr = m_data; } + virtual void endImage() + { + } + virtual bool writeData(const void * data, int size) { memcpy(m_ptr, data, size);