Porting parallel stuff to unix.

This commit is contained in:
castano 2011-09-27 18:12:32 +00:00
parent 5081360073
commit 2364f539eb
12 changed files with 181 additions and 138 deletions

View File

@ -25,6 +25,12 @@ namespace nv
// asUnsigned: converts a signed integer to the unsigned type of the same width
// with a plain cast. One overload for int32 -> uint32 and one for int64 -> uint64.
inline uint32 asUnsigned(int32 x) { return (uint32) x; } inline uint32 asUnsigned(int32 x) { return (uint32) x; }
inline uint64 asUnsigned(int64 x) { return (uint64) x; } inline uint64 asUnsigned(int64 x) { return (uint64) x; }
// Converts x to uint32, checking with nvDebugCheck that the value lies in
// [0, UINT32_MAX] before the conversion is performed.
template <typename T>
inline uint32 toU32(T x)
{
    nvDebugCheck(x <= UINT32_MAX);  // upper bound of the destination type
    nvDebugCheck(x >= 0);           // negative values cannot be represented
    return static_cast<uint32>(x);
}
/* /*
template <typename T> inline int8 toI8(T x) { template <typename T> inline int8 toI8(T x) {
nvDebugCheck(x <= INT8_MAX); nvDebugCheck(x <= INT8_MAX);

View File

@ -34,16 +34,16 @@ extern "C"
namespace nv { namespace nv {
// Load and stores. // Load and stores.
// Returns the value at *ptr with an ordinary read; no barrier is issued here.
inline uint32 loadRelaxed(const uint32 * ptr) { return *ptr; } inline uint32 loadRelaxed(const uint32 * ptr) { return *ptr; }
// Writes value to *ptr with an ordinary assignment; no barrier is issued here.
inline void storeRelaxed(uint32 * ptr, uint32 value) { *ptr = value; } inline void storeRelaxed(uint32 * ptr, uint32 value) { *ptr = value; }
inline uint32 loadAcquire(const volatile uint32 * ptr) inline uint32 loadAcquire(const volatile uint32 * ptr)
{ {
nvDebugCheck((intptr_t(ptr) & 3) == 0); nvDebugCheck((intptr_t(ptr) & 3) == 0);
#if POSH_CPU_X86 || POSH_CPU_X86_64 #if POSH_CPU_X86 || POSH_CPU_X86_64
nvCompilerReadBarrier(); nvCompilerReadBarrier();
uint32 ret = *ptr; // on x86, loads are Acquire uint32 ret = *ptr; // on x86, loads are Acquire
nvCompilerReadBarrier(); nvCompilerReadBarrier();
return ret; return ret;
#else #else
@ -51,17 +51,17 @@ namespace nv {
#endif #endif
} }
// Stores value to *ptr. On x86 / x86-64 the plain store is bracketed by
// compiler write barriers; every other CPU is rejected at compile time.
inline void storeRelease(volatile uint32 * ptr, uint32 value) inline void storeRelease(volatile uint32 * ptr, uint32 value)
{ {
// Check (via nvDebugCheck) that the destination address is a multiple of 4.
nvDebugCheck((intptr_t(ptr) & 3) == 0); nvDebugCheck((intptr_t(ptr) & 3) == 0);
// NOTE(review): this tests the address of the by-value parameter, not of any
// caller-owned data -- confirm that this check is intended.
nvDebugCheck((intptr_t(&value) & 3) == 0); nvDebugCheck((intptr_t(&value) & 3) == 0);
#if POSH_CPU_X86 || POSH_CPU_X86_64 #if POSH_CPU_X86 || POSH_CPU_X86_64
nvCompilerWriteBarrier(); nvCompilerWriteBarrier();
*ptr = value; // on x86, stores are Release *ptr = value; // on x86, stores are Release
nvCompilerWriteBarrier(); nvCompilerWriteBarrier();
#else #else
#error "Not implemented" #error "Atomics not implemented."
#endif #endif
} }
@ -84,6 +84,25 @@ namespace nv {
return (uint32)_InterlockedDecrement((long *)value); return (uint32)_InterlockedDecrement((long *)value);
} }
#elif NV_CC_GNUC
// Many alternative implementations at:
// http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html
// Atomically adds one to *value and returns the NEW (post-increment) value.
//
// The Win32 branch of this header returns the result of the _Interlocked*
// intrinsics, which yield the updated value. __sync_fetch_and_add would return
// the value *before* the update, so the add-and-fetch builtin is used instead
// to keep both platforms consistent.
//
// value must point to a 4-byte aligned uint32 (checked with nvDebugCheck).
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    return __sync_add_and_fetch(value, 1);
}

// Atomically subtracts one from *value and returns the NEW (post-decrement)
// value, matching what _InterlockedDecrement returns in the Win32 branch.
//
// value must point to a 4-byte aligned uint32 (checked with nvDebugCheck).
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    return __sync_sub_and_fetch(value, 1);
}
#else
#error "Atomics not implemented."
#endif #endif
@ -107,19 +126,19 @@ namespace nv {
template <typename T> template <typename T>
class Atomic class Atomic
{ {
public: public:
explicit Atomic() : m_value() { } explicit Atomic() : m_value() { }
explicit Atomic( T val ) : m_value(val) { } explicit Atomic( T val ) : m_value(val) { }
~Atomic() { } ~Atomic() { }
T loadRelaxed() const { return m_value; } T loadRelaxed() const { return m_value; }
void storeRelaxed(T val) { m_value = val; } void storeRelaxed(T val) { m_value = val; }
//T loadAcquire() const volatile { return nv::loadAcquire(&m_value); } //T loadAcquire() const volatile { return nv::loadAcquire(&m_value); }
//void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); } //void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); }
void increment() /*volatile*/ { nv::atomicIncrement(m_value); } void increment() /*volatile*/ { nv::atomicIncrement(m_value); }
void decrement() /*volatile*/ { nv::atomicDecrement(m_value); } void decrement() /*volatile*/ { nv::atomicDecrement(m_value); }
@ -128,14 +147,14 @@ namespace nv {
T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); } T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); } T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); }
private: private:
// don't provide operator = or == ; make the client write Store( Load() ) // don't provide operator = or == ; make the client write Store( Load() )
NV_FORBID_COPY(Atomic); NV_FORBID_COPY(Atomic);
NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64)); NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64));
T m_value; T m_value;
}; };
#endif #endif
} // nv namespace } // nv namespace

View File

@ -1,11 +1,13 @@
PROJECT(nvthreads) PROJECT(nvthreads)
SET(THREADS_SRCS SET(THREADS_SRCS
nvthreads.h nvthread.h nvthread.cpp
Atomic.h
Event.h Event.cpp
Mutex.h Mutex.cpp Mutex.h Mutex.cpp
SpinWaiter.h SpinWaiter.cpp ParallelFor.h ParallelFor.cpp
Thread.h Thread.cpp Thread.h Thread.cpp
ThreadLocalStorage.h ThreadLocalStorage.cpp) ThreadPool.h ThreadPool.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -13,7 +13,7 @@ using namespace nv;
#if NV_OS_WIN32 #if NV_OS_WIN32
// Win32 implementation data for Event: holds the HANDLE used by the event.
struct Event::Private { struct Event::Private {
HANDLE handle; HANDLE handle;
}; };
Event::Event() : m(new Private) { Event::Event() : m(new Private) {
@ -48,5 +48,6 @@ void Event::wait() {
} }
#elif NV_OS_UNIX #elif NV_OS_UNIX
// @@ // @@ TODO
#pragma NV_MESSAGE("Implement event using pthreads!")
#endif #endif

View File

@ -11,23 +11,23 @@
namespace nv namespace nv
{ {
// This is intended to be used by a single waiter thread. // This is intended to be used by a single waiter thread.
// Non-copyable event object. Platform-specific state is kept behind the
// forward-declared Private struct owned through AutoPtr.
class NVTHREAD_CLASS Event class NVTHREAD_CLASS Event
{ {
NV_FORBID_COPY(Event); NV_FORBID_COPY(Event);
public: public:
Event(); Event();
~Event(); ~Event();
void post(); void post();
void wait(); // Wait resets the event. void wait(); // Wait resets the event.
// Array variants: operate on `count` events starting at `events`.
static void post(Event * events, uint count); static void post(Event * events, uint count);
static void wait(Event * events, uint count); static void wait(Event * events, uint count);
private: private:
struct Private; struct Private;
AutoPtr<Private> m; AutoPtr<Private> m;
}; };
} // nv namespace } // nv namespace

View File

@ -19,71 +19,71 @@ using namespace nv;
#if NV_OS_WIN32 #if NV_OS_WIN32
// Win32 implementation data for Mutex: a CRITICAL_SECTION.
struct Mutex::Private { struct Mutex::Private {
CRITICAL_SECTION mutex; CRITICAL_SECTION mutex;
}; };
// Allocates the private data and initializes its critical section.
Mutex::Mutex () : m(new Private) Mutex::Mutex () : m(new Private)
{ {
InitializeCriticalSection(&m->mutex); InitializeCriticalSection(&m->mutex);
} }
// Deletes the critical section that the constructor initialized.
Mutex::~Mutex () Mutex::~Mutex ()
{ {
DeleteCriticalSection(&m->mutex); DeleteCriticalSection(&m->mutex);
} }
// Acquires the mutex by entering the critical section.
void Mutex::lock() void Mutex::lock()
{ {
EnterCriticalSection(&m->mutex); EnterCriticalSection(&m->mutex);
} }
// Attempts to enter the critical section; returns true when
// TryEnterCriticalSection reports a non-zero result.
bool Mutex::tryLock() bool Mutex::tryLock()
{ {
return TryEnterCriticalSection(&m->mutex) != 0; return TryEnterCriticalSection(&m->mutex) != 0;
} }
// Releases the mutex by leaving the critical section.
void Mutex::unlock() void Mutex::unlock()
{ {
LeaveCriticalSection(&m->mutex); LeaveCriticalSection(&m->mutex);
} }
#elif NV_OS_UNIX #elif NV_OS_UNIX
// Unix implementation data for Mutex: a pthread mutex.
struct Mutex::Private { struct Mutex::Private {
pthread_mutex_t mutex; pthread_mutex_t mutex;
}; };
// Allocates the private data and initializes the pthread mutex with default
// attributes (NULL attr). A non-zero result is reported through nvDebugCheck.
Mutex::Mutex () : m(new Private) Mutex::Mutex () : m(new Private)
{ {
int result = pthread_mutex_init(&m->mutex , NULL); int result = pthread_mutex_init(&m->mutex , NULL);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
} }
// Destroys the pthread mutex; a non-zero result is reported through nvDebugCheck.
Mutex::~Mutex () Mutex::~Mutex ()
{ {
int result = pthread_mutex_destroy(&m->mutex); int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
} }
// Acquires the mutex with pthread_mutex_lock; a non-zero result is reported
// through nvDebugCheck.
void Mutex::lock() void Mutex::lock()
{ {
int result = pthread_mutex_lock(&m->mutex); int result = pthread_mutex_lock(&m->mutex);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
} }
// Attempts to acquire the mutex with pthread_mutex_trylock.
// Returns true only when the call returns 0. EBUSY is an accepted result and
// yields false; any other result trips nvDebugCheck and also yields false.
bool Mutex::tryLock() bool Mutex::tryLock()
{ {
int result = pthread_mutex_trylock(&m->mutex); int result = pthread_mutex_trylock(&m->mutex);
nvDebugCheck(result == 0 || result == EBUSY); nvDebugCheck(result == 0 || result == EBUSY);
return result == 0; return result == 0;
} }
// Releases the mutex with pthread_mutex_unlock; a non-zero result is reported
// through nvDebugCheck.
void Mutex::unlock() void Mutex::unlock()
{ {
int result = pthread_mutex_unlock(&m->mutex); int result = pthread_mutex_unlock(&m->mutex);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
} }
#endif // NV_OS #endif // NV_OS_UNIX

View File

@ -11,36 +11,36 @@
namespace nv namespace nv
{ {
// Non-copyable mutex. Platform-specific state is kept behind the
// forward-declared Private struct owned through AutoPtr.
class NVTHREAD_CLASS Mutex class NVTHREAD_CLASS Mutex
{ {
NV_FORBID_COPY(Mutex); NV_FORBID_COPY(Mutex);
public: public:
Mutex (); Mutex ();
~Mutex (); ~Mutex ();
void lock(); void lock();
// Returns true when the mutex was acquired.
bool tryLock(); bool tryLock();
void unlock(); void unlock();
private: private:
struct Private; struct Private;
AutoPtr<Private> m; AutoPtr<Private> m;
}; };
// Templated lock that can be used with any mutex. // Templated lock that can be used with any mutex.
// Scoped lock: the constructor calls m.lock() and the destructor calls
// unlock() on the same mutex. M only needs to provide lock() and unlock().
// The mutex is held by reference, so it must outlive the Lock.
template <class M> template <class M>
class Lock class Lock
{ {
NV_FORBID_COPY(Lock); NV_FORBID_COPY(Lock);
public: public:
Lock (M & m) : m_mutex (m) { m_mutex.lock(); } Lock (M & m) : m_mutex (m) { m_mutex.lock(); }
~Lock () { m_mutex.unlock(); } ~Lock () { m_mutex.unlock(); }
private: private:
M & m_mutex; M & m_mutex;
}; };
} // nv namespace } // nv namespace

View File

@ -3,10 +3,10 @@
#include "Thread.h" #include "Thread.h"
#if NV_OS_WIN32 #if NV_OS_WIN32
#include "Win32.h" #include "Win32.h"
#elif NV_OS_UNIX #elif NV_OS_UNIX
#include <pthread.h> #include <pthread.h>
#include <unistd.h> // usleep #include <unistd.h> // usleep
#endif #endif
using namespace nv; using namespace nv;
@ -14,15 +14,16 @@ using namespace nv;
// Implementation data for Thread: the native thread handle for the current
// platform plus an entry-point function pointer and its argument.
struct Thread::Private struct Thread::Private
{ {
#if NV_OS_WIN32 #if NV_OS_WIN32
HANDLE thread; HANDLE thread;
#elif NV_OS_UNIX #elif NV_OS_UNIX
pthread_t thread; pthread_t thread;
#endif #endif
// NOTE(review): class Thread also declares public func/arg members, and the
// thread trampoline reads those -- confirm whether these copies are used.
ThreadFunc * func; ThreadFunc * func;
void * arg; void * arg;
}; };
#if NV_OS_WIN32 #if NV_OS_WIN32
unsigned long __stdcall threadFunc(void * arg) { unsigned long __stdcall threadFunc(void * arg) {
@ -32,11 +33,13 @@ unsigned long __stdcall threadFunc(void * arg) {
} }
#elif NV_OS_UNIX #elif NV_OS_UNIX
// pthread entry point. arg is the Thread object that was passed to
// pthread_create; its func is invoked with its arg, then the thread exits
// through pthread_exit(0).
extern "C" void * threadFunc(void * arg) { extern "C" void * threadFunc(void * arg) {
Thread * thread = (Thread *)arg; Thread * thread = (Thread *)arg;
thread->func(thread->arg); thread->func(thread->arg);
pthread_exit(0); pthread_exit(0);
} }
#endif #endif
@ -47,7 +50,7 @@ Thread::Thread() : p(new Private)
// Checks (via nvDebugCheck) that the thread handle is already zero at
// destruction time; wait() is what resets the handle after joining.
Thread::~Thread() Thread::~Thread()
{ {
nvDebugCheck(p->thread == 0); nvDebugCheck(p->thread == 0);
} }
void Thread::start(ThreadFunc * func, void * arg) void Thread::start(ThreadFunc * func, void * arg)
@ -57,11 +60,11 @@ void Thread::start(ThreadFunc * func, void * arg)
#if NV_OS_WIN32 #if NV_OS_WIN32
p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL); p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL);
//p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions... //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions...
nvDebugCheck(p->thread != NULL); nvDebugCheck(p->thread != NULL);
#elif NV_OS_UNIX #elif NV_OS_UNIX
int result = pthread_create(&p->thread, NULL, threadFunc, this); int result = pthread_create(&p->thread, NULL, threadFunc, this);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
#endif #endif
} }
@ -74,42 +77,42 @@ void Thread::wait()
p->thread = NULL; p->thread = NULL;
nvCheck (ok); nvCheck (ok);
#elif NV_OS_UNIX #elif NV_OS_UNIX
int result = pthread_join(p->thread, NULL); int result = pthread_join(p->thread, NULL);
p->thread = 0; p->thread = 0;
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
#endif #endif
} }
// Returns true while the stored thread handle is non-null / non-zero, i.e.
// between start() creating the thread and wait() resetting the handle.
// It does not query the OS for whether the thread function has returned.
// NOTE(review): the Unix branch compares a pthread_t against 0, which assumes
// pthread_t is an integer or pointer type -- confirm for all target platforms.
bool Thread::isRunning () const bool Thread::isRunning () const
{ {
#if NV_OS_WIN32 #if NV_OS_WIN32
return p->thread != NULL; return p->thread != NULL;
#elif NV_OS_UNIX #elif NV_OS_UNIX
return p->thread != 0; return p->thread != 0;
#endif #endif
} }
// Busy-waits by running an empty loop `count` times.
// NOTE(review): the loop has no side effects, so an optimizing compiler is
// allowed to remove it entirely -- confirm that the delay survives in release builds.
/*static*/ void Thread::spinWait(uint count) /*static*/ void Thread::spinWait(uint count)
{ {
for (uint i = 0; i < count; i++) {} for (uint i = 0; i < count; i++) {}
} }
// Gives up the calling thread's time slice: SwitchToThread() on Win32,
// sched_yield() on Unix (a non-zero result is reported through nvDebugCheck).
/*static*/ void Thread::yield() /*static*/ void Thread::yield()
{ {
#if NV_OS_WIN32 #if NV_OS_WIN32
SwitchToThread(); SwitchToThread();
#elif NV_OS_UNIX #elif NV_OS_UNIX
int result = sched_yield(); int result = sched_yield();
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
#endif #endif
} }
// Suspends the calling thread for `ms` milliseconds: Sleep(ms) on Win32,
// usleep(1000 * ms) on Unix (usleep takes microseconds).
// NOTE(review): 1000 * ms is evaluated in uint and wraps for very large ms, and
// POSIX allows usleep to fail with EINVAL for intervals of one second or more;
// the return value is ignored here -- confirm this is acceptable on all targets.
/*static*/ void Thread::sleep(uint ms) /*static*/ void Thread::sleep(uint ms)
{ {
#if NV_OS_WIN32 #if NV_OS_WIN32
Sleep(ms); Sleep(ms);
#elif NV_OS_UNIX #elif NV_OS_UNIX
usleep(1000 * ms); usleep(1000 * ms);
#endif #endif
} }
@ -134,3 +137,4 @@ bool Thread::isRunning () const
} }
//#endif //#endif
} }

View File

@ -12,34 +12,34 @@ namespace nv
{ {
typedef void ThreadFunc(void * arg); typedef void ThreadFunc(void * arg);
// Non-copyable thread wrapper. start() launches func(arg) on a new native
// thread and wait() joins it; the static helpers act on the calling thread
// (spinWait / yield / sleep) or on an array of threads (wait).
// Platform-specific state is kept behind the forward-declared Private struct.
class NVTHREAD_CLASS Thread class NVTHREAD_CLASS Thread
{ {
NV_FORBID_COPY(Thread); NV_FORBID_COPY(Thread);
public:
Thread();
~Thread();
void start(ThreadFunc * func, void * arg);
void wait();
bool isRunning() const;
static void spinWait(uint count);
static void yield();
static void sleep(uint ms);
static void wait(Thread * threads, uint count);
private:
struct Private;
AutoPtr<Private> p;
public: public:
Thread();
~Thread();
void start(ThreadFunc * func, void * arg);
void wait();
bool isRunning() const;
static void spinWait(uint count);
static void yield();
static void sleep(uint ms);
static void wait(Thread * threads, uint count);
private:
struct Private;
AutoPtr<Private> p;
public: // @@ Why public? Also in private?!
// Entry point and its argument; the Unix thread trampoline reads these
// through the Thread pointer it receives (thread->func(thread->arg)).
ThreadFunc * func; ThreadFunc * func;
void * arg; void * arg;
}; };
} // nv namespace } // nv namespace

View File

@ -4,6 +4,8 @@
#include "Mutex.h" #include "Mutex.h"
#include "Thread.h" #include "Thread.h"
#include "nvcore/Utils.h"
// Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it. // Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it.
#define PROTECT_THREAD_POOL 1 #define PROTECT_THREAD_POOL 1
@ -47,7 +49,7 @@ AutoPtr<ThreadPool> s_pool;
/*static*/ void ThreadPool::workerFunc(void * arg) { /*static*/ void ThreadPool::workerFunc(void * arg) {
uint i = (uint)arg; uint i = toU32((uintptr_t)arg); // This is OK, because workerCount should always be <<< 2^32
while(true) while(true)
{ {

View File

@ -1,11 +1,16 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "nvthread.h" #include "nvthread.h"
#include "Thread.h" #include "Thread.h"
#define WIN32_LEAN_AND_MEAN #if NV_OS_WIN32
#define VC_EXTRALEAN #include "Win32.h"
#include <windows.h> #elif NV_OS_UNIX
#include <sys/types.h>
#include <sys/sysctl.h>
#endif
using namespace nv; using namespace nv;

View File

@ -269,6 +269,10 @@ struct MyOutputHandler : public nvtt::OutputHandler
m_ptr = m_data; m_ptr = m_data;
} }
// Intentionally empty: this handler does no work when an image ends.
virtual void endImage() {}
virtual bool writeData(const void * data, int size) virtual bool writeData(const void * data, int size)
{ {
memcpy(m_ptr, data, size); memcpy(m_ptr, data, size);