396 lines
13 KiB
C++
396 lines
13 KiB
C++
// This code is in the public domain -- castanyo@yahoo.es
|
|
|
|
#ifndef NV_THREAD_ATOMIC_H
|
|
#define NV_THREAD_ATOMIC_H
|
|
|
|
#include "nvthread.h"
|
|
|
|
#include "nvcore/Debug.h"
|
|
|
|
|
|
#if NV_CC_MSVC
|
|
|
|
#include <intrin.h> // Already included by nvthread.h
|
|
|
|
#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
|
|
#pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
|
|
//#pragma intrinsic(_InterlockedExchangeAdd64)
|
|
|
|
/*
|
|
extern "C"
|
|
{
|
|
#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
|
|
LONG __cdecl _InterlockedIncrement(long volatile *Addend);
|
|
LONG __cdecl _InterlockedDecrement(long volatile *Addend);
|
|
|
|
#pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
|
|
LONG __cdecl _InterlockedCompareExchange(long volatile * Destination, long Exchange, long Compared);
|
|
LONG __cdecl _InterlockedExchange(long volatile * Target, LONG Value);
|
|
}
|
|
*/
|
|
|
|
#endif // NV_CC_MSVC
|
|
|
|
#if NV_CC_CLANG && POSH_CPU_STRONGARM
|
|
// LLVM/Clang do not yet have functioning atomics as of 2.1
|
|
// #include <atomic>
|
|
#endif
|
|
|
|
//ACS: need this if we want to use Apple's atomics.
|
|
/*
|
|
#if NV_OS_IOS || NV_OS_DARWIN
|
|
// for iOS & OSX we use apple's atomics
|
|
#include "libkern/OSAtomic.h"
|
|
#endif
|
|
*/
|
|
|
|
namespace nv {
|
|
|
|
// Load and stores.
|
|
// Plain read of the word at 'ptr'. No barrier of any kind is issued.
// Returns the value read.
inline uint32 loadRelaxed(const uint32 * ptr)
{
    return *ptr;
}
|
|
// Plain write of 'value' to the word at 'ptr'. No barrier of any kind is issued.
inline void storeRelaxed(uint32 * ptr, uint32 value)
{
    *ptr = value;
}
|
|
|
|
// Reads the 32-bit word at 'ptr' with (intended) acquire ordering.
// 'ptr' must be 4-byte aligned; this is verified with nvDebugCheck.
// Returns the value read.
inline uint32 loadAcquire(const volatile uint32 * ptr)
{
    nvDebugCheck((intptr_t(ptr) & 3) == 0);

#if POSH_CPU_X86 || POSH_CPU_X86_64
    // On x86 every load already behaves as an acquire, so the barrier after the
    // read only has to constrain the compiler.
    const uint32 loaded = *ptr;
    nvCompilerReadBarrier();
#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64
    // Easiest but slowest approach: bracket the load with read/write barriers.
    // A more specific CPU type may be needed for armv7.
    // NOTE(review): the load is currently treated like the x86 case, which could
    // be wrong. Confirm that nvCompilerReadWriteBarrier() really provides the
    // ordering this path relies on; its definition is not visible from here.
    nvCompilerReadWriteBarrier();
    const uint32 loaded = *ptr; // replace with ldrex?
    nvCompilerReadWriteBarrier();
#else
#error "Not implemented"
#endif

    return loaded;
}
|
|
|
|
inline void storeRelease(volatile uint32 * ptr, uint32 value)
|
|
{
|
|
nvDebugCheck((intptr_t(ptr) & 3) == 0);
|
|
nvDebugCheck((intptr_t(&value) & 3) == 0);
|
|
|
|
#if POSH_CPU_X86 || POSH_CPU_X86_64
|
|
nvCompilerWriteBarrier();
|
|
*ptr = value; // on x86, stores are Release
|
|
//nvCompilerWriteBarrier(); // @@ IC: Where does this barrier go? In nvtt it was after, in Witness before. Not sure which one is right.
|
|
#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64
|
|
// this is the easiest but slowest way to do this
|
|
nvCompilerReadWriteBarrier();
|
|
*ptr = value; //strex?
|
|
nvCompilerReadWriteBarrier();
|
|
#else
|
|
#error "Atomics not implemented."
|
|
#endif
|
|
}
|
|
|
|
|
|
template <typename T>
|
|
inline void storeReleasePointer(volatile T * pTo, T from)
|
|
{
|
|
NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
|
|
nvDebugCheck((((intptr_t)pTo) % sizeof(intptr_t)) == 0);
|
|
nvDebugCheck((((intptr_t)&from) % sizeof(intptr_t)) == 0);
|
|
nvCompilerWriteBarrier();
|
|
*pTo = from; // on x86, stores are Release
|
|
}
|
|
|
|
template <typename T>
|
|
inline T loadAcquirePointer(volatile T * ptr)
|
|
{
|
|
NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
|
|
nvDebugCheck((((intptr_t)ptr) % sizeof(intptr_t)) == 0);
|
|
T ret = *ptr; // on x86, loads are Acquire
|
|
nvCompilerReadBarrier();
|
|
return ret;
|
|
}
|
|
|
|
|
|
// Atomics. @@ Assuming sequential memory order?
|
|
|
|
#if NV_CC_MSVC
|
|
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
|
|
|
|
// Returns incremented value.
|
|
inline uint32 atomicIncrement(uint32 * value)
|
|
{
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return uint32(_InterlockedIncrement((long *)value));
|
|
}
|
|
|
|
// Returns decremented value.
|
|
inline uint32 atomicDecrement(uint32 * value)
|
|
{
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return uint32(_InterlockedDecrement((long *)value));
|
|
}
|
|
|
|
// Returns added value.
|
|
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add)) + value_to_add;
|
|
}
|
|
|
|
// Returns original value before addition.
|
|
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add));
|
|
}
|
|
|
|
|
|
|
|
|
|
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
|
|
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
|
|
// @@ Is this strong or weak? Does InterlockedCompareExchange have spurious failures?
|
|
// Stores 'desired' in '*value' only if '*value' currently equals 'expected'.
// 'value' must be 4-byte aligned (debug check).
// Returns true when the value reported by the intrinsic equals 'expected',
// i.e. when the exchange took place.
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    // The intrinsic returns the value that was found in '*value'.
    const long wanted = (long)expected;
    const long observed = _InterlockedCompareExchange((long *)value, (long)desired, wanted);
    return observed == wanted;
}
|
|
|
|
|
|
// Atomically replaces '*value' with 'desired'. 'value' must be 4-byte aligned
// (debug check).
// Returns the value reported by the exchange intrinsic (the previous contents).
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    const long previous = _InterlockedExchange((long *)value, (long)desired);
    return (uint32)previous;
}
|
|
|
|
|
|
|
|
#elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN)
|
|
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
|
|
|
|
//ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho'
|
|
/*
|
|
inline uint32 atomicIncrement(uint32 * value)
|
|
{
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return (uint32)OSAtomicIncrement32Barrier((int32_t *)value);
|
|
}
|
|
|
|
inline uint32 atomicDecrement(uint32 * value)
|
|
{
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return (uint32)OSAtomicDecrement32Barrier((int32_t *)value);
|
|
}
|
|
|
|
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
|
|
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
|
|
// @@ Is this strong or weak?
|
|
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
|
|
{
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return OSAtomicCompareAndSwap32Barrier((int32_t)expected, (int32_t)desired, (int32_t *)value);
|
|
}
|
|
*/
|
|
|
|
// Returns incremented value.
|
|
inline uint32 atomicIncrement(uint32 * value) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_add_and_fetch(value, 1);
|
|
}
|
|
|
|
// Returns decremented value.
|
|
inline uint32 atomicDecrement(uint32 * value) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_sub_and_fetch(value, 1);
|
|
}
|
|
|
|
// Returns added value.
|
|
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_add_and_fetch(value, value_to_add);
|
|
}
|
|
|
|
// Returns original value before addition.
|
|
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_fetch_and_add(value, value_to_add);
|
|
}
|
|
|
|
|
|
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
|
|
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
|
|
// @@ Is this strong or weak?
|
|
// Stores 'desired' in '*value' only if '*value' currently equals 'expected'.
// 'value' must be 4-byte aligned (debug check).
// Returns the boolean result of the compare-and-swap builtin.
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
    return swapped;
}
|
|
|
|
// Atomically replaces '*value' with 'desired'. 'value' must be 4-byte aligned
// (debug check).
// Returns the result of the builtin (the previous contents of '*value').
// NOTE(review): the compiler documentation describes __sync_lock_test_and_set as
// an acquire barrier only, not a full barrier like the other __sync builtins.
// Confirm that no caller relies on release ordering from this function.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    // Despite the confusing name, the builtin performs no test: it always sets.
    const uint32 previous = __sync_lock_test_and_set(value, desired);
    return previous;
}
|
|
|
|
|
|
|
|
|
|
#elif NV_CC_CLANG && POSH_CPU_STRONGARM
|
|
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
|
|
|
|
// Atomically adds one to '*value' using a load-exclusive / store-exclusive
// retry loop. 'value' must be 4-byte aligned (debug check).
// Returns the incremented value.
// NOTE(review): the loop contains no explicit memory barrier instruction, so it
// may not give the sequential ordering the other platform variants are assumed
// to provide. Confirm before relying on it for synchronization.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    // this should work in LLVM eventually, but not as of 2.1
    // return (uint32)AtomicIncrement((long *)value);

    // In the mean time, do it by hand.
    //   %0 = result, %1 = address, r1 = store-exclusive status (0 on success).
    // The word-sized ldrex/strex pair is required: the byte variants
    // (ldrexb/strexb) would only read and write the lowest byte of the 32-bit
    // counter. The clobber list names the scratch register, the condition flags
    // changed by 'cmp', and memory, because '*value' is written behind the
    // compiler's back.
    uint32 result;
    asm volatile (
        "1: ldrex %0, [%1] \n\t"
        "add %0, %0, #1 \n\t"
        "strex r1, %0, [%1] \n\t"
        "cmp r1, #0 \n\t"
        "bne 1b"
        : "=&r" (result)
        : "r"(value)
        : "r1", "cc", "memory"
    );
    return result;
}
|
|
|
|
// Atomically subtracts one from '*value' using a load-exclusive /
// store-exclusive retry loop. 'value' must be 4-byte aligned (debug check).
// Returns the decremented value.
// NOTE(review): the loop contains no explicit memory barrier instruction, so it
// may not give the sequential ordering the other platform variants are assumed
// to provide. Confirm before relying on it for synchronization.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    // this should work in LLVM eventually, but not as of 2.1:
    // return (uint32)sys::AtomicDecrement((long *)value);

    // In the mean time, do it by hand.
    //   %0 = result, %1 = address, r1 = store-exclusive status (0 on success).
    // The word-sized ldrex/strex pair is required: the byte variants
    // (ldrexb/strexb) would only read and write the lowest byte of the 32-bit
    // counter. The clobber list names the scratch register, the condition flags
    // changed by 'cmp', and memory, because '*value' is written behind the
    // compiler's back.
    uint32 result;
    asm volatile (
        "1: ldrex %0, [%1] \n\t"
        "sub %0, %0, #1 \n\t"
        "strex r1, %0, [%1] \n\t"
        "cmp r1, #0 \n\t"
        "bne 1b"
        : "=&r" (result)
        : "r"(value)
        : "r1", "cc", "memory"
    );
    return result;
}
|
|
|
|
#elif NV_CC_GNUC
|
|
// Many alternative implementations at:
|
|
// http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html
|
|
|
|
// Returns incremented value.
|
|
inline uint32 atomicIncrement(uint32 * value) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_add_and_fetch(value, 1);
|
|
}
|
|
|
|
// Returns decremented value.
|
|
inline uint32 atomicDecrement(uint32 * value) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_sub_and_fetch(value, 1);
|
|
}
|
|
|
|
// Returns added value.
|
|
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_add_and_fetch(value, value_to_add);
|
|
}
|
|
|
|
// Returns original value before addition.
|
|
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
|
|
nvDebugCheck((intptr_t(value) & 3) == 0);
|
|
return __sync_fetch_and_add(value, value_to_add);
|
|
}
|
|
|
|
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
|
|
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
|
|
// @@ Is this strong or weak?
|
|
// Stores 'desired' in '*value' only if '*value' currently equals 'expected'.
// 'value' must be 4-byte aligned (debug check).
// Returns the boolean result of the compare-and-swap builtin.
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
    return swapped;
}
|
|
|
|
// Atomically replaces '*value' with 'desired'. 'value' must be 4-byte aligned
// (debug check).
// Returns the result of the builtin (the previous contents of '*value').
// NOTE(review): the compiler documentation describes __sync_lock_test_and_set as
// an acquire barrier only, not a full barrier like the other __sync builtins.
// Confirm that no caller relies on release ordering from this function.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    // Despite the confusing name, the builtin performs no test: it always sets.
    const uint32 previous = __sync_lock_test_and_set(value, desired);
    return previous;
}
|
|
|
|
#else
|
|
#error "Atomics not implemented."
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
// It would be nice to have C++0x-style atomic types, but I'm not in the mood right now. Only uint32 supported so far.
|
|
#if 0
|
|
template <typename T>
|
|
void increment(T * value);
|
|
|
|
template <typename T>
|
|
void decrement(T * value);
|
|
|
|
template <>
|
|
void increment(uint32 * value) {
|
|
}
|
|
|
|
template <>
|
|
void increment(uint64 * value) {
|
|
}
|
|
|
|
|
|
|
|
template <typename T>
|
|
class Atomic
|
|
{
|
|
public:
|
|
explicit Atomic() : m_value() { }
|
|
explicit Atomic( T val ) : m_value(val) { }
|
|
~Atomic() { }
|
|
|
|
T loadRelaxed() const { return m_value; }
|
|
void storeRelaxed(T val) { m_value = val; }
|
|
|
|
//T loadAcquire() const volatile { return nv::loadAcquire(&m_value); }
|
|
//void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); }
|
|
|
|
void increment() /*volatile*/ { nv::atomicIncrement(m_value); }
|
|
void decrement() /*volatile*/ { nv::atomicDecrement(m_value); }
|
|
|
|
void compareAndStore(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
|
|
T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
|
|
T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); }
|
|
|
|
private:
|
|
// don't provide operator = or == ; make the client write Store( Load() )
|
|
NV_FORBID_COPY(Atomic);
|
|
|
|
NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64));
|
|
|
|
T m_value;
|
|
};
|
|
#endif
|
|
|
|
} // nv namespace
|
|
|
|
|
|
#endif // NV_THREAD_ATOMIC_H
|