E2K: added initial support for MCST Elbrus 2000

pull/303/head
r-a-sattarov 5 years ago
parent 261948f076
commit 809c6df193

File diff suppressed because it is too large Load Diff

@ -56,7 +56,7 @@
# define NV_OS_MINGW 1
# define NV_OS_WIN32 1
#elif defined POSH_OS_OSX
# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too.
# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too.
# define NV_OS_DARWIN 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_IOS
@ -98,6 +98,7 @@
// NV_CPU_PPC
// NV_CPU_ARM
// NV_CPU_ARM_64
// NV_CPU_E2K /* MCST Elbrus 2000 */
#define NV_CPU_STRING POSH_CPU_STRING
@ -112,6 +113,8 @@
# define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64
# define NV_CPU_ARM_64 1
#elif defined POSH_CPU_E2K
# define NV_CPU_E2K 1
#else
# error "Unsupported CPU"
#endif
@ -124,7 +127,7 @@
#if defined POSH_COMPILER_CLANG
# define NV_CC_CLANG 1
# define NV_CC_GNUC 1 // Clang is compatible with GCC.
# define NV_CC_GNUC 1 // Clang is compatible with GCC.
# define NV_CC_STRING "clang"
#elif defined POSH_COMPILER_GCC
# define NV_CC_GNUC 1
@ -232,7 +235,7 @@ typedef uint32 uint;
NV_ENDIAN_STRING"-endian - " __DATE__ "-" __TIME__
// Disable copy constructor and assignment operator.
// Disable copy constructor and assignment operator.
#if NV_CC_CPP11
#define NV_FORBID_COPY(C) \
C( const C & ) = delete; \
@ -244,7 +247,7 @@ typedef uint32 uint;
C &operator=( const C & )
#endif
// Disable dynamic allocation on the heap.
// Disable dynamic allocation on the heap.
// See Prohibiting Heap-Based Objects in More Effective C++.
#define NV_FORBID_HEAPALLOC() \
private: \
@ -262,12 +265,12 @@ typedef uint32 uint;
#define NV_STRING(x) NV_STRING2(x)
#if NV_CC_MSVC
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_END \
__pragma(warning(push)) \
__pragma(warning(disable:4127)) \
} while(false) \
__pragma(warning(pop))
__pragma(warning(pop))
#else
#define NV_MULTI_LINE_MACRO_BEGIN do {
#define NV_MULTI_LINE_MACRO_END } while(false)
@ -278,7 +281,7 @@ typedef uint32 uint;
#else
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
#endif
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
// Make sure type definitions are fine.
NV_COMPILER_CHECK(sizeof(int8) == 1);
@ -302,7 +305,7 @@ template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
#define NV_MESSAGE(x) message(x)
#endif
#else
#define NV_MESSAGE(x)
#define NV_MESSAGE(x)
#endif

@ -1,413 +1,428 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_THREAD_ATOMIC_H
#define NV_THREAD_ATOMIC_H
#include "nvthread.h"
#include "nvcore/Debug.h"
#if NV_CC_MSVC
#include <intrin.h> // Already included by nvthread.h
#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
#pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
//#pragma intrinsic(_InterlockedExchangeAdd64)
/*
extern "C"
{
#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
LONG __cdecl _InterlockedIncrement(long volatile *Addend);
LONG __cdecl _InterlockedDecrement(long volatile *Addend);
#pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
LONG __cdecl _InterlockedCompareExchange(long volatile * Destination, long Exchange, long Compared);
LONG __cdecl _InterlockedExchange(long volatile * Target, LONG Value);
}
*/
#endif // NV_CC_MSVC
#if NV_CC_CLANG && POSH_CPU_STRONGARM
// LLVM/Clang do not yet have functioning atomics as of 2.1
// #include <atomic>
#endif
//ACS: need this if we want to use Apple's atomics.
/*
#if NV_OS_IOS || NV_OS_DARWIN
// for iOS & OSX we use apple's atomics
#include "libkern/OSAtomic.h"
#endif
*/
namespace nv {
// Load and stores.
// Reads *ptr with an ordinary dereference; no barrier of any kind is issued.
inline uint32 loadRelaxed(const uint32 * ptr)
{
    const uint32 current = *ptr;
    return current;
}
// Writes 'value' to *ptr with an ordinary assignment; no barrier of any kind is issued.
inline void storeRelaxed(uint32 * ptr, uint32 value)
{
    ptr[0] = value;
}
// Reads a 32-bit value with acquire intent.
// 'ptr' is debug-checked for 4-byte alignment. Compilation fails on CPUs that
// have no branch below.
inline uint32 loadAcquire(const volatile uint32 * ptr)
{
    nvDebugCheck((intptr_t(ptr) & 3) == 0);

#if POSH_CPU_X86 || POSH_CPU_X86_64
    // On x86 a plain load already has acquire ordering, so only the compiler
    // has to be kept from moving later reads above it.
    const uint32 loaded = *ptr;
    nvCompilerReadBarrier();
    return loaded;
#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64 || POSH_CPU_PPC64
    // ARM / AArch64 / PPC64: the load is handled like the x86 one, but fenced on
    // both sides with the read-write barrier. This is the easiest and slowest
    // approach and may not be sufficient; a more specific CPU check or a
    // dedicated instruction (e.g. ldrex on ARM) could replace it.
    // NOTE(review): the strength of this path depends on how
    // nvCompilerReadWriteBarrier() is defined for the target - confirm.
    nvCompilerReadWriteBarrier();
    const uint32 loaded = *ptr;
    nvCompilerReadWriteBarrier();
    return loaded;
#else
#error "Not implemented"
#endif
}
// Writes 'value' to *ptr with release intent.
// 'ptr' (and the by-value argument) are debug-checked for 4-byte alignment.
// Compilation fails on CPUs that have no branch below.
inline void storeRelease(volatile uint32 * ptr, uint32 value)
{
    nvDebugCheck((intptr_t(ptr) & 3) == 0);
    nvDebugCheck((intptr_t(&value) & 3) == 0);

#if POSH_CPU_X86 || POSH_CPU_X86_64
    // On x86 a plain store already has release ordering; the barrier goes before
    // the store. @@ IC: nvtt placed it after the store, Witness before; it is
    // not settled which one is right.
    nvCompilerWriteBarrier();
    *ptr = value;
#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64 || POSH_CPU_PPC64
    // ARM / AArch64 / PPC64: easiest and slowest approach, fencing the store on
    // both sides with the read-write barrier (a dedicated instruction such as
    // strex on ARM could replace it).
    nvCompilerReadWriteBarrier();
    *ptr = value;
    nvCompilerReadWriteBarrier();
#else
#error "Atomics not implemented."
#endif
}
inline void storeRelease(volatile float * ptr, float value)
{
storeRelease((uint32 *)ptr, *(uint32 *)&value);
}
// Stores 'from' into '*pTo' with release intent, for pointer-sized T.
// T must be exactly as large as intptr_t (compile-time check); the destination
// and the by-value argument are both debug-checked for intptr_t alignment.
// NOTE(review): unlike storeRelease(), there is no per-CPU branch here; only
// nvCompilerWriteBarrier() is issued before the store. Whether that is enough on
// non-x86 targets depends on how that macro is defined - confirm.
template <typename T>
inline void storeReleasePointer(volatile T * pTo, T from)
{
NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
nvDebugCheck((((intptr_t)pTo) % sizeof(intptr_t)) == 0);
nvDebugCheck((((intptr_t)&from) % sizeof(intptr_t)) == 0);
// The barrier precedes the store so earlier writes are not moved below it.
nvCompilerWriteBarrier();
*pTo = from; // on x86, stores are Release
}
// Loads and returns '*ptr' with acquire intent, for pointer-sized T.
// T must be exactly as large as intptr_t (compile-time check); 'ptr' is
// debug-checked for intptr_t alignment.
// NOTE(review): unlike loadAcquire(), there is no per-CPU branch here; only
// nvCompilerReadBarrier() is issued after the load. Whether that is enough on
// non-x86 targets depends on how that macro is defined - confirm.
template <typename T>
inline T loadAcquirePointer(volatile T * ptr)
{
NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
nvDebugCheck((((intptr_t)ptr) % sizeof(intptr_t)) == 0);
T ret = *ptr; // on x86, loads are Acquire
// The barrier follows the load so later reads are not moved above it.
nvCompilerReadBarrier();
return ret;
}
// Atomics. @@ Assuming sequential memory order?
#if NV_CC_MSVC
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
// Atomically adds one to *value and returns the incremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    long * const counter = (long *)value;
    const long updated = _InterlockedIncrement(counter);
    return uint32(updated);
}
// Atomically subtracts one from *value and returns the decremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    long * const counter = (long *)value;
    const long updated = _InterlockedDecrement(counter);
    return uint32(updated);
}
// Atomically adds 'value_to_add' to *value and returns the value after the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    // The intrinsic yields the value held before the addition, so the addend
    // is applied once more to obtain the new value.
    const uint32 previous = uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add));
    return previous + value_to_add;
}
// Atomically adds 'value_to_add' to *value and returns the value held before the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const long previous = _InterlockedExchangeAdd((long*)value, (long)value_to_add);
    return uint32(previous);
}
// Compares *value with 'expected' and, if they are equal, stores 'desired' in *value.
// Returns true when the value observed by the intrinsic equals 'expected'.
// Unlike the C++0x-style CAS, 'expected' is passed by value and never mutated.
// @@ Open questions: is this strong or weak? Can InterlockedCompareExchange fail spuriously?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const long observed = _InterlockedCompareExchange((long *)value, (long)desired, (long)expected);
    if (observed == (long)expected) {
        return true;
    }
    return false;
}
// Atomically stores 'desired' in *value and returns the intrinsic's result
// (the value that was replaced). 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const long previous = _InterlockedExchange((long *)value, (long)desired);
    return (uint32)previous;
}
#elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN)
//ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho'
/*
inline uint32 atomicIncrement(uint32 * value)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
return (uint32)OSAtomicIncrement32Barrier((int32_t *)value);
}
inline uint32 atomicDecrement(uint32 * value)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
return (uint32)OSAtomicDecrement32Barrier((int32_t *)value);
}
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
// @@ Is this strong or weak?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
return OSAtomicCompareAndSwap32Barrier((int32_t)expected, (int32_t)desired, (int32_t *)value);
}
*/
// Atomically adds one to *value and returns the incremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, 1);
    return updated;
}
// Atomically subtracts one from *value and returns the decremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_sub_and_fetch(value, 1);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value after the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, value_to_add);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value held before the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 previous = __sync_fetch_and_add(value, value_to_add);
    return previous;
}
// Compares *value with 'expected' and, if they are equal, stores 'desired' in *value.
// Returns the builtin's success flag.
// Unlike the C++0x-style CAS, 'expected' is passed by value and never mutated.
// @@ Open question: is this strong or weak?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
    return swapped;
}
// Atomically stores 'desired' in *value and returns the builtin's result
// (the value that was replaced). 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    // Despite its name, the builtin performs no test: it always sets the new value.
    const uint32 previous = __sync_lock_test_and_set(value, desired);
    return previous;
}
#elif NV_CC_CLANG && POSH_CPU_STRONGARM
// Atomically adds one to *value and returns the incremented value
// (clang on 32-bit ARM, hand-written exclusive load/store retry loop).
// 'value' is debug-checked for 4-byte alignment.
//
// This should eventually use a compiler-provided atomic, which did not work in
// LLVM 2.1:
//   return (uint32)AtomicIncrement((long *)value);
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    uint32 result;  // new counter value
    uint32 failed;  // strex status: 0 when the exclusive store succeeded
    asm volatile (
        // Word-sized exclusives are required for a 32-bit counter: the byte
        // forms (ldrexb/strexb) would read and write only its low 8 bits.
        "1: ldrex %0, [%2] \n\t"
        "add %0, %0, #1 \n\t"
        "strex %1, %0, [%2] \n\t"
        "cmp %1, #0 \n\t"
        "bne 1b"                    // lost the reservation: retry
        : "=&r" (result), "=&r" (failed)
        : "r"(value)
        : "cc", "memory"            // cmp changes the flags; *value is written
    );
    // NOTE(review): no hardware barrier instruction is emitted around the loop;
    // confirm whether callers rely on ordering from this operation.
    return result;
}
// Atomically subtracts one from *value and returns the decremented value
// (clang on 32-bit ARM, hand-written exclusive load/store retry loop).
// 'value' is debug-checked for 4-byte alignment.
//
// This should eventually use a compiler-provided atomic, which did not work in
// LLVM 2.1:
//   return (uint32)sys::AtomicDecrement((long *)value);
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    uint32 result;  // new counter value
    uint32 failed;  // strex status: 0 when the exclusive store succeeded
    asm volatile (
        // Word-sized exclusives are required for a 32-bit counter: the byte
        // forms (ldrexb/strexb) would read and write only its low 8 bits.
        "1: ldrex %0, [%2] \n\t"
        "sub %0, %0, #1 \n\t"
        "strex %1, %0, [%2] \n\t"
        "cmp %1, #0 \n\t"
        "bne 1b"                    // lost the reservation: retry
        : "=&r" (result), "=&r" (failed)
        : "r"(value)
        : "cc", "memory"            // cmp changes the flags; *value is written
    );
    // NOTE(review): no hardware barrier instruction is emitted around the loop;
    // confirm whether callers rely on ordering from this operation.
    return result;
}
#elif NV_CC_GNUC
// Many alternative implementations at:
// http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html
// Atomically adds one to *value and returns the incremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, 1);
    return updated;
}
// Atomically subtracts one from *value and returns the decremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_sub_and_fetch(value, 1);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value after the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, value_to_add);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value held before the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 previous = __sync_fetch_and_add(value, value_to_add);
    return previous;
}
// Compares *value with 'expected' and, if they are equal, stores 'desired' in *value.
// Returns the builtin's success flag.
// Unlike the C++0x-style CAS, 'expected' is passed by value and never mutated.
// @@ Open question: is this strong or weak?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
    return swapped;
}
// Atomically stores 'desired' in *value and returns the builtin's result
// (the value that was replaced). 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    // Despite its name, the builtin performs no test: it always sets the new value.
    const uint32 previous = __sync_lock_test_and_set(value, desired);
    return previous;
}
#else
#error "Atomics not implemented."
#endif
// It would be nice to have C++0x-style atomic types, but I'm not in the mood right now. Only uint32 supported so far.
// Disabled sketch of a C++0x-style atomic wrapper. Everything between this
// '#if 0' and its '#endif' is excluded from compilation.
// NOTE(review): as written it would not compile if enabled - see the notes below.
#if 0
// Generic increment/decrement helpers; only declared, never defined here.
template <typename T>
void increment(T * value);
template <typename T>
void decrement(T * value);
// Empty placeholder specializations (no decrement specializations exist).
template <>
void increment(uint32 * value) {
}
template <>
void increment(uint64 * value) {
}
// Wrapper around a single value of type T, restricted by the check at the
// bottom to types the size of uint32 or uint64.
template <typename T>
class Atomic
{
public:
explicit Atomic() : m_value() { }
explicit Atomic( T val ) : m_value(val) { }
~Atomic() { }
// Plain, unordered accessors.
T loadRelaxed() const { return m_value; }
void storeRelaxed(T val) { m_value = val; }
//T loadAcquire() const volatile { return nv::loadAcquire(&m_value); }
//void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); }
// NOTE(review): these pass m_value itself, while the nv::atomicIncrement /
// nv::atomicDecrement functions in this header take a 'uint32 *'.
void increment() /*volatile*/ { nv::atomicIncrement(m_value); }
void decrement() /*volatile*/ { nv::atomicDecrement(m_value); }
// NOTE(review): nv::atomicCompareAndStore and nv::atomicExchange are not defined
// in the visible part of this header (it provides atomicCompareAndSwap and
// atomicSwap). compareAndExchange() and exchange() are declared to return T
// but contain no return statement.
void compareAndStore(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); }
private:
// don't provide operator = or == ; make the client write Store( Load() )
NV_FORBID_COPY(Atomic);
NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64));
T m_value;
};
#endif
} // nv namespace
#endif // NV_THREAD_ATOMIC_H
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_THREAD_ATOMIC_H
#define NV_THREAD_ATOMIC_H
#include "nvthread.h"
#include "nvcore/Debug.h"
#if NV_CC_MSVC
#include <intrin.h> // Already included by nvthread.h
#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
#pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
//#pragma intrinsic(_InterlockedExchangeAdd64)
/*
extern "C"
{
#pragma intrinsic(_InterlockedIncrement, _InterlockedDecrement)
LONG __cdecl _InterlockedIncrement(long volatile *Addend);
LONG __cdecl _InterlockedDecrement(long volatile *Addend);
#pragma intrinsic(_InterlockedCompareExchange, _InterlockedExchange)
LONG __cdecl _InterlockedCompareExchange(long volatile * Destination, long Exchange, long Compared);
LONG __cdecl _InterlockedExchange(long volatile * Target, LONG Value);
}
*/
#endif // NV_CC_MSVC
#if NV_CC_CLANG && POSH_CPU_STRONGARM
// LLVM/Clang do not yet have functioning atomics as of 2.1
// #include <atomic>
#endif
//ACS: need this if we want to use Apple's atomics.
/*
#if NV_OS_IOS || NV_OS_DARWIN
// for iOS & OSX we use apple's atomics
#include "libkern/OSAtomic.h"
#endif
*/
namespace nv {
// Load and stores.
// Reads *ptr with an ordinary dereference; no barrier of any kind is issued.
inline uint32 loadRelaxed(const uint32 * ptr)
{
    const uint32 current = *ptr;
    return current;
}
// Writes 'value' to *ptr with an ordinary assignment; no barrier of any kind is issued.
inline void storeRelaxed(uint32 * ptr, uint32 value)
{
    ptr[0] = value;
}
// Reads a 32-bit value with acquire intent.
// 'ptr' is debug-checked for 4-byte alignment. Compilation fails on CPUs that
// have no branch below.
inline uint32 loadAcquire(const volatile uint32 * ptr)
{
    nvDebugCheck((intptr_t(ptr) & 3) == 0);

#if POSH_CPU_X86 || POSH_CPU_X86_64
    // On x86 a plain load already has acquire ordering, so only the compiler
    // has to be kept from moving later reads above it.
    const uint32 loaded = *ptr;
    nvCompilerReadBarrier();
    return loaded;
#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64 || POSH_CPU_PPC64 || POSH_CPU_E2K
    // ARM / AArch64 / PPC64 / E2K: the load is handled like the x86 one, but
    // fenced on both sides with the read-write barrier. This is the easiest and
    // slowest approach and may not be sufficient; a more specific CPU check or a
    // dedicated instruction (e.g. ldrex on ARM) could replace it.
    // NOTE(review): the strength of this path depends on how
    // nvCompilerReadWriteBarrier() is defined for the target - confirm.
    nvCompilerReadWriteBarrier();
    const uint32 loaded = *ptr;
    nvCompilerReadWriteBarrier();
    return loaded;
#else
#error "Not implemented"
#endif
}
// Writes 'value' to *ptr with release intent.
// 'ptr' (and the by-value argument) are debug-checked for 4-byte alignment.
// Compilation fails on CPUs that have no branch below.
inline void storeRelease(volatile uint32 * ptr, uint32 value)
{
    nvDebugCheck((intptr_t(ptr) & 3) == 0);
    nvDebugCheck((intptr_t(&value) & 3) == 0);

#if POSH_CPU_X86 || POSH_CPU_X86_64
    // On x86 a plain store already has release ordering; the barrier goes before
    // the store. @@ IC: nvtt placed it after the store, Witness before; it is
    // not settled which one is right.
    nvCompilerWriteBarrier();
    *ptr = value;
#elif POSH_CPU_STRONGARM || POSH_CPU_AARCH64 || POSH_CPU_PPC64 || POSH_CPU_E2K
    // ARM / AArch64 / PPC64 / E2K: easiest and slowest approach, fencing the
    // store on both sides with the read-write barrier (a dedicated instruction
    // such as strex on ARM could replace it).
    nvCompilerReadWriteBarrier();
    *ptr = value;
    nvCompilerReadWriteBarrier();
#else
#error "Atomics not implemented."
#endif
}
inline void storeRelease(volatile float * ptr, float value)
{
storeRelease((uint32 *)ptr, *(uint32 *)&value);
}
// Stores 'from' into '*pTo' with release intent, for pointer-sized T.
// T must be exactly as large as intptr_t (compile-time check); the destination
// and the by-value argument are both debug-checked for intptr_t alignment.
// NOTE(review): unlike storeRelease(), there is no per-CPU branch here; only
// nvCompilerWriteBarrier() is issued before the store. Whether that is enough on
// non-x86 targets depends on how that macro is defined - confirm.
template <typename T>
inline void storeReleasePointer(volatile T * pTo, T from)
{
NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
nvDebugCheck((((intptr_t)pTo) % sizeof(intptr_t)) == 0);
nvDebugCheck((((intptr_t)&from) % sizeof(intptr_t)) == 0);
// The barrier precedes the store so earlier writes are not moved below it.
nvCompilerWriteBarrier();
*pTo = from; // on x86, stores are Release
}
// Loads and returns '*ptr' with acquire intent, for pointer-sized T.
// T must be exactly as large as intptr_t (compile-time check); 'ptr' is
// debug-checked for intptr_t alignment.
// NOTE(review): unlike loadAcquire(), there is no per-CPU branch here; only
// nvCompilerReadBarrier() is issued after the load. Whether that is enough on
// non-x86 targets depends on how that macro is defined - confirm.
template <typename T>
inline T loadAcquirePointer(volatile T * ptr)
{
NV_COMPILER_CHECK(sizeof(T) == sizeof(intptr_t));
nvDebugCheck((((intptr_t)ptr) % sizeof(intptr_t)) == 0);
T ret = *ptr; // on x86, loads are Acquire
// The barrier follows the load so later reads are not moved above it.
nvCompilerReadBarrier();
return ret;
}
// Atomics. @@ Assuming sequential memory order?
#if NV_CC_MSVC
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
// Atomically adds one to *value and returns the incremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    long * const counter = (long *)value;
    const long updated = _InterlockedIncrement(counter);
    return uint32(updated);
}
// Atomically subtracts one from *value and returns the decremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    long * const counter = (long *)value;
    const long updated = _InterlockedDecrement(counter);
    return uint32(updated);
}
// Atomically adds 'value_to_add' to *value and returns the value after the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    // The intrinsic yields the value held before the addition, so the addend
    // is applied once more to obtain the new value.
    const uint32 previous = uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add));
    return previous + value_to_add;
}
// Atomically adds 'value_to_add' to *value and returns the value held before the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const long previous = _InterlockedExchangeAdd((long*)value, (long)value_to_add);
    return uint32(previous);
}
// Compares *value with 'expected' and, if they are equal, stores 'desired' in *value.
// Returns true when the value observed by the intrinsic equals 'expected'.
// Unlike the C++0x-style CAS, 'expected' is passed by value and never mutated.
// @@ Open questions: is this strong or weak? Can InterlockedCompareExchange fail spuriously?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const long observed = _InterlockedCompareExchange((long *)value, (long)desired, (long)expected);
    if (observed == (long)expected) {
        return true;
    }
    return false;
}
// Atomically stores 'desired' in *value and returns the intrinsic's result
// (the value that was replaced). 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const long previous = _InterlockedExchange((long *)value, (long)desired);
    return (uint32)previous;
}
#elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN)
//ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho'
/*
inline uint32 atomicIncrement(uint32 * value)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
return (uint32)OSAtomicIncrement32Barrier((int32_t *)value);
}
inline uint32 atomicDecrement(uint32 * value)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
return (uint32)OSAtomicDecrement32Barrier((int32_t *)value);
}
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
// @@ Is this strong or weak?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
return OSAtomicCompareAndSwap32Barrier((int32_t)expected, (int32_t)desired, (int32_t *)value);
}
*/
// Atomically adds one to *value and returns the incremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, 1);
    return updated;
}
// Atomically subtracts one from *value and returns the decremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_sub_and_fetch(value, 1);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value after the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, value_to_add);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value held before the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 previous = __sync_fetch_and_add(value, value_to_add);
    return previous;
}
// Compares *value with 'expected' and, if they are equal, stores 'desired' in *value.
// Returns the builtin's success flag.
// Unlike the C++0x-style CAS, 'expected' is passed by value and never mutated.
// @@ Open question: is this strong or weak?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
    return swapped;
}
// Atomically stores 'desired' in *value and returns the builtin's result
// (the value that was replaced). 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    // Despite its name, the builtin performs no test: it always sets the new value.
    const uint32 previous = __sync_lock_test_and_set(value, desired);
    return previous;
}
#elif NV_CC_CLANG && POSH_CPU_STRONGARM
// Atomically adds one to *value and returns the incremented value
// (clang on 32-bit ARM, hand-written exclusive load/store retry loop).
// 'value' is debug-checked for 4-byte alignment.
//
// This should eventually use a compiler-provided atomic, which did not work in
// LLVM 2.1:
//   return (uint32)AtomicIncrement((long *)value);
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    uint32 result;  // new counter value
    uint32 failed;  // strex status: 0 when the exclusive store succeeded
    asm volatile (
        // Word-sized exclusives are required for a 32-bit counter: the byte
        // forms (ldrexb/strexb) would read and write only its low 8 bits.
        "1: ldrex %0, [%2] \n\t"
        "add %0, %0, #1 \n\t"
        "strex %1, %0, [%2] \n\t"
        "cmp %1, #0 \n\t"
        "bne 1b"                    // lost the reservation: retry
        : "=&r" (result), "=&r" (failed)
        : "r"(value)
        : "cc", "memory"            // cmp changes the flags; *value is written
    );
    // NOTE(review): no hardware barrier instruction is emitted around the loop;
    // confirm whether callers rely on ordering from this operation.
    return result;
}
// Atomically subtracts one from *value and returns the decremented value
// (clang on 32-bit ARM, hand-written exclusive load/store retry loop).
// 'value' is debug-checked for 4-byte alignment.
//
// This should eventually use a compiler-provided atomic, which did not work in
// LLVM 2.1:
//   return (uint32)sys::AtomicDecrement((long *)value);
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);

    uint32 result;  // new counter value
    uint32 failed;  // strex status: 0 when the exclusive store succeeded
    asm volatile (
        // Word-sized exclusives are required for a 32-bit counter: the byte
        // forms (ldrexb/strexb) would read and write only its low 8 bits.
        "1: ldrex %0, [%2] \n\t"
        "sub %0, %0, #1 \n\t"
        "strex %1, %0, [%2] \n\t"
        "cmp %1, #0 \n\t"
        "bne 1b"                    // lost the reservation: retry
        : "=&r" (result), "=&r" (failed)
        : "r"(value)
        : "cc", "memory"            // cmp changes the flags; *value is written
    );
    // NOTE(review): no hardware barrier instruction is emitted around the loop;
    // confirm whether callers rely on ordering from this operation.
    return result;
}
#elif NV_CC_GNUC
// Many alternative implementations at:
// http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html
// Atomically adds one to *value and returns the incremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicIncrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, 1);
    return updated;
}
// Atomically subtracts one from *value and returns the decremented value.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicDecrement(uint32 * value)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_sub_and_fetch(value, 1);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value after the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 updated = __sync_add_and_fetch(value, value_to_add);
    return updated;
}
// Atomically adds 'value_to_add' to *value and returns the value held before the addition.
// 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const uint32 previous = __sync_fetch_and_add(value, value_to_add);
    return previous;
}
// Compares *value with 'expected' and, if they are equal, stores 'desired' in *value.
// Returns the builtin's success flag.
// Unlike the C++0x-style CAS, 'expected' is passed by value and never mutated.
// @@ Open question: is this strong or weak?
inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    const bool swapped = __sync_bool_compare_and_swap(value, expected, desired);
    return swapped;
}
// Atomically stores 'desired' in *value and returns the builtin's result
// (the value that was replaced). 'value' is debug-checked for 4-byte alignment.
inline uint32 atomicSwap(uint32 * value, uint32 desired)
{
    nvDebugCheck((intptr_t(value) & 3) == 0);
    // Despite its name, the builtin performs no test: it always sets the new value.
    const uint32 previous = __sync_lock_test_and_set(value, desired);
    return previous;
}
#else
#error "Atomics not implemented."
#endif
// It would be nice to have C++0x-style atomic types, but I'm not in the mood right now. Only uint32 supported so far.
// Disabled sketch of a C++0x-style atomic wrapper. Everything between this
// '#if 0' and its '#endif' is excluded from compilation.
// NOTE(review): as written it would not compile if enabled - see the notes below.
#if 0
// Generic increment/decrement helpers; only declared, never defined here.
template <typename T>
void increment(T * value);
template <typename T>
void decrement(T * value);
// Empty placeholder specializations (no decrement specializations exist).
template <>
void increment(uint32 * value) {
}
template <>
void increment(uint64 * value) {
}
// Wrapper around a single value of type T, restricted by the check at the
// bottom to types the size of uint32 or uint64.
template <typename T>
class Atomic
{
public:
explicit Atomic() : m_value() { }
explicit Atomic( T val ) : m_value(val) { }
~Atomic() { }
// Plain, unordered accessors.
T loadRelaxed() const { return m_value; }
void storeRelaxed(T val) { m_value = val; }
//T loadAcquire() const volatile { return nv::loadAcquire(&m_value); }
//void storeRelease(T val) volatile { nv::storeRelease(&m_value, val); }
// NOTE(review): these pass m_value itself, while the nv::atomicIncrement /
// nv::atomicDecrement functions in this header take a 'uint32 *'.
void increment() /*volatile*/ { nv::atomicIncrement(m_value); }
void decrement() /*volatile*/ { nv::atomicDecrement(m_value); }
// NOTE(review): nv::atomicCompareAndStore and nv::atomicExchange are not defined
// in the visible part of this header (it provides atomicCompareAndSwap and
// atomicSwap). compareAndExchange() and exchange() are declared to return T
// but contain no return statement.
void compareAndStore(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
T compareAndExchange(T oldVal, T newVal) { nv::atomicCompareAndStore(&m_value, oldVal, newVal); }
T exchange(T newVal) { nv::atomicExchange(&m_value, newVal); }
private:
// don't provide operator = or == ; make the client write Store( Load() )
NV_FORBID_COPY(Atomic);
NV_COMPILER_CHECK(sizeof(T) == sizeof(uint32) || sizeof(T) == sizeof(uint64));
T m_value;
};
#endif
} // nv namespace
#endif // NV_THREAD_ATOMIC_H

Loading…
Cancel
Save