From 47b06d3255b47f4a85bc2d09b3a77560709dc41f Mon Sep 17 00:00:00 2001
From: Timothy Pearson <tpearson@raptorengineering.com>
Date: Sun, 26 Jun 2016 19:23:56 -0500
Subject: [PATCH 1/2] Fix build failure caused by incorrect data types in PPM save

Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
---
 src/nvimage/ImageIO.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/nvimage/ImageIO.cpp b/src/nvimage/ImageIO.cpp
index ec96020..e50cd7b 100644
--- a/src/nvimage/ImageIO.cpp
+++ b/src/nvimage/ImageIO.cpp
@@ -324,7 +324,7 @@ static bool savePPM(Stream & s, const Image * img)
     writer.writeString("255\n");
     for (uint i = 0; i < w * h; i++) {
         Color32 c = img->pixel(i);
-        s << c.r << c.g << c.b;
+        s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b;
     }
 
     return true;

From 203eda1d47c4df707ca230be5f62036b0da21292 Mon Sep 17 00:00:00 2001
From: Timothy Pearson <tpearson@raptorengineering.com>
Date: Sun, 26 Jun 2016 20:09:39 -0500
Subject: [PATCH 2/2] Fix build on PPC64LE

Signed-off-by: Timothy Pearson <tpearson@raptorengineering.com>
---
 extern/poshlib/posh.h      |  1 +
 src/nvcore/Debug.h         |  2 +-
 src/nvimage/Image.h        |  4 ++++
 src/nvmath/Half.cpp        |  2 +-
 src/nvmath/Plane.h         |  4 ++++
 src/nvmath/SimdVector_VE.h |  9 +++++----
 src/nvthread/Atomic.h      | 15 +++++++++++++++
 src/nvtt/ClusterFit.h      |  1 +
 src/nvtt/squish/simd_ve.h  |  9 +++++----
 9 files changed, 37 insertions(+), 10 deletions(-)

diff --git a/extern/poshlib/posh.h b/extern/poshlib/posh.h
index e401fb8..e87a337 100644
--- a/extern/poshlib/posh.h
+++ b/extern/poshlib/posh.h
@@ -451,6 +451,7 @@ LLVM:
 #  define POSH_CPU_PPC 1
 #  if !defined POSH_CPU_STRING
 #    if defined __powerpc64__
+#       define POSH_CPU_PPC64 1
 #       define POSH_CPU_STRING "PowerPC64"
 #    else
 #       define POSH_CPU_STRING "PowerPC"
diff --git a/src/nvcore/Debug.h b/src/nvcore/Debug.h
index 48e765e..c3a4144 100644
--- a/src/nvcore/Debug.h
+++ b/src/nvcore/Debug.h
@@ -166,7 +166,7 @@ NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((for
 namespace nv
 {
     inline bool isValidPtr(const void * ptr) {
-    #if NV_CPU_X86_64
+    #if NV_CPU_X86_64 || POSH_CPU_PPC64
         if (ptr == NULL) return true;
         if (reinterpret_cast<uint64>(ptr) < 0x10000ULL) return false;
         if (reinterpret_cast<uint64>(ptr) >= 0x000007FFFFFEFFFFULL) return false;
diff --git a/src/nvimage/Image.h b/src/nvimage/Image.h
index e39c41e..643fd9d 100644
--- a/src/nvimage/Image.h
+++ b/src/nvimage/Image.h
@@ -7,6 +7,10 @@
 #include "nvimage.h"
 #include "nvcore/Debug.h"
 
+#if NV_USE_ALTIVEC
+#undef pixel
+#endif
+
 namespace nv
 {
     class Color32;
diff --git a/src/nvmath/Half.cpp b/src/nvmath/Half.cpp
index 81465a4..c4ea48f 100644
--- a/src/nvmath/Half.cpp
+++ b/src/nvmath/Half.cpp
@@ -489,7 +489,7 @@ nv::half_to_float( uint16 h )
 }
 
 
-#if !NV_OS_IOS //ACStodoIOS some better define to choose this?
+#if !NV_OS_IOS && (defined(__i386__) || defined(__x86_64__) || defined(_M_IX86) || defined(_M_X64)) // x86 only; covers GCC/Clang and MSVC macro spellings
 
 #if NV_CC_GNUC
 #if defined(__i386__) || defined(__x86_64__)
diff --git a/src/nvmath/Plane.h b/src/nvmath/Plane.h
index 08568e8..17e366a 100644
--- a/src/nvmath/Plane.h
+++ b/src/nvmath/Plane.h
@@ -7,6 +7,10 @@
 #include "nvmath.h"
 #include "Vector.h"
 
+#if NV_USE_ALTIVEC
+#undef vector
+#endif
+
 namespace nv
 {
     class Matrix;
diff --git a/src/nvmath/SimdVector_VE.h b/src/nvmath/SimdVector_VE.h
index c850557..57806ee 100644
--- a/src/nvmath/SimdVector_VE.h
+++ b/src/nvmath/SimdVector_VE.h
@@ -1,6 +1,7 @@
 /* -----------------------------------------------------------------------------
 
 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+	Copyright (c) 2016 Raptor Engineering, LLC
 
 	Permission is hereby granted, free of charge, to any person obtaining
 	a copy of this software and associated documentation files (the 
@@ -41,7 +42,7 @@ namespace nv {
         typedef SimdVector Arg;
 
         SimdVector() {}
-        explicit SimdVector(float v) : vec((vector float)(v)) {}	
+        explicit SimdVector(float v) : vec(vec_splats(v)) {}	
         explicit SimdVector(vector float v) : vec(v) {}
         SimdVector(const SimdVector & arg) : vec(arg.vec) {}
 
@@ -111,7 +112,7 @@ namespace nv {
 
         SimdVector& operator*=( Arg v )
         {
-            vec = vec_madd( vec, v.vec, ( vector float )( -0.0f ) );
+            vec = vec_madd( vec, v.vec, vec_splats( -0.0f ) );
             return *this;
         }
     };
@@ -128,7 +129,7 @@ namespace nv {
 
     inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right  )
     {
-        return SimdVector( vec_madd( left.vec, right.vec, ( vector float )( -0.0f ) ) );
+        return SimdVector( vec_madd( left.vec, right.vec, vec_splats( -0.0f ) ) );
     }
 
     // Returns a*b + c
@@ -149,7 +150,7 @@ namespace nv {
         vector float estimate = vec_re( v.vec );
 
         // one round of Newton-Rhaphson refinement
-        vector float diff = vec_nmsub( estimate, v.vec, ( vector float )( 1.0f ) );
+        vector float diff = vec_nmsub( estimate, v.vec, vec_splats( 1.0f ) );
         return SimdVector( vec_madd( diff, estimate, estimate ) );
     }
 
diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h
index e6aaa3e..212b9cb 100644
--- a/src/nvthread/Atomic.h
+++ b/src/nvthread/Atomic.h
@@ -63,6 +63,16 @@ namespace nv {
         // also utilizes a full barrier
         // currently treating laod like x86 - this could be wrong
         
+        // this is the easiest but slowest way to do this
+        nvCompilerReadWriteBarrier();
+		uint32 ret = *ptr; // replace with ldrex?
+        nvCompilerReadWriteBarrier();
+        return ret;
+#elif POSH_CPU_PPC64
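+        // need more specific cpu type for ppc64?
+        // NOTE(review): copied from the ARM path; only compiler barriers appear to be issued below
+        // currently treating load like x86 - this could be wrong (POWER is weakly ordered; a hardware barrier such as lwsync may be needed - confirm)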
+
         // this is the easiest but slowest way to do this
         nvCompilerReadWriteBarrier();
 		uint32 ret = *ptr; // replace with ldrex?
@@ -87,6 +97,11 @@ namespace nv {
         nvCompilerReadWriteBarrier();
 		*ptr = value; //strex?
 		nvCompilerReadWriteBarrier();
+#elif POSH_CPU_PPC64
+        // this is the easiest but slowest way to do this
+        nvCompilerReadWriteBarrier();
+		*ptr = value; //strex?
+		nvCompilerReadWriteBarrier();
 #else
 #error "Atomics not implemented."
 #endif
diff --git a/src/nvtt/ClusterFit.h b/src/nvtt/ClusterFit.h
index 078c9d6..7f85b6c 100644
--- a/src/nvtt/ClusterFit.h
+++ b/src/nvtt/ClusterFit.h
@@ -29,6 +29,7 @@
 
 #include "nvmath/SimdVector.h"
 #include "nvmath/Vector.h"
+#include "nvcore/Memory.h"
 
 // Use SIMD version if altivec or SSE are available.
 #define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
diff --git a/src/nvtt/squish/simd_ve.h b/src/nvtt/squish/simd_ve.h
index 2a9ab13..5a11858 100644
--- a/src/nvtt/squish/simd_ve.h
+++ b/src/nvtt/squish/simd_ve.h
@@ -1,6 +1,7 @@
 /* -----------------------------------------------------------------------------
 
 	Copyright (c) 2006 Simon Brown                          si@sjbrown.co.uk
+	Copyright (c) 2016 Raptor Engineering, LLC
 
 	Permission is hereby granted, free of charge, to any person obtaining
 	a copy of this software and associated documentation files (the 
@@ -33,7 +34,7 @@
 
 namespace nvsquish {
 
-#define VEC4_CONST( X ) Vec4( ( vector float )( X ) )
+#define VEC4_CONST( X ) Vec4( vec_splats( (float)( X ) ) ) // X is parenthesized so the cast applies to the whole argument expression
 
 class Vec4
 {
@@ -105,7 +106,7 @@ public:
 	
 	Vec4& operator*=( Arg v )
 	{
-		m_v = vec_madd( m_v, v.m_v, ( vector float )( -0.0f ) );
+		m_v = vec_madd( m_v, v.m_v, vec_splats( -0.0f ) );
 		return *this;
 	}
 	
@@ -121,7 +122,7 @@ public:
 	
 	friend Vec4 operator*( Vec4::Arg left, Vec4::Arg right  )
 	{
-		return Vec4( vec_madd( left.m_v, right.m_v, ( vector float )( -0.0f ) ) );
+		return Vec4( vec_madd( left.m_v, right.m_v, vec_splats( -0.0f ) ) );
 	}
 	
 	//! Returns a*b + c
@@ -142,7 +143,7 @@ public:
 		vector float estimate = vec_re( v.m_v );
 		
 		// one round of Newton-Rhaphson refinement
-		vector float diff = vec_nmsub( estimate, v.m_v, ( vector float )( 1.0f ) );
+		vector float diff = vec_nmsub( estimate, v.m_v, vec_splats( 1.0f ) );
 		return Vec4( vec_madd( diff, estimate, estimate ) );
 	}