diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index d354b3c..bb7a851 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -141,6 +141,7 @@ CHECK_INCLUDE_FILES(stdarg.h HAVE_STDARG_H) CHECK_INCLUDE_FILES(signal.h HAVE_SIGNAL_H) CHECK_INCLUDE_FILES(execinfo.h HAVE_EXECINFO_H) CHECK_INCLUDE_FILES(malloc.h HAVE_MALLOC_H) +CHECK_INCLUDE_FILES(dispatch/dispatch.h HAVE_DISPATCH_H) CONFIGURE_FILE(${CMAKE_CURRENT_SOURCE_DIR}/nvconfig.h.in ${CMAKE_CURRENT_BINARY_DIR}/nvconfig.h) diff --git a/src/nvconfig.h.in b/src/nvconfig.h.in index 9726768..0142d96 100644 --- a/src/nvconfig.h.in +++ b/src/nvconfig.h.in @@ -8,6 +8,7 @@ #cmakedefine HAVE_MALLOC_H #cmakedefine HAVE_OPENMP +#cmakedefine HAVE_DISPATCH_H #cmakedefine HAVE_PNG #cmakedefine HAVE_JPEG diff --git a/src/nvmath/SimdVector_VE.h b/src/nvmath/SimdVector_VE.h index dabb525..c850557 100644 --- a/src/nvmath/SimdVector_VE.h +++ b/src/nvmath/SimdVector_VE.h @@ -35,12 +35,13 @@ namespace nv { class SimdVector { + public: vector float vec; typedef SimdVector Arg; SimdVector() {} - explicit SimdVector(float v) : vec((vector float)(X)) {} + explicit SimdVector(float v) : vec((vector float)(v)) {} explicit SimdVector(vector float v) : vec(v) {} SimdVector(const SimdVector & arg) : vec(arg.vec) {} @@ -115,34 +116,34 @@ namespace nv { } }; - SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right ) + inline SimdVector operator+( SimdVector::Arg left, SimdVector::Arg right ) { return SimdVector( vec_add( left.vec, right.vec ) ); } - SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right ) + inline SimdVector operator-( SimdVector::Arg left, SimdVector::Arg right ) { return SimdVector( vec_sub( left.vec, right.vec ) ); } - SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right ) + inline SimdVector operator*( SimdVector::Arg left, SimdVector::Arg right ) { return SimdVector( vec_madd( left.vec, right.vec, ( vector float )( -0.0f ) ) ); } // Returns a*b + c - SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + inline SimdVector multiplyAdd( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) { return SimdVector( vec_madd( a.vec, b.vec, c.vec ) ); } // Returns -( a*b - c ) - SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) + inline SimdVector negativeMultiplySubtract( SimdVector::Arg a, SimdVector::Arg b, SimdVector::Arg c ) { return SimdVector( vec_nmsub( a.vec, b.vec, c.vec ) ); } - SimdVector reciprocal( SimdVector::Arg v ) + inline SimdVector reciprocal( SimdVector::Arg v ) { // get the reciprocal estimate vector float estimate = vec_re( v.vec ); @@ -152,32 +153,32 @@ namespace nv { return SimdVector( vec_madd( diff, estimate, estimate ) ); } - SimdVector min( SimdVector::Arg left, SimdVector::Arg right ) + inline SimdVector min( SimdVector::Arg left, SimdVector::Arg right ) { return SimdVector( vec_min( left.vec, right.vec ) ); } - SimdVector max( SimdVector::Arg left, SimdVector::Arg right ) + inline SimdVector max( SimdVector::Arg left, SimdVector::Arg right ) { return SimdVector( vec_max( left.vec, right.vec ) ); } - SimdVector truncate( SimdVector::Arg v ) + inline SimdVector truncate( SimdVector::Arg v ) { return SimdVector( vec_trunc( v.vec ) ); } - SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right ) + inline SimdVector compareEqual( SimdVector::Arg left, SimdVector::Arg right ) { return SimdVector( ( vector float )vec_cmpeq( left.vec, right.vec ) ); } - SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits ) + inline SimdVector select( SimdVector::Arg off, SimdVector::Arg on, SimdVector::Arg bits ) { return SimdVector( vec_sel( off.vec, on.vec, ( vector unsigned int )bits.vec ) ); } - bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right ) + inline bool compareAnyLessThan( SimdVector::Arg left, SimdVector::Arg right ) { return vec_any_lt( left.vec, right.vec ) != 0; } diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/CompressorDXT.cpp index 76f4ed6..a621feb 100644 --- a/src/nvtt/CompressorDXT.cpp +++ b/src/nvtt/CompressorDXT.cpp @@ -156,10 +156,10 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c context.compressor = this; static SequentialTaskDispatcher sequential; - static AppleTaskDispatcher concurrent; + //#static AppleTaskDispatcher concurrent; - //TaskDispatcher * dispatcher = &sequential; - TaskDispatcher * dispatcher = &concurrent; + TaskDispatcher * dispatcher = &sequential; + //TaskDispatcher * dispatcher = &concurrent; // Use a single thread to compress small textures. if (context.bh < 4) dispatcher = &sequential; diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index 27a7837..5e5a794 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -56,8 +56,6 @@ using namespace nv; using namespace nvtt; -#include - Compressor::Compressor() : m(*new Compressor::Private()) { // CUDA initialization. diff --git a/src/nvtt/TaskDispatcher.h b/src/nvtt/TaskDispatcher.h index f11bd83..7dd81af 100644 --- a/src/nvtt/TaskDispatcher.h +++ b/src/nvtt/TaskDispatcher.h @@ -6,8 +6,7 @@ #include #endif -#if NV_OS_DARWIN -//#if defined(HAVE_DISPATCH) +#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) #include #endif @@ -22,7 +21,7 @@ namespace nvtt { } }; -#if NV_OS_DARWIN +#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) // Task dispatcher using Apple's Grand Central Dispatch. struct AppleTaskDispatcher : public TaskDispatcher