From 714faa03e676d2a2dc2f67f948e2e7a770a38757 Mon Sep 17 00:00:00 2001 From: will Date: Sat, 7 Jul 2018 18:34:05 +0800 Subject: [PATCH] fix cmake script --- CMakeLists.txt | 2 + extern/CMakeLists.txt | 2 +- src/nvcore/StrLib.h | 8 +-- src/nvcore/Timer.h | 4 +- src/nvimage/BlockDXT.h | 20 +++---- src/nvimage/ColorBlock.h | 5 +- src/nvimage/DirectDrawSurface.h | 4 +- src/nvimage/ErrorMetric.h | 20 +++---- src/nvimage/FloatImage.h | 94 ++++++++++++++++----------------- src/nvimage/KtxFile.h | 2 +- src/nvimage/NormalMap.h | 8 +-- src/nvmath/Fitting.h | 42 +++++++-------- src/nvmath/Gamma.h | 4 +- src/nvmath/Half.cpp | 11 +++- src/nvmath/Half.h | 32 +++++------ src/nvmath/Matrix.h | 18 +++---- src/nvthread/CMakeLists.txt | 1 + src/nvthread/ParallelFor.h | 2 +- src/nvtt/CMakeLists.txt | 1 + 19 files changed, 148 insertions(+), 132 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 5e4bab9..8cebf8d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,11 +35,13 @@ IF(CMAKE_BUILD_TYPE MATCHES "debug") ADD_DEFINITIONS(-D_DEBUG=1) ENDIF() +OPTION(NVTT_SHARED "build shared library" OFF) IF(NVTT_SHARED) SET(NVCORE_SHARED TRUE) SET(NVMATH_SHARED TRUE) SET(NVIMAGE_SHARED TRUE) + SET(NVTHREAD_SHARED TRUE) ENDIF(NVTT_SHARED) ADD_SUBDIRECTORY(extern) diff --git a/extern/CMakeLists.txt b/extern/CMakeLists.txt index 7e41986..da8531b 100644 --- a/extern/CMakeLists.txt +++ b/extern/CMakeLists.txt @@ -9,5 +9,5 @@ ADD_SUBDIRECTORY(EtcLib) ADD_SUBDIRECTORY(rg_etc1_v104) #ADD_SUBDIRECTORY(etcpack) -ADD_SUBDIRECTORY(butteraugli) +# ADD_SUBDIRECTORY(butteraugli) diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h index c6ab71d..a5484a4 100644 --- a/src/nvcore/StrLib.h +++ b/src/nvcore/StrLib.h @@ -202,11 +202,11 @@ namespace nv void stripExtension(); // statics - NVCORE_API static char separator(); - NVCORE_API static const char * fileName(const char *); - NVCORE_API static const char * extension(const char *); + static char separator(); + static const char * fileName(const 
char *); + static const char * extension(const char *); - NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); + static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); }; diff --git a/src/nvcore/Timer.h b/src/nvcore/Timer.h index a60a218..90b9e7b 100644 --- a/src/nvcore/Timer.h +++ b/src/nvcore/Timer.h @@ -30,8 +30,8 @@ namespace nv { NV_FORCEINLINE uint64 fastCpuClock() { return 0; } #endif - uint64 systemClockFrequency(); - uint64 systemClock(); + NVCORE_API uint64 systemClockFrequency(); + NVCORE_API uint64 systemClock(); class NVCORE_CLASS Timer { diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h index c462761..40a2e09 100644 --- a/src/nvimage/BlockDXT.h +++ b/src/nvimage/BlockDXT.h @@ -40,7 +40,7 @@ namespace nv /// DXT1 block. - struct BlockDXT1 + struct NVIMAGE_CLASS BlockDXT1 { Color16 col0; Color16 col1; @@ -74,7 +74,7 @@ namespace nv /// DXT3 alpha block with explicit alpha. - struct AlphaBlockDXT3 + struct NVIMAGE_CLASS AlphaBlockDXT3 { union { struct { @@ -106,7 +106,7 @@ namespace nv /// DXT3 block. - struct BlockDXT3 + struct NVIMAGE_CLASS BlockDXT3 { AlphaBlockDXT3 alpha; BlockDXT1 color; @@ -120,7 +120,7 @@ namespace nv /// DXT5 alpha block. - struct AlphaBlockDXT5 + struct NVIMAGE_CLASS AlphaBlockDXT5 { union { struct { @@ -163,7 +163,7 @@ namespace nv /// DXT5 block. - struct BlockDXT5 + struct NVIMAGE_CLASS BlockDXT5 { AlphaBlockDXT5 alpha; BlockDXT1 color; @@ -176,7 +176,7 @@ namespace nv }; /// ATI1 block. - struct BlockATI1 + struct NVIMAGE_CLASS BlockATI1 { AlphaBlockDXT5 alpha; @@ -187,7 +187,7 @@ namespace nv }; /// ATI2 block. - struct BlockATI2 + struct NVIMAGE_CLASS BlockATI2 { AlphaBlockDXT5 x; AlphaBlockDXT5 y; @@ -199,7 +199,7 @@ namespace nv }; /// CTX1 block. - struct BlockCTX1 + struct NVIMAGE_CLASS BlockCTX1 { uint8 col0[2]; uint8 col1[2]; @@ -218,14 +218,14 @@ namespace nv }; /// BC6 block. 
- struct BlockBC6 + struct NVIMAGE_CLASS BlockBC6 { uint8 data[16]; // Not even going to try to write a union for this thing. void decodeBlock(Vector4 colors[16]) const; }; /// BC7 block. - struct BlockBC7 + struct NVIMAGE_CLASS BlockBC7 { uint8 data[16]; // Not even going to try to write a union for this thing. void decodeBlock(ColorBlock * block) const; diff --git a/src/nvimage/ColorBlock.h b/src/nvimage/ColorBlock.h index f87cb6d..78c1a64 100644 --- a/src/nvimage/ColorBlock.h +++ b/src/nvimage/ColorBlock.h @@ -4,6 +4,7 @@ #ifndef NV_IMAGE_COLORBLOCK_H #define NV_IMAGE_COLORBLOCK_H +#include "nvimage/nvimage.h" #include "nvmath/Color.h" #include "nvmath/Vector.h" @@ -14,7 +15,7 @@ namespace nv /// Uncompressed 4x4 color block. - struct ColorBlock + struct NVIMAGE_CLASS ColorBlock { ColorBlock(); ColorBlock(const uint * linearImage); @@ -128,7 +129,7 @@ namespace nv /// Uncompressed 4x4 alpha block. - struct AlphaBlock4x4 + struct NVIMAGE_CLASS AlphaBlock4x4 { void init(uint8 value); void init(const ColorBlock & src, uint channel); diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h index 1049f06..ef37897 100644 --- a/src/nvimage/DirectDrawSurface.h +++ b/src/nvimage/DirectDrawSurface.h @@ -268,9 +268,9 @@ namespace nv DXGI_FORMAT_BC7_UNORM_SRGB = 99, }; - extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + NVIMAGE_API uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); - extern uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); + NVIMAGE_API uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); struct RGBAPixelFormat { diff --git a/src/nvimage/ErrorMetric.h b/src/nvimage/ErrorMetric.h index aa43d0c..826b8e3 100644 --- a/src/nvimage/ErrorMetric.h +++ b/src/nvimage/ErrorMetric.h @@ -7,19 +7,19 @@ namespace nv { class FloatImage; - float rmsColorError(const FloatImage * ref, const FloatImage * img, bool 
alphaWeight); - float rmsAlphaError(const FloatImage * ref, const FloatImage * img); + NVIMAGE_API float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); + NVIMAGE_API float rmsAlphaError(const FloatImage * ref, const FloatImage * img); - float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); - float averageAlphaError(const FloatImage * ref, const FloatImage * img); + NVIMAGE_API float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); + NVIMAGE_API float averageAlphaError(const FloatImage * ref, const FloatImage * img); - float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight); + NVIMAGE_API float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight); - float cieLabError(const FloatImage * ref, const FloatImage * img); - float cieLab94Error(const FloatImage * ref, const FloatImage * img); - float spatialCieLabError(const FloatImage * ref, const FloatImage * img); + NVIMAGE_API float cieLabError(const FloatImage * ref, const FloatImage * img); + NVIMAGE_API float cieLab94Error(const FloatImage * ref, const FloatImage * img); + NVIMAGE_API float spatialCieLabError(const FloatImage * ref, const FloatImage * img); - float averageAngularError(const FloatImage * img0, const FloatImage * img1); - float rmsAngularError(const FloatImage * img0, const FloatImage * img1); + NVIMAGE_API float averageAngularError(const FloatImage * img0, const FloatImage * img1); + NVIMAGE_API float rmsAngularError(const FloatImage * img0, const FloatImage * img1); } // nv namespace diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h index 42cd86a..ac063d0 100644 --- a/src/nvimage/FloatImage.h +++ b/src/nvimage/FloatImage.h @@ -24,7 +24,7 @@ namespace nv class PolyphaseKernel; /// Multicomponent floating point image class. 
- class FloatImage + class NVIMAGE_CLASS FloatImage { public: @@ -34,78 +34,78 @@ namespace nv WrapMode_Mirror }; - NVIMAGE_API FloatImage(); - NVIMAGE_API FloatImage(const FloatImage & img); - NVIMAGE_API FloatImage(const Image * img); - NVIMAGE_API virtual ~FloatImage(); + FloatImage(); + FloatImage(const FloatImage & img); + FloatImage(const Image * img); + virtual ~FloatImage(); /** @name Conversion. */ //@{ - NVIMAGE_API void initFrom(const Image * img); - NVIMAGE_API Image * createImage(uint base_component = 0, uint num = 4) const; - NVIMAGE_API Image * createImageGammaCorrect(float gamma = 2.2f) const; + void initFrom(const Image * img); + Image * createImage(uint base_component = 0, uint num = 4) const; + Image * createImageGammaCorrect(float gamma = 2.2f) const; //@} /** @name Allocation. */ //@{ - NVIMAGE_API void allocate(uint c, uint w, uint h, uint d = 1); - NVIMAGE_API void free(); // Does not clear members. - NVIMAGE_API void resizeChannelCount(uint c); + void allocate(uint c, uint w, uint h, uint d = 1); + void free(); // Does not clear members. + void resizeChannelCount(uint c); //@} /** @name Manipulation. 
*/ //@{ - NVIMAGE_API void clear(float f = 0.0f); - NVIMAGE_API void clear(uint component, float f = 0.0f); - NVIMAGE_API void copyChannel(uint src, uint dst); + void clear(float f = 0.0f); + void clear(uint component, float f = 0.0f); + void copyChannel(uint src, uint dst); - NVIMAGE_API void normalize(uint base_component); + void normalize(uint base_component); - NVIMAGE_API void packNormals(uint base_component); - NVIMAGE_API void expandNormals(uint base_component); - NVIMAGE_API void scaleBias(uint base_component, uint num, float scale, float add); + void packNormals(uint base_component); + void expandNormals(uint base_component); + void scaleBias(uint base_component, uint num, float scale, float add); - NVIMAGE_API void clamp(uint base_component, uint num, float low, float high); + void clamp(uint base_component, uint num, float low, float high); - NVIMAGE_API void toLinear(uint base_component, uint num, float gamma = 2.2f); - NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f); - NVIMAGE_API void exponentiate(uint base_component, uint num, float power); + void toLinear(uint base_component, uint num, float gamma = 2.2f); + void toGamma(uint base_component, uint num, float gamma = 2.2f); + void exponentiate(uint base_component, uint num, float power); - NVIMAGE_API void transform(uint base_component, const Matrix & m, const Vector4 & offset); - NVIMAGE_API void swizzle(uint base_component, uint r, uint g, uint b, uint a); + void transform(uint base_component, const Matrix & m, const Vector4 & offset); + void swizzle(uint base_component, uint r, uint g, uint b, uint a); - NVIMAGE_API FloatImage * fastDownSample() const; - NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; - NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; - NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; - NVIMAGE_API FloatImage * resize(const Filter & filter, 
uint w, uint h, uint d, WrapMode wm) const; - NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const; - NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const; + FloatImage * fastDownSample() const; + FloatImage * downSample(const Filter & filter, WrapMode wm) const; + FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; + FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; + FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const; + FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const; + FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const; - NVIMAGE_API void convolve(const Kernel2 & k, uint c, WrapMode wm); + void convolve(const Kernel2 & k, uint c, WrapMode wm); //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const; //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const; //@} - NVIMAGE_API float applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const; - NVIMAGE_API float applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; - NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; - NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; - NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const; - NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode 
wm, float * output) const; - NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const; - NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; + float applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const; + float applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; + float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; + float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; + void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; + void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const; + void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; + void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const; + void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const; + void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; - NVIMAGE_API void flipX(); - NVIMAGE_API void flipY(); - NVIMAGE_API void flipZ(); + void flipX(); + void flipY(); + void flipZ(); - NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const; - NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel); + float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const; + void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel); uint width() const { return m_width; } diff --git a/src/nvimage/KtxFile.h b/src/nvimage/KtxFile.h index b1b3674..329a682 100644 --- a/src/nvimage/KtxFile.h 
+++ b/src/nvimage/KtxFile.h @@ -110,7 +110,7 @@ namespace nv const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901; - struct KtxHeader { + struct NVIMAGE_CLASS KtxHeader { uint8 identifier[12]; uint32 endianness; uint32 glType; diff --git a/src/nvimage/NormalMap.h b/src/nvimage/NormalMap.h index fc484c9..6c22fc0 100644 --- a/src/nvimage/NormalMap.h +++ b/src/nvimage/NormalMap.h @@ -44,12 +44,12 @@ namespace nv }; // @@ These two functions should be deprecated: - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); - FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); + NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); + NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); - FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); + NVIMAGE_API FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); - void normalizeNormalMap(FloatImage * img); + NVIMAGE_API void normalizeNormalMap(FloatImage * img); // @@ Add generation of DU/DV maps. 
diff --git a/src/nvmath/Fitting.h b/src/nvmath/Fitting.h index 7a88cd2..4ac8a9d 100644 --- a/src/nvmath/Fitting.h +++ b/src/nvmath/Fitting.h @@ -11,38 +11,38 @@ namespace nv { namespace Fit { - Vector3 computeCentroid(int n, const Vector3 * points); - Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + NVMATH_API Vector3 computeCentroid(int n, const Vector3 * points); + NVMATH_API Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); - Vector4 computeCentroid(int n, const Vector4 * points); - Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + NVMATH_API Vector4 computeCentroid(int n, const Vector4 * points); + NVMATH_API Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric); - Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); - Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); + NVMATH_API Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); + NVMATH_API Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); - Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); - Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); + NVMATH_API Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); + NVMATH_API Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); - Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); - Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + NVMATH_API Vector3 
computePrincipalComponent_PowerMethod(int n, const Vector3 * points); + NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); - Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); - Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); + NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); + NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); - Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); - Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); + NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); + NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); - Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); - Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points); + NVMATH_API Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); + NVMATH_API Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points); - Plane bestPlane(int n, const Vector3 * points); - bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); + NVMATH_API Plane bestPlane(int n, const Vector3 * points); + NVMATH_API bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); - bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); - bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]); + NVMATH_API bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); + NVMATH_API bool 
eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]); // Returns number of clusters [1-4]. - int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); + NVMATH_API int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); } } // nv namespace diff --git a/src/nvmath/Gamma.h b/src/nvmath/Gamma.h index e990a79..f59dd05 100644 --- a/src/nvmath/Gamma.h +++ b/src/nvmath/Gamma.h @@ -30,8 +30,8 @@ namespace nv { // gamma conversion of float array (in-place is allowed) - void powf_5_11(const float* src, float* dst, int count); - void powf_11_5(const float* src, float* dst, int count); + NVMATH_API void powf_5_11(const float* src, float* dst, int count); + NVMATH_API void powf_11_5(const float* src, float* dst, int count); } // nv namespace diff --git a/src/nvmath/Half.cpp b/src/nvmath/Half.cpp index c4ea48f..d979edb 100644 --- a/src/nvmath/Half.cpp +++ b/src/nvmath/Half.cpp @@ -577,6 +577,15 @@ namespace nv { uint32 offset_table[64]; } +uint32 nv::fast_half_to_float(uint16 h) +{ + // Initialize table if necessary. + if (mantissa_table[0] != 0) + half_init_tables(); + uint exp = h >> 10; + return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; +} + void nv::half_init_tables() { // Init mantissa table. @@ -742,7 +751,7 @@ static inline uint16_t float_to_half_branch(uint32_t x) #define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192) #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) -/* Lookup table-based algorithm from “Fast Half Float Conversions” +/* Lookup table-based algorithm from “Fast Half Float Conversions” * by Jeroen van der Zijp, November 2008. No rounding is performed, * and some NaN values may be incorrectly converted to Inf.
*/ static inline uint16_t float_to_half_nobranch(uint32_t x) diff --git a/src/nvmath/Half.h b/src/nvmath/Half.h index f0e60c4..f69fb67 100644 --- a/src/nvmath/Half.h +++ b/src/nvmath/Half.h @@ -6,29 +6,31 @@ namespace nv { - uint32 half_to_float( uint16 h ); - uint16 half_from_float( uint32 f ); + NVMATH_API uint32 half_to_float( uint16 h ); + NVMATH_API uint16 half_from_float( uint32 f ); // vin,vout must be 16 byte aligned. count must be a multiple of 8. // implement a non-SSE version if we need it. For now, this naming makes it clear this is only available when SSE2 is - void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count); + NVMATH_API void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count); - void half_init_tables(); + NVMATH_API void half_init_tables(); - extern uint32 mantissa_table[2048]; - extern uint32 exponent_table[64]; - extern uint32 offset_table[64]; + //extern uint32 mantissa_table[2048]; + //extern uint32 exponent_table[64]; + //extern uint32 offset_table[64]; // Fast half to float conversion based on: // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf - inline uint32 fast_half_to_float(uint16 h) - { - // Initialize table if necessary. - if (mantissa_table[0] != 0) - half_init_tables(); - uint exp = h >> 10; - return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; - } + // inline uint32 fast_half_to_float(uint16 h) + // { + //// Initialize table if necessary. + //if (mantissa_table[0] != 0) + // half_init_tables(); + // uint exp = h >> 10; + // return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; + // } + + NVMATH_API uint32 fast_half_to_float(uint16 h); inline uint16 to_half(float c) { diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h index d1171a1..91a0bfa 100644 --- a/src/nvmath/Matrix.h +++ b/src/nvmath/Matrix.h @@ -48,10 +48,10 @@ namespace nv }; // Solve equation system using LU decomposition and back-substitution. 
- extern bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x); + NVMATH_API bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x); // Solve equation system using Cramer's inverse. - extern bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x); + NVMATH_API bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x); // 3x3 matrix. @@ -87,12 +87,12 @@ namespace nv }; // Solve equation system using LU decomposition and back-substitution. - extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x); + NVMATH_API bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x); // Solve equation system using Cramer's inverse. - extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); + NVMATH_API bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); - extern Matrix3 inverse(const Matrix3 & m); + NVMATH_API Matrix3 inverse(const Matrix3 & m); // 4x4 matrix. @@ -138,16 +138,16 @@ namespace nv }; // Solve equation system using LU decomposition and back-substitution. - extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x); + NVMATH_API bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x); // Solve equation system using Cramer's inverse. - extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x); + NVMATH_API bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x); // Compute inverse using LU decomposition. - extern Matrix inverseLU(const Matrix & m); + NVMATH_API Matrix inverseLU(const Matrix & m); // Compute inverse using Gaussian elimination and partial pivoting. 
- extern Matrix inverse(const Matrix & m); + NVMATH_API Matrix inverse(const Matrix & m); } // nv namespace diff --git a/src/nvthread/CMakeLists.txt b/src/nvthread/CMakeLists.txt index 15dbc4e..61cc0f5 100644 --- a/src/nvthread/CMakeLists.txt +++ b/src/nvthread/CMakeLists.txt @@ -15,6 +15,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) ADD_DEFINITIONS(-DNVTHREAD_EXPORTS) IF(NVTHREAD_SHARED) + ADD_DEFINITIONS(-DNVTHREAD_SHARED=1) ADD_LIBRARY(nvthread SHARED ${THREAD_SRCS}) ELSE(NVTHREAD_SHARED) ADD_LIBRARY(nvthread ${THREAD_SRCS}) diff --git a/src/nvthread/ParallelFor.h b/src/nvthread/ParallelFor.h index f69e96c..34b838a 100644 --- a/src/nvthread/ParallelFor.h +++ b/src/nvthread/ParallelFor.h @@ -14,7 +14,7 @@ namespace nv typedef void ForTask(void * context, /*int tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here. - struct ParallelFor { + struct NVTHREAD_CLASS ParallelFor { ParallelFor(ForTask * task, void * context); ~ParallelFor(); diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt index 445aa1e..501001e 100644 --- a/src/nvtt/CMakeLists.txt +++ b/src/nvtt/CMakeLists.txt @@ -44,6 +44,7 @@ INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104) ADD_DEFINITIONS(-DNVTT_EXPORTS) IF(NVTT_SHARED) + ADD_DEFINITIONS(-DNVTT_SHARED) ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) ELSE(NVTT_SHARED) ADD_LIBRARY(nvtt ${NVTT_SRCS})