fix cmake script

This commit is contained in:
will 2018-07-07 18:34:05 +08:00
parent 95bd6193cc
commit 714faa03e6
19 changed files with 148 additions and 132 deletions

View File

@ -35,11 +35,13 @@ IF(CMAKE_BUILD_TYPE MATCHES "debug")
ADD_DEFINITIONS(-D_DEBUG=1) ADD_DEFINITIONS(-D_DEBUG=1)
ENDIF() ENDIF()
OPTION(NVTT_SHARED "build shared library" OFF)
IF(NVTT_SHARED) IF(NVTT_SHARED)
SET(NVCORE_SHARED TRUE) SET(NVCORE_SHARED TRUE)
SET(NVMATH_SHARED TRUE) SET(NVMATH_SHARED TRUE)
SET(NVIMAGE_SHARED TRUE) SET(NVIMAGE_SHARED TRUE)
SET(NVTHREAD_SHARED TRUE)
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
ADD_SUBDIRECTORY(extern) ADD_SUBDIRECTORY(extern)

View File

@ -9,5 +9,5 @@ ADD_SUBDIRECTORY(EtcLib)
ADD_SUBDIRECTORY(rg_etc1_v104) ADD_SUBDIRECTORY(rg_etc1_v104)
#ADD_SUBDIRECTORY(etcpack) #ADD_SUBDIRECTORY(etcpack)
ADD_SUBDIRECTORY(butteraugli) # ADD_SUBDIRECTORY(butteraugli)

View File

@ -202,11 +202,11 @@ namespace nv
void stripExtension(); void stripExtension();
// statics // statics
NVCORE_API static char separator(); static char separator();
NVCORE_API static const char * fileName(const char *); static const char * fileName(const char *);
NVCORE_API static const char * extension(const char *); static const char * extension(const char *);
NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR); static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
}; };

View File

@ -30,8 +30,8 @@ namespace nv {
NV_FORCEINLINE uint64 fastCpuClock() { return 0; } NV_FORCEINLINE uint64 fastCpuClock() { return 0; }
#endif #endif
uint64 systemClockFrequency(); NVCORE_API uint64 systemClockFrequency();
uint64 systemClock(); NVCORE_API uint64 systemClock();
class NVCORE_CLASS Timer class NVCORE_CLASS Timer
{ {

View File

@ -40,7 +40,7 @@ namespace nv
/// DXT1 block. /// DXT1 block.
struct BlockDXT1 struct NVIMAGE_CLASS BlockDXT1
{ {
Color16 col0; Color16 col0;
Color16 col1; Color16 col1;
@ -74,7 +74,7 @@ namespace nv
/// DXT3 alpha block with explicit alpha. /// DXT3 alpha block with explicit alpha.
struct AlphaBlockDXT3 struct NVIMAGE_CLASS AlphaBlockDXT3
{ {
union { union {
struct { struct {
@ -106,7 +106,7 @@ namespace nv
/// DXT3 block. /// DXT3 block.
struct BlockDXT3 struct NVIMAGE_CLASS BlockDXT3
{ {
AlphaBlockDXT3 alpha; AlphaBlockDXT3 alpha;
BlockDXT1 color; BlockDXT1 color;
@ -120,7 +120,7 @@ namespace nv
/// DXT5 alpha block. /// DXT5 alpha block.
struct AlphaBlockDXT5 struct NVIMAGE_CLASS AlphaBlockDXT5
{ {
union { union {
struct { struct {
@ -163,7 +163,7 @@ namespace nv
/// DXT5 block. /// DXT5 block.
struct BlockDXT5 struct NVIMAGE_CLASS BlockDXT5
{ {
AlphaBlockDXT5 alpha; AlphaBlockDXT5 alpha;
BlockDXT1 color; BlockDXT1 color;
@ -176,7 +176,7 @@ namespace nv
}; };
/// ATI1 block. /// ATI1 block.
struct BlockATI1 struct NVIMAGE_CLASS BlockATI1
{ {
AlphaBlockDXT5 alpha; AlphaBlockDXT5 alpha;
@ -187,7 +187,7 @@ namespace nv
}; };
/// ATI2 block. /// ATI2 block.
struct BlockATI2 struct NVIMAGE_CLASS BlockATI2
{ {
AlphaBlockDXT5 x; AlphaBlockDXT5 x;
AlphaBlockDXT5 y; AlphaBlockDXT5 y;
@ -199,7 +199,7 @@ namespace nv
}; };
/// CTX1 block. /// CTX1 block.
struct BlockCTX1 struct NVIMAGE_CLASS BlockCTX1
{ {
uint8 col0[2]; uint8 col0[2];
uint8 col1[2]; uint8 col1[2];
@ -218,14 +218,14 @@ namespace nv
}; };
/// BC6 block. /// BC6 block.
struct BlockBC6 struct NVIMAGE_CLASS BlockBC6
{ {
uint8 data[16]; // Not even going to try to write a union for this thing. uint8 data[16]; // Not even going to try to write a union for this thing.
void decodeBlock(Vector4 colors[16]) const; void decodeBlock(Vector4 colors[16]) const;
}; };
/// BC7 block. /// BC7 block.
struct BlockBC7 struct NVIMAGE_CLASS BlockBC7
{ {
uint8 data[16]; // Not even going to try to write a union for this thing. uint8 data[16]; // Not even going to try to write a union for this thing.
void decodeBlock(ColorBlock * block) const; void decodeBlock(ColorBlock * block) const;

View File

@ -4,6 +4,7 @@
#ifndef NV_IMAGE_COLORBLOCK_H #ifndef NV_IMAGE_COLORBLOCK_H
#define NV_IMAGE_COLORBLOCK_H #define NV_IMAGE_COLORBLOCK_H
#include "nvimage/nvimage.h"
#include "nvmath/Color.h" #include "nvmath/Color.h"
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
@ -14,7 +15,7 @@ namespace nv
/// Uncompressed 4x4 color block. /// Uncompressed 4x4 color block.
struct ColorBlock struct NVIMAGE_CLASS ColorBlock
{ {
ColorBlock(); ColorBlock();
ColorBlock(const uint * linearImage); ColorBlock(const uint * linearImage);
@ -128,7 +129,7 @@ namespace nv
/// Uncompressed 4x4 alpha block. /// Uncompressed 4x4 alpha block.
struct AlphaBlock4x4 struct NVIMAGE_CLASS AlphaBlock4x4
{ {
void init(uint8 value); void init(uint8 value);
void init(const ColorBlock & src, uint channel); void init(const ColorBlock & src, uint channel);

View File

@ -268,9 +268,9 @@ namespace nv
DXGI_FORMAT_BC7_UNORM_SRGB = 99, DXGI_FORMAT_BC7_UNORM_SRGB = 99,
}; };
extern uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); NVIMAGE_API uint findD3D9Format(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);
extern uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask); NVIMAGE_API uint findDXGIFormat(uint bitcount, uint rmask, uint gmask, uint bmask, uint amask);
struct RGBAPixelFormat struct RGBAPixelFormat
{ {

View File

@ -7,19 +7,19 @@ namespace nv
{ {
class FloatImage; class FloatImage;
float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); NVIMAGE_API float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float rmsAlphaError(const FloatImage * ref, const FloatImage * img); NVIMAGE_API float rmsAlphaError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); NVIMAGE_API float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img); NVIMAGE_API float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight); NVIMAGE_API float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight);
float cieLabError(const FloatImage * ref, const FloatImage * img); NVIMAGE_API float cieLabError(const FloatImage * ref, const FloatImage * img);
float cieLab94Error(const FloatImage * ref, const FloatImage * img); NVIMAGE_API float cieLab94Error(const FloatImage * ref, const FloatImage * img);
float spatialCieLabError(const FloatImage * ref, const FloatImage * img); NVIMAGE_API float spatialCieLabError(const FloatImage * ref, const FloatImage * img);
float averageAngularError(const FloatImage * img0, const FloatImage * img1); NVIMAGE_API float averageAngularError(const FloatImage * img0, const FloatImage * img1);
float rmsAngularError(const FloatImage * img0, const FloatImage * img1); NVIMAGE_API float rmsAngularError(const FloatImage * img0, const FloatImage * img1);
} // nv namespace } // nv namespace

View File

@ -24,7 +24,7 @@ namespace nv
class PolyphaseKernel; class PolyphaseKernel;
/// Multicomponent floating point image class. /// Multicomponent floating point image class.
class FloatImage class NVIMAGE_CLASS FloatImage
{ {
public: public:
@ -34,78 +34,78 @@ namespace nv
WrapMode_Mirror WrapMode_Mirror
}; };
NVIMAGE_API FloatImage(); FloatImage();
NVIMAGE_API FloatImage(const FloatImage & img); FloatImage(const FloatImage & img);
NVIMAGE_API FloatImage(const Image * img); FloatImage(const Image * img);
NVIMAGE_API virtual ~FloatImage(); virtual ~FloatImage();
/** @name Conversion. */ /** @name Conversion. */
//@{ //@{
NVIMAGE_API void initFrom(const Image * img); void initFrom(const Image * img);
NVIMAGE_API Image * createImage(uint base_component = 0, uint num = 4) const; Image * createImage(uint base_component = 0, uint num = 4) const;
NVIMAGE_API Image * createImageGammaCorrect(float gamma = 2.2f) const; Image * createImageGammaCorrect(float gamma = 2.2f) const;
//@} //@}
/** @name Allocation. */ /** @name Allocation. */
//@{ //@{
NVIMAGE_API void allocate(uint c, uint w, uint h, uint d = 1); void allocate(uint c, uint w, uint h, uint d = 1);
NVIMAGE_API void free(); // Does not clear members. void free(); // Does not clear members.
NVIMAGE_API void resizeChannelCount(uint c); void resizeChannelCount(uint c);
//@} //@}
/** @name Manipulation. */ /** @name Manipulation. */
//@{ //@{
NVIMAGE_API void clear(float f = 0.0f); void clear(float f = 0.0f);
NVIMAGE_API void clear(uint component, float f = 0.0f); void clear(uint component, float f = 0.0f);
NVIMAGE_API void copyChannel(uint src, uint dst); void copyChannel(uint src, uint dst);
NVIMAGE_API void normalize(uint base_component); void normalize(uint base_component);
NVIMAGE_API void packNormals(uint base_component); void packNormals(uint base_component);
NVIMAGE_API void expandNormals(uint base_component); void expandNormals(uint base_component);
NVIMAGE_API void scaleBias(uint base_component, uint num, float scale, float add); void scaleBias(uint base_component, uint num, float scale, float add);
NVIMAGE_API void clamp(uint base_component, uint num, float low, float high); void clamp(uint base_component, uint num, float low, float high);
NVIMAGE_API void toLinear(uint base_component, uint num, float gamma = 2.2f); void toLinear(uint base_component, uint num, float gamma = 2.2f);
NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f); void toGamma(uint base_component, uint num, float gamma = 2.2f);
NVIMAGE_API void exponentiate(uint base_component, uint num, float power); void exponentiate(uint base_component, uint num, float power);
NVIMAGE_API void transform(uint base_component, const Matrix & m, const Vector4 & offset); void transform(uint base_component, const Matrix & m, const Vector4 & offset);
NVIMAGE_API void swizzle(uint base_component, uint r, uint g, uint b, uint a); void swizzle(uint base_component, uint r, uint g, uint b, uint a);
NVIMAGE_API FloatImage * fastDownSample() const; FloatImage * fastDownSample() const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; FloatImage * downSample(const Filter & filter, WrapMode wm) const;
NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const; FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const; FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const;
NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const; FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const;
NVIMAGE_API void convolve(const Kernel2 & k, uint c, WrapMode wm); void convolve(const Kernel2 & k, uint c, WrapMode wm);
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const; //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
//NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const; //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
//@} //@}
NVIMAGE_API float applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const; float applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; float applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const; void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const; void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const; void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void flipX(); void flipX();
NVIMAGE_API void flipY(); void flipY();
NVIMAGE_API void flipZ(); void flipZ();
NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const; float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const;
NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel); void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel);
uint width() const { return m_width; } uint width() const { return m_width; }

View File

@ -110,7 +110,7 @@ namespace nv
const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901; const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901;
struct KtxHeader { struct NVIMAGE_CLASS KtxHeader {
uint8 identifier[12]; uint8 identifier[12];
uint32 endianness; uint32 endianness;
uint32 glType; uint32 glType;

View File

@ -44,12 +44,12 @@ namespace nv
}; };
// @@ These two functions should be deprecated: // @@ These two functions should be deprecated:
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3); NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter = NormalMapFilter_Sobel3x3);
FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights); NVIMAGE_API FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights);
FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights); NVIMAGE_API FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights);
void normalizeNormalMap(FloatImage * img); NVIMAGE_API void normalizeNormalMap(FloatImage * img);
// @@ Add generation of DU/DV maps. // @@ Add generation of DU/DV maps.

View File

@ -11,38 +11,38 @@ namespace nv
{ {
namespace Fit namespace Fit
{ {
Vector3 computeCentroid(int n, const Vector3 * points); NVMATH_API Vector3 computeCentroid(int n, const Vector3 * points);
Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric); NVMATH_API Vector3 computeCentroid(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector4 computeCentroid(int n, const Vector4 * points); NVMATH_API Vector4 computeCentroid(int n, const Vector4 * points);
Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric); NVMATH_API Vector4 computeCentroid(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
Vector3 computeCovariance(int n, const Vector3 * points, float * covariance); NVMATH_API Vector3 computeCovariance(int n, const Vector3 * points, float * covariance);
Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance); NVMATH_API Vector3 computeCovariance(int n, const Vector3 * points, const float * weights, const Vector3 & metric, float * covariance);
Vector4 computeCovariance(int n, const Vector4 * points, float * covariance); NVMATH_API Vector4 computeCovariance(int n, const Vector4 * points, float * covariance);
Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance); NVMATH_API Vector4 computeCovariance(int n, const Vector4 * points, const float * weights, const Vector4 & metric, float * covariance);
Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points); NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points);
Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric); NVMATH_API Vector3 computePrincipalComponent_PowerMethod(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points); NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points);
Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric); NVMATH_API Vector3 computePrincipalComponent_EigenSolver(int n, const Vector3 * points, const float * weights, const Vector3 & metric);
Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points); NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points);
Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric); NVMATH_API Vector4 computePrincipalComponent_EigenSolver(int n, const Vector4 * points, const float * weights, const Vector4 & metric);
Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points); NVMATH_API Vector3 computePrincipalComponent_SVD(int n, const Vector3 * points);
Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points); NVMATH_API Vector4 computePrincipalComponent_SVD(int n, const Vector4 * points);
Plane bestPlane(int n, const Vector3 * points); NVMATH_API Plane bestPlane(int n, const Vector3 * points);
bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON); NVMATH_API bool isPlanar(int n, const Vector3 * points, float epsilon = NV_EPSILON);
bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]); NVMATH_API bool eigenSolveSymmetric3(const float matrix[6], float eigenValues[3], Vector3 eigenVectors[3]);
bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]); NVMATH_API bool eigenSolveSymmetric4(const float matrix[10], float eigenValues[4], Vector4 eigenVectors[4]);
// Returns number of clusters [1-4]. // Returns number of clusters [1-4].
int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster); NVMATH_API int compute4Means(int n, const Vector3 * points, const float * weights, const Vector3 & metric, Vector3 * cluster);
} }
} // nv namespace } // nv namespace

View File

@ -30,8 +30,8 @@
namespace nv { namespace nv {
// gamma conversion of float array (in-place is allowed) // gamma conversion of float array (in-place is allowed)
void powf_5_11(const float* src, float* dst, int count); NVMATH_API void powf_5_11(const float* src, float* dst, int count);
void powf_11_5(const float* src, float* dst, int count); NVMATH_API void powf_11_5(const float* src, float* dst, int count);
} // nv namespace } // nv namespace

View File

@ -577,6 +577,15 @@ namespace nv {
uint32 offset_table[64]; uint32 offset_table[64];
} }
// Table-driven conversion of a 16-bit half value to a 32-bit result.
//
// h: the 16-bit input. Its upper 6 bits (h >> 10) index exponent_table and
//    offset_table; its lower 10 bits (h & 0x3ff) are added to the selected
//    offset to index mantissa_table.
// Returns the sum of the selected mantissa_table and exponent_table entries.
uint32 nv::fast_half_to_float(uint16 h)
{
    // Build the lookup tables exactly once, on first use.
    // The previous guard (`mantissa_table[0] != 0`) could never trigger on
    // uninitialized tables: namespace-scope arrays start out zero-filled, so
    // the lazy initialization was dead code and callers that had not invoked
    // half_init_tables() themselves read all-zero tables.
    // A function-local static does not depend on the table contents at all.
    // NOTE(review): one-time initialization of local statics is thread-safe
    // only on compilers implementing the C++11 rules -- confirm for the
    // toolchains this project targets.
    static const bool tablesReady = (half_init_tables(), true);
    (void)tablesReady; // silence unused-variable warnings

    uint exp = h >> 10;
    return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp];
}
void nv::half_init_tables() void nv::half_init_tables()
{ {
// Init mantissa table. // Init mantissa table.
@ -742,7 +751,7 @@ static inline uint16_t float_to_half_branch(uint32_t x)
#define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192) #define S256(x) S64((x)), S64((x)+64), S64((x)+128), S64((x)+192)
#define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768) #define S1024(x) S256((x)), S256((x)+256), S256((x)+512), S256((x)+768)
/* Lookup table-based algorithm from “Fast Half Float Conversions” /* Lookup table-based algorithm from “Fast Half Float Conversions”
* by Jeroen van der Zijp, November 2008. No rounding is performed, * by Jeroen van der Zijp, November 2008. No rounding is performed,
* and some NaN values may be incorrectly converted to Inf. */ * and some NaN values may be incorrectly converted to Inf. */
static inline uint16_t float_to_half_nobranch(uint32_t x) static inline uint16_t float_to_half_nobranch(uint32_t x)

View File

@ -6,29 +6,31 @@
namespace nv { namespace nv {
uint32 half_to_float( uint16 h ); NVMATH_API uint32 half_to_float( uint16 h );
uint16 half_from_float( uint32 f ); NVMATH_API uint16 half_from_float( uint32 f );
// vin,vout must be 16 byte aligned. count must be a multiple of 8. // vin,vout must be 16 byte aligned. count must be a multiple of 8.
// implement a non-SSE version if we need it. For now, this naming makes it clear this is only available when SSE2 is // implement a non-SSE version if we need it. For now, this naming makes it clear this is only available when SSE2 is
void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count); NVMATH_API void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count);
void half_init_tables(); NVMATH_API void half_init_tables();
extern uint32 mantissa_table[2048]; //extern uint32 mantissa_table[2048];
extern uint32 exponent_table[64]; //extern uint32 exponent_table[64];
extern uint32 offset_table[64]; //extern uint32 offset_table[64];
// Fast half to float conversion based on: // Fast half to float conversion based on:
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf // http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
inline uint32 fast_half_to_float(uint16 h) // inline uint32 fast_half_to_float(uint16 h)
{ // {
// Initialize table if necessary. //// Initialize table if necessary.
if (mantissa_table[0] != 0) //if (mantissa_table[0] != 0)
half_init_tables(); // half_init_tables();
uint exp = h >> 10; // uint exp = h >> 10;
return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp]; // return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp];
} // }
NVMATH_API uint32 fast_half_to_float(uint16 h);
inline uint16 to_half(float c) { inline uint16 to_half(float c) {

View File

@ -48,10 +48,10 @@ namespace nv
}; };
// Solve equation system using LU decomposition and back-substitution. // Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x); NVMATH_API bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x);
// Solve equation system using Cramer's inverse. // Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x); NVMATH_API bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x);
// 3x3 matrix. // 3x3 matrix.
@ -87,12 +87,12 @@ namespace nv
}; };
// Solve equation system using LU decomposition and back-substitution. // Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x); NVMATH_API bool solveLU(const Matrix3 & m, const Vector3 & b, Vector3 * x);
// Solve equation system using Cramer's inverse. // Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); NVMATH_API bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
extern Matrix3 inverse(const Matrix3 & m); NVMATH_API Matrix3 inverse(const Matrix3 & m);
// 4x4 matrix. // 4x4 matrix.
@ -138,16 +138,16 @@ namespace nv
}; };
// Solve equation system using LU decomposition and back-substitution. // Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x); NVMATH_API bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
// Solve equation system using Cramer's inverse. // Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x); NVMATH_API bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
// Compute inverse using LU decomposition. // Compute inverse using LU decomposition.
extern Matrix inverseLU(const Matrix & m); NVMATH_API Matrix inverseLU(const Matrix & m);
// Compute inverse using Gaussian elimination and partial pivoting. // Compute inverse using Gaussian elimination and partial pivoting.
extern Matrix inverse(const Matrix & m); NVMATH_API Matrix inverse(const Matrix & m);
} // nv namespace } // nv namespace

View File

@ -15,6 +15,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVTHREAD_EXPORTS) ADD_DEFINITIONS(-DNVTHREAD_EXPORTS)
IF(NVTHREAD_SHARED) IF(NVTHREAD_SHARED)
ADD_DEFINITIONS(-DNVTHREAD_SHARED=1)
ADD_LIBRARY(nvthread SHARED ${THREAD_SRCS}) ADD_LIBRARY(nvthread SHARED ${THREAD_SRCS})
ELSE(NVTHREAD_SHARED) ELSE(NVTHREAD_SHARED)
ADD_LIBRARY(nvthread ${THREAD_SRCS}) ADD_LIBRARY(nvthread ${THREAD_SRCS})

View File

@ -14,7 +14,7 @@ namespace nv
typedef void ForTask(void * context, /*int tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here. typedef void ForTask(void * context, /*int tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here.
struct ParallelFor { struct NVTHREAD_CLASS ParallelFor {
ParallelFor(ForTask * task, void * context); ParallelFor(ForTask * task, void * context);
~ParallelFor(); ~ParallelFor();

View File

@ -44,6 +44,7 @@ INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104)
ADD_DEFINITIONS(-DNVTT_EXPORTS) ADD_DEFINITIONS(-DNVTT_EXPORTS)
IF(NVTT_SHARED) IF(NVTT_SHARED)
ADD_DEFINITIONS(-DNVTT_SHARED)
ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS}) ADD_LIBRARY(nvtt SHARED ${NVTT_SRCS})
ELSE(NVTT_SHARED) ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})