From 4f58a1bf8b844ca0a8c70fb5a595f53b9d8c9e54 Mon Sep 17 00:00:00 2001 From: castano Date: Wed, 18 Mar 2009 04:04:09 +0000 Subject: [PATCH] Merge changes from trunk. --- ChangeLog | 8 +- src/nvcore/Containers.h | 4 +- src/nvcore/Debug.cpp | 6 +- src/nvcore/DefsVcWin32.h | 2 - src/nvcore/StrLib.h | 6 +- src/nvimage/Filter.cpp | 6 +- src/nvimage/Filter.h | 114 ++++++++++++------------ src/nvimage/FloatImage.cpp | 171 +++++++++++++++++++++++++++++++++--- src/nvimage/FloatImage.h | 33 ++++--- src/nvimage/NormalMap.cpp | 9 +- src/nvimage/PsdFile.h | 2 +- src/nvimage/Quantize.cpp | 155 ++++++++++++++++---------------- src/nvimage/Quantize.h | 3 + src/nvmath/nvmath.h | 35 ++++++-- src/nvtt/Compressor.cpp | 9 +- src/nvtt/cuda/CudaMath.h | 2 +- src/nvtt/cuda/CudaUtils.cpp | 27 +++++- 17 files changed, 403 insertions(+), 189 deletions(-) diff --git a/ChangeLog b/ChangeLog index 507b526..67005fe 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,13 @@ NVIDIA Texture Tools version 2.0.6 * Fix dll version checking. - * Detect CUDA 2.1 correctly. + * Detect CUDA 2.1 and future CUDA versions correctly. * Print CUDA detection message in nvcompress. + * Select the fastest CUDA device. * Compile squish with -fPIC. Fixes issue 74. - * Merge changes from trunk to fix warnings under gcc 4.3.2. + * Fix warnings under gcc 4.3.2. + * Fix nvzoom option typo by Frank Richter. Fixes issue 81. + * Do not use CUDA to compress small mipmaps. Fixes issue 76. + * Compute mipmaps of semi-transparent images correctly. NVIDIA Texture Tools version 2.0.5 * Fix error in single color compressor. Fixes issue 66. diff --git a/src/nvcore/Containers.h b/src/nvcore/Containers.h index 5455a2e..f0b63d4 100644 --- a/src/nvcore/Containers.h +++ b/src/nvcore/Containers.h @@ -824,13 +824,13 @@ namespace nv } /// Number of entries in the hash. - int size() + int size() const { return entry_count; } /// Number of entries in the hash. 
- int count() + int count() const { return size(); } diff --git a/src/nvcore/Debug.cpp b/src/nvcore/Debug.cpp index c861a66..a3715a7 100644 --- a/src/nvcore/Debug.cpp +++ b/src/nvcore/Debug.cpp @@ -136,7 +136,11 @@ namespace #if defined(HAVE_EXECINFO_H) // NV_OS_LINUX static bool nvHasStackTrace() { +#if NV_OS_DARWIN return backtrace != NULL; +#else + return true; +#endif } static void nvPrintStackTrace(void * trace[], int size, int start=0) { @@ -401,7 +405,7 @@ namespace { void * trace[64]; int size = backtrace(trace, 64); - nvPrintStackTrace(trace, size, 3); + nvPrintStackTrace(trace, size, 2); } # endif diff --git a/src/nvcore/DefsVcWin32.h b/src/nvcore/DefsVcWin32.h index c025625..c1b6d36 100644 --- a/src/nvcore/DefsVcWin32.h +++ b/src/nvcore/DefsVcWin32.h @@ -72,8 +72,6 @@ typedef uint32 uint; #pragma warning(disable : 4711) // function selected for automatic inlining #pragma warning(disable : 4725) // Pentium fdiv bug -#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized - #pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged. 
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h index 611cade..e425bb6 100644 --- a/src/nvcore/StrLib.h +++ b/src/nvcore/StrLib.h @@ -137,9 +137,9 @@ namespace nv void stripExtension(); // statics - static char separator(); - static const char * fileName(const char *); - static const char * extension(const char *); + NVCORE_API static char separator(); + NVCORE_API static const char * fileName(const char *); + NVCORE_API static const char * extension(const char *); }; diff --git a/src/nvimage/Filter.cpp b/src/nvimage/Filter.cpp index 7677ce0..43c9ef7 100644 --- a/src/nvimage/Filter.cpp +++ b/src/nvimage/Filter.cpp @@ -33,11 +33,10 @@ * http://www.dspguide.com/ch16.htm */ +#include "Filter.h" -#include // swap -#include // fabs #include // Vector4 -#include +#include // swap using namespace nv; @@ -582,7 +581,6 @@ PolyphaseKernel::PolyphaseKernel(const Filter & f, uint srcLength, uint dstLengt m_data[i * m_windowSize + j] /= total; } } - } PolyphaseKernel::~PolyphaseKernel() diff --git a/src/nvimage/Filter.h b/src/nvimage/Filter.h index 55171ac..89b5606 100644 --- a/src/nvimage/Filter.h +++ b/src/nvimage/Filter.h @@ -11,16 +11,16 @@ namespace nv class Vector4; /// Base filter class. 
- class Filter + class NVIMAGE_CLASS Filter { public: - NVIMAGE_API Filter(float width); - NVIMAGE_API virtual ~Filter(); + Filter(float width); + virtual ~Filter(); - NVIMAGE_API float width() const { return m_width; } - NVIMAGE_API float sampleDelta(float x, float scale) const; - NVIMAGE_API float sampleBox(float x, float scale, int samples) const; - NVIMAGE_API float sampleTriangle(float x, float scale, int samples) const; + float width() const { return m_width; } + float sampleDelta(float x, float scale) const; + float sampleBox(float x, float scale, int samples) const; + float sampleTriangle(float x, float scale, int samples) const; virtual float evaluate(float x) const = 0; @@ -29,56 +29,56 @@ namespace nv }; // Box filter. - class BoxFilter : public Filter + class NVIMAGE_CLASS BoxFilter : public Filter { public: - NVIMAGE_API BoxFilter(); - NVIMAGE_API BoxFilter(float width); - NVIMAGE_API virtual float evaluate(float x) const; + BoxFilter(); + BoxFilter(float width); + virtual float evaluate(float x) const; }; // Triangle (bilinear/tent) filter. - class TriangleFilter : public Filter + class NVIMAGE_CLASS TriangleFilter : public Filter { public: - NVIMAGE_API TriangleFilter(); - NVIMAGE_API TriangleFilter(float width); - NVIMAGE_API virtual float evaluate(float x) const; + TriangleFilter(); + TriangleFilter(float width); + virtual float evaluate(float x) const; }; // Quadratic (bell) filter. - class QuadraticFilter : public Filter + class NVIMAGE_CLASS QuadraticFilter : public Filter { public: - NVIMAGE_API QuadraticFilter(); - NVIMAGE_API virtual float evaluate(float x) const; + QuadraticFilter(); + virtual float evaluate(float x) const; }; // Cubic filter from Thatcher Ulrich. 
- class CubicFilter : public Filter + class NVIMAGE_CLASS CubicFilter : public Filter { public: - NVIMAGE_API CubicFilter(); - NVIMAGE_API virtual float evaluate(float x) const; + CubicFilter(); + virtual float evaluate(float x) const; }; // Cubic b-spline filter from Paul Heckbert. - class BSplineFilter : public Filter + class NVIMAGE_CLASS BSplineFilter : public Filter { public: - NVIMAGE_API BSplineFilter(); - NVIMAGE_API virtual float evaluate(float x) const; + BSplineFilter(); + virtual float evaluate(float x) const; }; /// Mitchell & Netravali's two-param cubic /// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88 - class MitchellFilter : public Filter + class NVIMAGE_CLASS MitchellFilter : public Filter { public: - NVIMAGE_API MitchellFilter(); - NVIMAGE_API virtual float evaluate(float x) const; + MitchellFilter(); + virtual float evaluate(float x) const; - NVIMAGE_API void setParameters(float a, float b); + void setParameters(float b, float c); private: float p0, p2, p3; @@ -86,29 +86,29 @@ namespace nv }; // Lanczos3 filter. - class LanczosFilter : public Filter + class NVIMAGE_CLASS LanczosFilter : public Filter { public: - NVIMAGE_API LanczosFilter(); - NVIMAGE_API virtual float evaluate(float x) const; + LanczosFilter(); + virtual float evaluate(float x) const; }; // Sinc filter. - class SincFilter : public Filter + class NVIMAGE_CLASS SincFilter : public Filter { public: - NVIMAGE_API SincFilter(float w); - NVIMAGE_API virtual float evaluate(float x) const; + SincFilter(float w); + virtual float evaluate(float x) const; }; // Kaiser filter. 
- class KaiserFilter : public Filter + class NVIMAGE_CLASS KaiserFilter : public Filter { public: - NVIMAGE_API KaiserFilter(float w); - NVIMAGE_API virtual float evaluate(float x) const; + KaiserFilter(float w); + virtual float evaluate(float x) const; - NVIMAGE_API void setParameters(float a, float stretch); + void setParameters(float a, float stretch); private: float alpha; @@ -118,12 +118,12 @@ namespace nv /// A 1D kernel. Used to precompute filter weights. - class Kernel1 + class NVIMAGE_CLASS Kernel1 { NV_FORBID_COPY(Kernel1); public: - NVIMAGE_API Kernel1(const Filter & f, int iscale, int samples = 32); - NVIMAGE_API ~Kernel1(); + Kernel1(const Filter & f, int iscale, int samples = 32); + ~Kernel1(); float valueAt(uint x) const { nvDebugCheck(x < (uint)m_windowSize); @@ -138,7 +138,7 @@ namespace nv return m_width; } - NVIMAGE_API void debugPrint(); + void debugPrint(); private: int m_windowSize; @@ -148,15 +148,15 @@ namespace nv /// A 2D kernel. - class Kernel2 + class NVIMAGE_CLASS Kernel2 { public: - NVIMAGE_API Kernel2(uint width); - NVIMAGE_API Kernel2(const Kernel2 & k); - NVIMAGE_API ~Kernel2(); + Kernel2(uint width); + Kernel2(const Kernel2 & k); + ~Kernel2(); - NVIMAGE_API void normalize(); - NVIMAGE_API void transpose(); + void normalize(); + void transpose(); float valueAt(uint x, uint y) const { return m_data[y * m_windowSize + x]; @@ -166,12 +166,12 @@ namespace nv return m_windowSize; } - NVIMAGE_API void initLaplacian(); - NVIMAGE_API void initEdgeDetection(); - NVIMAGE_API void initSobel(); - NVIMAGE_API void initPrewitt(); + void initLaplacian(); + void initEdgeDetection(); + void initSobel(); + void initPrewitt(); - NVIMAGE_API void initBlendedSobel(const Vector4 & scale); + void initBlendedSobel(const Vector4 & scale); private: const uint m_windowSize; @@ -180,12 +180,12 @@ namespace nv /// A 1D polyphase kernel - class PolyphaseKernel + class NVIMAGE_CLASS PolyphaseKernel { NV_FORBID_COPY(PolyphaseKernel); public: - NVIMAGE_API 
PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); - NVIMAGE_API ~PolyphaseKernel(); + PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); + ~PolyphaseKernel(); int windowSize() const { return m_windowSize; @@ -205,7 +205,7 @@ namespace nv return m_data[column * m_windowSize + x]; } - NVIMAGE_API void debugPrint() const; + void debugPrint() const; private: int m_windowSize; diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp index e16ae35..90818ca 100644 --- a/src/nvimage/FloatImage.cpp +++ b/src/nvimage/FloatImage.cpp @@ -1,16 +1,18 @@ // This code is in the public domain -- castanyo@yahoo.es -#include -#include - -#include - #include "FloatImage.h" #include "Filter.h" #include "Image.h" +#include +#include + +#include +#include + #include + using namespace nv; namespace @@ -140,7 +142,8 @@ Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const /// Allocate a 2d float image of the given format and the given extents. void FloatImage::allocate(uint c, uint w, uint h) { - nvCheck(m_mem == NULL); + free(); + m_width = w; m_height = h; m_componentNum = c; @@ -151,7 +154,6 @@ void FloatImage::allocate(uint c, uint w, uint h) /// Free the image, but don't clear the members. void FloatImage::free() { - nvCheck(m_mem != NULL); nv::mem::free( reinterpret_cast(m_mem) ); m_mem = NULL; } @@ -549,6 +551,15 @@ FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const return resize(filter, w, h, wm); } +/// Downsample applying a 1D kernel separately in each dimension. +FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const +{ + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + + return resize(filter, w, h, wm, alpha); +} + /// Downsample applying a 1D kernel separately in each dimension. 
FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const @@ -620,10 +631,56 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode return dst_image.release(); } +/// Resize to w x h applying a 1D kernel separately in each dimension, weighting each sample by the given alpha channel. +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const +{ + nvCheck(alpha < m_componentNum); + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + + { + tmp_image->allocate(m_componentNum, w, m_height); + dst_image->allocate(m_componentNum, w, h); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint c = 0; c < m_componentNum; c++) + { + float * tmp_channel = tmp_image->channel(c); + + for (uint y = 0; y < m_height; y++) { + this->applyKernelHorizontal(xkernel, y, c, alpha, wm, tmp_channel + y * w); + } + } + + // Process all channels before applying vertical kernel to make sure alpha has been computed. + + for (uint c = 0; c < m_componentNum; c++) + { + float * dst_channel = dst_image->channel(c); + + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelVertical(ykernel, x, c, alpha, wm, tmp_column.unsecureBuffer()); + + for (uint y = 0; y < h; y++) { + dst_channel[y * w + x] = tmp_column[y]; + } + } + } + } + + return dst_image.release(); +} + /// Apply 2D kernel at the given coordinates and return result. -float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const +float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const { nvDebugCheck(k != NULL); @@ -652,7 +709,7 @@ float FloatImage::applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode w /// Apply 1D vertical kernel at the given coordinates and return result. 
-float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const +float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const { nvDebugCheck(k != NULL); @@ -674,7 +731,7 @@ float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, int c, Wr } /// Apply 1D horizontal kernel at the given coordinates and return result. -float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const +float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const { nvDebugCheck(k != NULL); @@ -697,7 +754,7 @@ float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, /// Apply 1D vertical kernel at the given coordinates and return result. -void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const +void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * __restrict output) const { const uint length = k.length(); const float scale = float(length) / float(m_height); @@ -729,7 +786,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, int c, Wr } /// Apply 1D horizontal kernel at the given coordinates and return result. -void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * output) const +void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * __restrict output) const { const uint length = k.length(); const float scale = float(length) / float(m_width); @@ -760,3 +817,93 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, } } + +/// Apply 1D vertical polyphase kernel to column x of channel c, weighting each sample by alpha channel a, and write the results to output. 
+void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float norm = 0; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, j+left, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } +} + +/// Apply 1D horizontal polyphase kernel to row y of channel c, weighting each sample by alpha channel a, and write the results to output. +void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float norm = 0.0f; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(left + j, y, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } +} + +FloatImage* FloatImage::clone() const +{ + FloatImage* copy = new 
FloatImage(); + copy->m_width = m_width; + copy->m_height = m_height; + copy->m_componentNum = m_componentNum; + copy->m_count = m_count; + + if(m_mem) + { + copy->allocate(m_componentNum, m_width, m_height); + memcpy(copy->m_mem, m_mem, m_count * sizeof(float)); + } + + return copy; +} + diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h index 74e7537..96d1630 100644 --- a/src/nvimage/FloatImage.h +++ b/src/nvimage/FloatImage.h @@ -3,12 +3,20 @@ #ifndef NV_IMAGE_FLOATIMAGE_H #define NV_IMAGE_FLOATIMAGE_H +#include + +#include + #include #include // clamp -#include + +#include // abs + namespace nv { +class Vector4; +class Matrix; class Image; class Filter; class Kernel1; @@ -60,20 +68,22 @@ public: NVIMAGE_API void toGamma(uint base_component, uint num, float gamma = 2.2f); NVIMAGE_API void exponentiate(uint base_component, uint num, float power); - + NVIMAGE_API FloatImage * fastDownSample() const; NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const; + NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const; NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const; - //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const; - //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const; + NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const; //@} - NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, int c, WrapMode wm) const; - NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, int c, WrapMode wm) const; - NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, int c, WrapMode wm) const; - NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, int c, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, int c, WrapMode wm, float * 
output) const; + NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const; + NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const; + NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const; + NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const; uint width() const { return m_width; } @@ -109,6 +119,9 @@ public: float sampleLinearMirror(float x, float y, int c) const; //@} + + FloatImage* clone() const; + public: uint index(uint x, uint y) const; @@ -234,7 +247,7 @@ inline uint FloatImage::indexMirror(int x, int y) const } if (m_height == 1) y = 0; - + y = abs(y); while (y >= m_height) { y = abs(m_height + m_height - y - 2); diff --git a/src/nvimage/NormalMap.cpp b/src/nvimage/NormalMap.cpp index 841c5ae..2ece574 100644 --- a/src/nvimage/NormalMap.cpp +++ b/src/nvimage/NormalMap.cpp @@ -21,15 +21,16 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. -#include - -#include - #include #include #include #include +#include + +#include + + using namespace nv; // Create normal map using the given kernels. 
diff --git a/src/nvimage/PsdFile.h b/src/nvimage/PsdFile.h index 8b2fc18..41379ed 100644 --- a/src/nvimage/PsdFile.h +++ b/src/nvimage/PsdFile.h @@ -39,7 +39,7 @@ namespace nv bool isSupported() const { if (version != 1) { - printf("*** bad version number %u\n", version); + nvDebug("*** bad version number %u\n", version); return false; } if (channel_count > 4) { diff --git a/src/nvimage/Quantize.cpp b/src/nvimage/Quantize.cpp index ce15d1c..56812bd 100644 --- a/src/nvimage/Quantize.cpp +++ b/src/nvimage/Quantize.cpp @@ -12,11 +12,14 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT @@ This code needs to be reviewed, I'm not sure it's correct. */ +#include +#include +#include + #include -#include -#include -#include +#include // swap + using namespace nv; @@ -133,17 +136,17 @@ void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, u Color32 pixel = image->pixel(x, y); // Convert to our desired size, and reconstruct. - pixel.r = PixelFormat::convert(pixel.r, 8, rsize); - pixel.r = PixelFormat::convert(pixel.r, rsize, 8); - - pixel.g = PixelFormat::convert(pixel.g, 8, gsize); - pixel.g = PixelFormat::convert(pixel.g, gsize, 8); - - pixel.b = PixelFormat::convert(pixel.b, 8, bsize); - pixel.b = PixelFormat::convert(pixel.b, bsize, 8); - - pixel.a = PixelFormat::convert(pixel.a, 8, asize); - pixel.a = PixelFormat::convert(pixel.a, asize, 8); + pixel.r = PixelFormat::convert(pixel.r, 8, rsize); + pixel.r = PixelFormat::convert(pixel.r, rsize, 8); + + pixel.g = PixelFormat::convert(pixel.g, 8, gsize); + pixel.g = PixelFormat::convert(pixel.g, gsize, 8); + + pixel.b = PixelFormat::convert(pixel.b, 8, bsize); + pixel.b = PixelFormat::convert(pixel.b, bsize, 8); + + pixel.a = PixelFormat::convert(pixel.a, 8, asize); + pixel.a = PixelFormat::convert(pixel.a, asize, 8); // Store color. image->pixel(x, y) = pixel; @@ -152,65 +155,65 @@ void nv::Quantize::Truncate(Image * image, uint rsize, uint gsize, uint bsize, u } -// Error diffusion. 
Floyd Steinberg. -void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize) -{ - nvCheck(image != NULL); - - const uint w = image->width(); - const uint h = image->height(); - - Vector4 * row0 = new Vector4[w+2]; - Vector4 * row1 = new Vector4[w+2]; - memset(row0, 0, sizeof(Vector4)*(w+2)); - memset(row1, 0, sizeof(Vector4)*(w+2)); - - for (uint y = 0; y < h; y++) { - for (uint x = 0; x < w; x++) { - - Color32 pixel = image->pixel(x, y); - - // Add error. - pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255); - pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255); - pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255); - pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255); - - int r = pixel.r; - int g = pixel.g; - int b = pixel.b; - int a = pixel.a; - - // Convert to our desired size, and reconstruct. - r = PixelFormat::convert(r, 8, rsize); - r = PixelFormat::convert(r, rsize, 8); - - g = PixelFormat::convert(g, 8, gsize); - g = PixelFormat::convert(g, gsize, 8); - - b = PixelFormat::convert(b, 8, bsize); - b = PixelFormat::convert(b, bsize, 8); - - a = PixelFormat::convert(a, 8, asize); - a = PixelFormat::convert(a, asize, 8); - - // Store color. - image->pixel(x, y) = Color32(r, g, b, a); - - // Compute new error. - Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a)); - - // Propagate new error. - row0[1+x+1] += 7.0f / 16.0f * diff; - row1[1+x-1] += 3.0f / 16.0f * diff; - row1[1+x+0] += 5.0f / 16.0f * diff; - row1[1+x+1] += 1.0f / 16.0f * diff; - } - - swap(row0, row1); - memset(row1, 0, sizeof(Vector4)*(w+2)); - } - - delete [] row0; - delete [] row1; -} +// Error diffusion. Floyd Steinberg. 
+void nv::Quantize::FloydSteinberg(Image * image, uint rsize, uint gsize, uint bsize, uint asize) +{ + nvCheck(image != NULL); + + const uint w = image->width(); + const uint h = image->height(); + + Vector4 * row0 = new Vector4[w+2]; + Vector4 * row1 = new Vector4[w+2]; + memset(row0, 0, sizeof(Vector4)*(w+2)); + memset(row1, 0, sizeof(Vector4)*(w+2)); + + for (uint y = 0; y < h; y++) { + for (uint x = 0; x < w; x++) { + + Color32 pixel = image->pixel(x, y); + + // Add error. + pixel.r = clamp(int(pixel.r) + int(row0[1+x].x()), 0, 255); + pixel.g = clamp(int(pixel.g) + int(row0[1+x].y()), 0, 255); + pixel.b = clamp(int(pixel.b) + int(row0[1+x].z()), 0, 255); + pixel.a = clamp(int(pixel.a) + int(row0[1+x].w()), 0, 255); + + int r = pixel.r; + int g = pixel.g; + int b = pixel.b; + int a = pixel.a; + + // Convert to our desired size, and reconstruct. + r = PixelFormat::convert(r, 8, rsize); + r = PixelFormat::convert(r, rsize, 8); + + g = PixelFormat::convert(g, 8, gsize); + g = PixelFormat::convert(g, gsize, 8); + + b = PixelFormat::convert(b, 8, bsize); + b = PixelFormat::convert(b, bsize, 8); + + a = PixelFormat::convert(a, 8, asize); + a = PixelFormat::convert(a, asize, 8); + + // Store color. + image->pixel(x, y) = Color32(r, g, b, a); + + // Compute new error. + Vector4 diff(float(int(pixel.r) - r), float(int(pixel.g) - g), float(int(pixel.b) - b), float(int(pixel.a) - a)); + + // Propagate new error. 
+ row0[1+x+1] += 7.0f / 16.0f * diff; + row1[1+x-1] += 3.0f / 16.0f * diff; + row1[1+x+0] += 5.0f / 16.0f * diff; + row1[1+x+1] += 1.0f / 16.0f * diff; + } + + swap(row0, row1); + memset(row1, 0, sizeof(Vector4)*(w+2)); + } + + delete [] row0; + delete [] row1; +} diff --git a/src/nvimage/Quantize.h b/src/nvimage/Quantize.h index 4ce7c2f..5b4a955 100644 --- a/src/nvimage/Quantize.h +++ b/src/nvimage/Quantize.h @@ -3,6 +3,9 @@ #ifndef NV_IMAGE_QUANTIZE_H #define NV_IMAGE_QUANTIZE_H +#include + + namespace nv { class Image; diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 7710921..0318d4e 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -48,19 +48,37 @@ #define IS_NEGATIVE_FLOAT(x) (IR(x)&SIGN_BITMASK) */ -inline float sqrt_assert(const float f) +inline double sqrt_assert(const double f) +{ + nvDebugCheck(f >= 0.0f); + return sqrt(f); +} + +inline float sqrtf_assert(const float f) { nvDebugCheck(f >= 0.0f); return sqrtf(f); } -inline float acos_assert(const float f) +inline double acos_assert(const double f) +{ + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return acos(f); +} + +inline float acosf_assert(const float f) { nvDebugCheck(f >= -1.0f && f <= 1.0f); return acosf(f); } -inline float asin_assert(const float f) +inline double asin_assert(const double f) +{ + nvDebugCheck(f >= -1.0f && f <= 1.0f); + return asin(f); +} + +inline float asinf_assert(const float f) { nvDebugCheck(f >= -1.0f && f <= 1.0f); return asinf(f); @@ -68,11 +86,11 @@ inline float asin_assert(const float f) // Replace default functions with asserting ones. 
#define sqrt sqrt_assert -#define sqrtf sqrt_assert +#define sqrtf sqrtf_assert #define acos acos_assert -#define acosf acos_assert +#define acosf acosf_assert #define asin asin_assert -#define asinf asin_assert +#define asinf asinf_assert #if NV_OS_WIN32 #include @@ -136,6 +154,11 @@ inline float lerp(float f0, float f1, float t) return f0 * s + f1 * t; } +inline float square(float f) +{ + return f * f; +} + } // nv #endif // NV_MATH_H diff --git a/src/nvtt/Compressor.cpp b/src/nvtt/Compressor.cpp index d497e8a..8c58674 100644 --- a/src/nvtt/Compressor.cpp +++ b/src/nvtt/Compressor.cpp @@ -697,6 +697,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio SlowCompressor slow; slow.setImage(image, inputOptions.alphaMode); + const bool useCuda = cudaEnabled && image->width() * image->height() >= 512; if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB) { @@ -725,7 +726,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio } else { - if (cudaEnabled) + if (useCuda) { nvDebugCheck(cudaSupported); cuda->setImage(image, inputOptions.alphaMode); @@ -745,7 +746,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio } else { - if (cudaEnabled) + if (useCuda) { nvDebugCheck(cudaSupported); /*cuda*/slow.compressDXT1a(compressionOptions, outputOptions); @@ -764,7 +765,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio } else { - if (cudaEnabled) + if (useCuda) { nvDebugCheck(cudaSupported); cuda->setImage(image, inputOptions.alphaMode); @@ -784,7 +785,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio } else { - if (cudaEnabled) + if (useCuda) { nvDebugCheck(cudaSupported); cuda->setImage(image, inputOptions.alphaMode); diff --git a/src/nvtt/cuda/CudaMath.h b/src/nvtt/cuda/CudaMath.h index f2ed009..50af320 100644 --- a/src/nvtt/cuda/CudaMath.h +++ b/src/nvtt/cuda/CudaMath.h @@ 
-148,7 +148,7 @@ inline __device__ bool singleColor(const float3 * colors) bool sameColor = false; for (int i = 0; i < 16; i++) { - sameColor &= (colors[idx] == colors[0]); + sameColor &= (colors[i] == colors[0]); } return sameColor; #else diff --git a/src/nvtt/cuda/CudaUtils.cpp b/src/nvtt/cuda/CudaUtils.cpp index 416f7f7..de73f7e 100644 --- a/src/nvtt/cuda/CudaUtils.cpp +++ b/src/nvtt/cuda/CudaUtils.cpp @@ -26,12 +26,14 @@ #include "CudaUtils.h" #if defined HAVE_CUDA -#include +#include +#include #endif using namespace nv; using namespace cuda; +/* @@ Move this to win32 utils or somewhere else. #if NV_OS_WIN32 #define WINDOWS_LEAN_AND_MEAN @@ -68,10 +70,12 @@ static bool isWow32() } #endif +*/ -static bool isCudaDriverAvailable(uint version) +static bool isCudaDriverAvailable(int version) { +#if defined HAVE_CUDA #if NV_OS_WIN32 Library nvcuda("nvcuda.dll"); #else @@ -95,7 +99,21 @@ static bool isCudaDriverAvailable(uint version) if (address == NULL) return false; } - return true; + if (version >= 2020) + { + typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version); + + PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion"); + if (driverGetVersion == NULL) return false; + + int driverVersion; + if (driverGetVersion(&driverVersion) != CUDA_SUCCESS) return false; + + return driverVersion >= version; + } +#endif // HAVE_CUDA + + return false; } @@ -154,7 +172,7 @@ int nv::cuda::deviceCount() int nv::cuda::getFastestDevice() { int max_gflops_device = 0; -#if defined HAVE_CUDA +#if defined HAVE_CUDA int max_gflops = 0; const int device_count = deviceCount(); @@ -180,6 +198,7 @@ int nv::cuda::getFastestDevice() return max_gflops_device; } + /// Activate the given devices. bool nv::cuda::setDevice(int i) {