Merge changes from the Witness.

2013-06-07 17:53:55 +00:00
parent 634229a842
commit 94d0635285
49 changed files with 1974 additions and 625 deletions
--- a/src/nvtt/BlockCompressor.cpp
+++ b/src/nvtt/BlockCompressor.cpp
@ -22,7 +22,7 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#include "CompressorDXT.h"
+#include "BlockCompressor.h"
 #include "OutputOptions.h"
 #include "TaskDispatcher.h"

@ -46,7 +46,7 @@ using namespace nvtt;
 #include <omp.h>
 #endif

-void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
    const uint bs = blockSize();
    const uint bw = (w + 3) / 4;
@ -113,7 +113,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
 */


-struct FixedBlockCompressorContext
+struct ColorBlockCompressorContext
 {
    nvtt::AlphaMode alphaMode;
    uint w, h;
@ -122,13 +122,13 @@ struct FixedBlockCompressorContext

    uint bw, bh, bs;
    uint8 * mem;
-    FixedBlockCompressor * compressor;
+    ColorBlockCompressor * compressor;
 };

 // Each task compresses one block.
-void FixedBlockCompressorTask(void * data, int i)
+void ColorBlockCompressorTask(void * data, int i)
 {
-    FixedBlockCompressorContext * d = (FixedBlockCompressorContext *) data;
+    ColorBlockCompressorContext * d = (ColorBlockCompressorContext *) data;

    uint x = i % d->bw;
    uint y = i / d->bw;
@ -143,11 +143,11 @@ void FixedBlockCompressorTask(void * data, int i)
    }
 }

-void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
    nvDebugCheck(d == 1);

-    FixedBlockCompressorContext context;
+    ColorBlockCompressorContext context;
    context.alphaMode = alphaMode;
    context.w = w;
    context.h = h;
@ -169,7 +169,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
    const uint size = context.bs * count;
    context.mem = new uint8[size];

-    dispatcher->dispatch(FixedBlockCompressorTask, &context, count);
+    dispatcher->dispatch(ColorBlockCompressorTask, &context, count);

    outputOptions.writeData(context.mem, size);

--- a/src/nvtt/BlockCompressor.h
+++ b/src/nvtt/BlockCompressor.h
@ -22,8 +22,8 @@
 // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 // OTHER DEALINGS IN THE SOFTWARE.

-#ifndef NVTT_COMPRESSORDXT_H
-#define NVTT_COMPRESSORDXT_H
+#ifndef NVTT_BLOCKCOMPRESSOR_H
+#define NVTT_BLOCKCOMPRESSOR_H

 #include "Compressor.h"

@ -33,7 +33,7 @@ namespace nv
    struct ColorSet;
    struct ColorBlock;

-    struct FixedBlockCompressor : public CompressorInterface
+    struct ColorBlockCompressor : public CompressorInterface
    {
        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);

@ -52,4 +52,4 @@ namespace nv
 } // nv namespace


-#endif // NVTT_COMPRESSORDXT_H
+#endif // NVTT_BLOCKCOMPRESSOR_H
--- a/src/nvtt/CompressorDX10.h
+++ b/src/nvtt/CompressorDX10.h
@ -25,20 +25,20 @@
 #ifndef NVTT_COMPRESSORDX10_H
 #define NVTT_COMPRESSORDX10_H

-#include "CompressorDXT.h"
+#include "BlockCompressor.h"

 namespace nv
 {
 	struct ColorBlock;

 	// Fast CPU compressors.
-	struct FastCompressorBC4 : public FixedBlockCompressor
+	struct FastCompressorBC4 : public ColorBlockCompressor
 	{
 		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
 		virtual uint blockSize() const { return 8; }
 	};

-	struct FastCompressorBC5 : public FixedBlockCompressor
+	struct FastCompressorBC5 : public ColorBlockCompressor
 	{
 		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
 		virtual uint blockSize() const { return 16; }
@ -46,13 +46,13 @@ namespace nv


 	// Production CPU compressors.
-	struct ProductionCompressorBC4 : public FixedBlockCompressor
+	struct ProductionCompressorBC4 : public ColorBlockCompressor
 	{
 		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
 		virtual uint blockSize() const { return 8; }
 	};

-	struct ProductionCompressorBC5 : public FixedBlockCompressor
+	struct ProductionCompressorBC5 : public ColorBlockCompressor
 	{
 		virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
 		virtual uint blockSize() const { return 16; }
--- a/src/nvtt/CompressorDX11.h
+++ b/src/nvtt/CompressorDX11.h
@ -24,7 +24,7 @@
 #ifndef NVTT_COMPRESSORDX11_H
 #define NVTT_COMPRESSORDX11_H

-#include "CompressorDXT.h"
+#include "BlockCompressor.h"

 namespace nv
 {
--- a/src/nvtt/CompressorDX9.cpp
+++ b/src/nvtt/CompressorDX9.cpp
@ -122,11 +122,7 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
    
    if (set.isSingleColor(/*ignoreAlpha*/true))
    {
-        Color32 c;
-        c.r = uint8(clamp(set.colors[0].x, 0.0f, 1.0f) * 255);
-        c.g = uint8(clamp(set.colors[0].y, 0.0f, 1.0f) * 255);
-        c.b = uint8(clamp(set.colors[0].z, 0.0f, 1.0f) * 255);
-        c.a = 255;
+        Color32 c = toColor32(set.colors[0]);
        OptimalCompress::compressDXT1(c, block);
    }
    else
@ -202,7 +198,6 @@ void CompressorDXT1_Luma::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
    OptimalCompress::compressDXT1_Luma(rgba, block);
 }

-
 void CompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
 {
    BlockDXT3 * block = new(output) BlockDXT3;
--- a/src/nvtt/CompressorDX9.h
+++ b/src/nvtt/CompressorDX9.h
@ -25,38 +25,38 @@
 #ifndef NVTT_COMPRESSORDX9_H
 #define NVTT_COMPRESSORDX9_H

-#include "CompressorDXT.h"
+#include "BlockCompressor.h"

 namespace nv
 {
    struct ColorBlock;

    // Fast CPU compressors.
-    struct FastCompressorDXT1 : public FixedBlockCompressor
+    struct FastCompressorDXT1 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 8; }
    };

-    struct FastCompressorDXT1a : public FixedBlockCompressor
+    struct FastCompressorDXT1a : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 8; }
    };

-    struct FastCompressorDXT3 : public FixedBlockCompressor
+    struct FastCompressorDXT3 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 16; }
    };

-    struct FastCompressorDXT5 : public FixedBlockCompressor
+    struct FastCompressorDXT5 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 16; }
    };

-    struct FastCompressorDXT5n : public FixedBlockCompressor
+    struct FastCompressorDXT5n : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 16; }
@ -71,38 +71,38 @@ namespace nv
        virtual uint blockSize() const { return 8; }
    };
 #else
-    struct CompressorDXT1 : public FixedBlockCompressor
+    struct CompressorDXT1 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 8; }
    };
 #endif

-    struct CompressorDXT1a : public FixedBlockCompressor
+    struct CompressorDXT1a : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 8; }
    };

-    struct CompressorDXT1_Luma : public FixedBlockCompressor
+    struct CompressorDXT1_Luma : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 8; }
    };

-    struct CompressorDXT3 : public FixedBlockCompressor
+    struct CompressorDXT3 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 16; }
    };

-    struct CompressorDXT5 : public FixedBlockCompressor
+    struct CompressorDXT5 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 16; }
    };

-    struct CompressorDXT5n : public FixedBlockCompressor
+    struct CompressorDXT5n : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 16; }
@ -137,7 +137,7 @@ namespace nv
 #endif

 #if defined(HAVE_STB)
-    struct StbCompressorDXT1 : public FixedBlockCompressor
+    struct StbCompressorDXT1 : public ColorBlockCompressor
    {
        virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
        virtual uint blockSize() const { return 8; }
--- a/src/nvtt/CompressorRGB.cpp
+++ b/src/nvtt/CompressorRGB.cpp
@ -349,20 +349,23 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
                }
                else
                {
-                    Color32 c;
-                    if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
-                        c.r = uint8(clamp(r * 255, 0.0f, 255.0f));
-                        c.g = uint8(clamp(g * 255, 0.0f, 255.0f));
-                        c.b = uint8(clamp(b * 255, 0.0f, 255.0f));
-                        c.a = uint8(clamp(a * 255, 0.0f, 255.0f));
-                    }
+                    // We first convert to 16 bits, then to the target size. @@ If greater than 16 bits, this will truncate and bitexpand.
+                    
                    // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt

+                    int ir, ig, ib, ia;
+                    if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
+                        ir = iround(clamp(r * 65535.0f, 0.0f, 65535.0f));
+                        ig = iround(clamp(g * 65535.0f, 0.0f, 65535.0f));
+                        ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f));
+                        ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f));
+                    }
+
                    uint p = 0;
-                    p |= PixelFormat::convert(c.r, 8, rsize) << rshift;
-                    p |= PixelFormat::convert(c.g, 8, gsize) << gshift;
-                    p |= PixelFormat::convert(c.b, 8, bsize) << bshift;
-                    p |= PixelFormat::convert(c.a, 8, asize) << ashift;
+                    p |= PixelFormat::convert(ir, 16, rsize) << rshift;
+                    p |= PixelFormat::convert(ig, 16, gsize) << gshift;
+                    p |= PixelFormat::convert(ib, 16, bsize) << bshift;
+                    p |= PixelFormat::convert(ia, 16, asize) << ashift;

                    stream.putBits(p, bitCount);
                }
--- a/src/nvtt/CubeSurface.cpp
+++ b/src/nvtt/CubeSurface.cpp
@ -429,6 +429,12 @@ void CubeSurface::range(int channel, float * minimum_ptr, float * maximum_ptr) c
    *maximum_ptr = maximum;
 }

+void CubeSurface::clamp(int channel, float low/*= 0.0f*/, float high/*= 1.0f*/) {
+    for (int f = 0; f < 6; f++) {
+        m->face[f].clamp(channel, low, high);
+    }
+}
+


 #include "nvmath/SphericalHarmonic.h"
@ -470,13 +476,114 @@ CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const
 }


-// Warp uv coordinate from [-1, 1] to
-/*float warp(float u, int size) {
-
-}*/


+// Convolve filter against this cube.
+Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, float coneAngle, float * filterTable, int tableSize)
+{
+    const float cosineConeAngle = cos(coneAngle);
+    nvDebugCheck(cosineConeAngle >= 0);

+    Vector3 color(0);
+    float sum = 0;
+
+    // Things I have tried to speed this up:
+    // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere.
+    // - Compute the ellipse that results from the cone/plane intersection and compute its bounds. Sometimes the intersection is a parabola, and that case is hard to handle.
+    // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex.
+
+    // What AMD CubeMapGen does:
+    // - Compute conservative bounds on the primary face, wrap around the adjacent faces.
+
+
+    // For each texel of the input cube.
+    for (uint f = 0; f < 6; f++) {
+
+        // Test face cone against filter cone.
+        float cosineFaceAngle = dot(filterDir, faceNormals[f]);
+        float faceAngle = acosf(cosineFaceAngle);
+
+        if (faceAngle > coneAngle + atanf(sqrtf(2))) {
+            // Skip face.
+            continue;
+        }
+
+        const int L = toI32(edgeLength-1);
+        int x0 = 0, x1 = L;
+        int y0 = 0, y1 = L;
+
+#if 0
+        float u0 = -1;
+        float u1 = 1;
+        float v0 = -1;
+        float v1 = 1;
+
+        // @@ Compute uvs.
+
+        // Expand uv coordinates from [-1,1] to [0, edgeLength)
+        u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
+        v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
+        u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
+        v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
+        nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
+        nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
+        nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
+        nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);
+
+        x0 = clamp(ifloor(u0), 0, L);
+        y0 = clamp(ifloor(v0), 0, L);
+        x1 = clamp(iceil(u1), 0, L);
+        y1 = clamp(iceil(v1), 0, L);
+#endif
+
+        nvDebugCheck(x1 >= x0);
+        nvDebugCheck(y1 >= y0);
+
+        if (x1 == x0 || y1 == y0) {
+            // Skip this face.
+            continue;
+        }
+
+
+        const Surface & inputFace = face[f];
+        const FloatImage * inputImage = inputFace.m->image;
+
+        for (int y = y0; y <= y1; y++) {
+            bool inside = false;
+            for (int x = x0; x <= x1; x++) {
+
+                Vector3 dir = texelTable->direction(f, x, y);
+                float cosineAngle = dot(dir, filterDir);
+
+                if (cosineAngle > cosineConeAngle) {
+                    float solidAngle = texelTable->solidAngle(f, x, y);
+                    //float scale = powf(saturate(cosineAngle), cosinePower);
+                    
+                    int idx = int(saturate(cosineAngle) * (tableSize - 1));
+                    float scale = filterTable[idx]; // @@ Do bilinear interpolation?
+
+                    float contribution = solidAngle * scale;
+
+                    sum += contribution;
+                    color.x += contribution * inputImage->pixel(0, x, y, 0);
+                    color.y += contribution * inputImage->pixel(1, x, y, 0);
+                    color.z += contribution * inputImage->pixel(2, x, y, 0);
+
+                    inside = true;
+                }
+                else if (inside) {
+                    // Filter scale is monotonic: if we have been inside once and we have just exited, then we can skip the rest of the row.
+                    // We could do the same thing for the columns and skip entire rows.
+                    break;
+                }
+            }
+        }
+    }
+
+    color *= (1.0f / sum);
+
+    return color;
+}

 // We want to find the alpha such that:
 // cos(alpha)^cosinePower = epsilon
@ -491,6 +598,7 @@ CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const
 // - parallelize. Done.
 // - use ISPC?

+
 // Convolve filter against this cube.
 Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower)
 {
@ -500,6 +608,15 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
    Vector3 color(0);
    float sum = 0;

+    // Things I have tried to speed this up:
+    // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere.
+    // - Compute the ellipse that results from the cone/plane intersection and compute its bounds. Sometimes the intersection is a parabola, and that case is hard to handle.
+    // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex.
+
+    // What AMD CubeMapGen does:
+    // - Compute conservative bounds on the primary face, wrap around the adjacent faces.
+
+
    // For each texel of the input cube.
    for (uint f = 0; f < 6; f++) {

@ -512,163 +629,36 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
            continue;
        }

-        // @@ We could do a less conservative test and test the face frustum against the cone...
-        // Or maybe easier: the face quad against the cone.
-
-        // Compute bounding box of cone intersection against face.
-        // The intersection of the cone with the face is an elipse, we want the extents of that elipse.
-        // @@ Hmm... we could even rasterize an elipse! Sounds like FUN!
-
        const int L = toI32(edgeLength-1);
        int x0 = 0, x1 = L;
        int y0 = 0, y1 = L;

-        if (false) {
-            // New approach?
+#if 0
+        float u0 = -1;
+        float u1 = 1;
+        float v0 = -1;
+        float v1 = 1;

-            // For each face, we are looking for 4 planes that bound the cone.
+        // @@ Compute uvs.

-            // All planes go through the origin.
-            // Plane fully determined by its normal.
-            // We only care about planes aligned to one axis. So, for the XY face, we have 4 planes:
+        // Expand uv coordinates from [-1,1] to [0, edgeLength)
+        u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
+        v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
+        u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
+        v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
+        nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
+        nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
+        nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
+        nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);

-            // Plane goes through origin.
-            // Plane normal is unit length.
-
-            // Plane must be tangent to cone ->
-            //  angle between plane normal and cone axis is 90 - cone angle & 90 + cone angle
-            //  dot(N, D) == cos(90 - cone angle)
-            //  dot(N, D) == cos(90 + cone angle)
-
-            // Plane must contain face UV axis
-
-            // Find the 4 planes and how they intersect the unit face, which gives us (u0,v0, u1,v1).
-
-            // Expand uv coordinates, clamp to
-        }
-
-        // @@ Ugh. This is wrong, or only right when filterDir is aligned to one axis.
-        if (false) {
-            // uv coordinates corresponding to filterDir.
-            //float u = dot(filterDir, faceU[f]) / cosineFaceAngle;
-            //float v = dot(filterDir, faceV[f]) / cosineFaceAngle;
-
-            // Angular coordinates corresponding to filterDir with respect to faceNormal.
-            float atu = atan2(dot(filterDir, faceU[f]), cosineFaceAngle);
-            float atv = atan2(dot(filterDir, faceV[f]), cosineFaceAngle);
-
-            // Expand angles and project back to the face plane.
-            float u0 = tan(clamp(atu - coneAngle, -PI/4, PI/4));
-            float v0 = tan(clamp(atv - coneAngle, -PI/4, PI/4));
-            float u1 = tan(clamp(atu + coneAngle, -PI/4, PI/4));
-            float v1 = tan(clamp(atv + coneAngle, -PI/4, PI/4));
-            nvDebugCheck(u0 >= -1 && u0 <= 1);
-            nvDebugCheck(v0 >= -1 && v0 <= 1);
-            nvDebugCheck(u1 >= -1 && u1 <= 1);
-            nvDebugCheck(v1 >= -1 && v1 <= 1);
-
-            // Expand uv coordinates from [-1,1] to [0, edgeLength)
-            u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
-            v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
-            u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
-            v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
-            nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
-            nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
-            nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
-            nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);
-
-            x0 = clamp(ifloor(u0), 0, L);
-            y0 = clamp(ifloor(v0), 0, L);
-            x1 = clamp(iceil(u1), 0, L);
-            y1 = clamp(iceil(v1), 0, L);
-
-            nvDebugCheck(x1 >= x0);
-            nvDebugCheck(y1 >= y0);
-        }
-
-        // This is elegant and all that, but the problem is that the projection is not always an ellipse, but often a parabola.
-        // A parabola has infinite bounds, so this approach is not very practical. Ugh.
-        if (false) {
-            //nvCheck(cosineFaceAngle >= 0.0f); @@ Not true for wide angles.
-
-            // Focal point in cartessian coordinates:
-            Vector3 F = Vector3(dot(faceU[f], filterDir), dot(faceV[f], filterDir), cosineFaceAngle);
-
-            // Focal point in polar coordinates:
-            Vector2 Fp = toPolar(F);
-            nvCheck(Fp.y >= 0.0f);  // top
-            //nvCheck(Fp.y <= PI/2);  // horizon
-
-            // If this is an ellipse:
-            if (Fp.y + coneAngle < PI/2) {
-                nvCheck(Fp.y - coneAngle > -PI/2);
-
-                // Major axis endpoints:
-                Vector2 Fa1 = toPlane(Fp.x, Fp.y - cosineFaceAngle);  // near endpoint.
-                Vector2 Fa2 = toPlane(Fp.x, Fp.y + cosineFaceAngle);  // far endpoint.
-                nvCheck(length(Fa1) <= length(Fa2));
-
-                // Ellipse center:
-                Vector2 Fc = (Fa1 + Fa2) * 0.5f;
-
-                // Major radius:
-                float a = 0.5f * length(Fa1 - Fa2);
-
-                // Focal point:
-                Vector2 F1 = toPlane(Fp.x, Fp.y);
-
-                // If we project Fa1, Fa2, Fc, F1 onto the filter direction, then:
-                float da1 = dot(Fa1, F.xy()) / fabs(cosineFaceAngle);
-                float d1 = dot(F1, F.xy()) / fabs(cosineFaceAngle);
-                float dc = dot(Fc, F.xy()) / fabs(cosineFaceAngle);
-                float da2 = dot(Fa2, F.xy()) / fabs(cosineFaceAngle);
-                //nvDebug("%f <= %f <= %f <= %f   (%d: %f %f | %f %f)\n", da1, d1, dc, da2, f, F.x, F.y, Fp.y - coneAngle, Fp.y + coneAngle);
-                //nvCheck(da1 <= d1 && d1 <= dc && dc <= da2);
-
-                // Translate focal point relative to center:
-                F1 -= Fc;
-
-                // Focal distance:
-                //float f = length(F1);  // @@ Overriding f!
-
-                // Minor radius:
-                //float b = sqrtf(a*a - f*f);
-
-                // Second order quadric coefficients:
-                float A = a*a - F1.x * F1.x;
-                nvCheck(A >= 0);
-
-                float B = a*a - F1.y * F1.y;
-                nvCheck(B >= 0);
-
-                // Floating point bounds:
-                float u0 = clamp(Fc.x - sqrtf(B), -1.0f, 1.0f);
-                float u1 = clamp(Fc.x + sqrtf(B), -1.0f, 1.0f);
-                float v0 = clamp(Fc.y - sqrtf(A), -1.0f, 1.0f);
-                float v1 = clamp(Fc.y + sqrtf(A), -1.0f, 1.0f);
-
-                // Expand uv coordinates from [-1,1] to [0, edgeLength)
-                u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f;
-                v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f;
-                u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f;
-                v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f;
-                //nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f);
-                //nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f);
-                //nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f);
-                //nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f);
-
-                x0 = clamp(ifloor(u0), 0, L);
-                y0 = clamp(ifloor(v0), 0, L);
-                x1 = clamp(iceil(u1), 0, L);
-                y1 = clamp(iceil(v1), 0, L);
-
-                nvDebugCheck(x1 >= x0);
-                nvDebugCheck(y1 >= y0);
-            }
-
-            // @@ What to do with parabolas?
-        }
+        x0 = clamp(ifloor(u0), 0, L);
+        y0 = clamp(ifloor(v0), 0, L);
+        x1 = clamp(iceil(u1), 0, L);
+        y1 = clamp(iceil(v1), 0, L);
+#endif

+        nvDebugCheck(x1 >= x0);
+        nvDebugCheck(y1 >= y0);

        if (x1 == x0 || y1 == y0) {
            // Skip this face.
@ -714,17 +704,18 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,

 #include "nvthread/ParallelFor.h"

-struct ApplyCosinePowerFilterContext {
+struct ApplyAngularFilterContext {
    CubeSurface::Private * inputCube;
    CubeSurface::Private * filteredCube;
    float coneAngle;
-    float cosinePower;
+    float * filterTable;
+    int tableSize;
    EdgeFixup fixupMethod;
 };

-void ApplyCosinePowerFilterTask(void * context, int id)
+void ApplyAngularFilterTask(void * context, int id)
 {
-    ApplyCosinePowerFilterContext * ctx = (ApplyCosinePowerFilterContext *)context;
+    ApplyAngularFilterContext * ctx = (ApplyAngularFilterContext *)context;

    int size = ctx->filteredCube->edgeLength;

@ -739,7 +730,7 @@ void ApplyCosinePowerFilterTask(void * context, int id)
    const Vector3 filterDir = texelDirection(f, x, y, size, ctx->fixupMethod);

    // Convolve filter against cube.
-    Vector3 color = ctx->inputCube->applyCosinePowerFilter(filterDir, ctx->coneAngle, ctx->cosinePower);
+    Vector3 color = ctx->inputCube->applyAngularFilter(filterDir, ctx->coneAngle, ctx->filterTable, ctx->tableSize);

    filteredImage->pixel(0, idx) = color.x;
    filteredImage->pixel(1, idx) = color.y;
@ -749,8 +740,6 @@ void ApplyCosinePowerFilterTask(void * context, int id)

 CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const
 {
-    const uint edgeLength = m->edgeLength;
-
    // Allocate output cube.
    CubeSurface filteredCube;
    filteredCube.m->allocate(size);
@ -782,14 +771,24 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixu
        }
    }*/

-    ApplyCosinePowerFilterContext context;
+    ApplyAngularFilterContext context;
    context.inputCube = m;
    context.filteredCube = filteredCube.m;
    context.coneAngle = coneAngle;
-    context.cosinePower = cosinePower;
    context.fixupMethod = fixupMethod;

-    nv::ParallelFor parallelFor(ApplyCosinePowerFilterTask, &context);
+    context.tableSize = 512;
+    context.filterTable = new float[context.tableSize];
+
+    // @@ Instead of looking up table between [0 - 1] we should probably use [cos(coneAngle), 1]
+
+    for (int i = 0; i < context.tableSize; i++) {
+        float f = float(i) / (context.tableSize - 1);
+        context.filterTable[i] = powf(f, cosinePower);
+    }
+    
+
+    nv::ParallelFor parallelFor(ApplyAngularFilterTask, &context);
    parallelFor.run(6 * size * size);

    // @@ Implement edge averaging.
@ -816,6 +815,72 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixu
 }


+// Sample cubemap in the given direction.
+Vector3 CubeSurface::Private::sample(const Vector3 & dir)
+{
+    int f = -1;
+    if (fabs(dir.x) > fabs(dir.y) && fabs(dir.x) > fabs(dir.z)) {
+        if (dir.x > 0) f = 0;
+        else f = 1;
+    }
+    else if (fabs(dir.y) > fabs(dir.z)) {
+        if (dir.y > 0) f = 2;
+        else f = 3;
+    }
+    else {
+        if (dir.z > 0) f = 4;
+        else f = 5;
+    }
+    nvDebugCheck(f != -1);
+
+    // uv coordinates corresponding to dir.
+    float u = dot(dir, faceU[f]);
+    float v = dot(dir, faceV[f]);
+
+    FloatImage * img = face[f].m->image;
+
+    Vector3 color;
+    color.x = img->sampleLinearClamp(0, u, v);
+    color.y = img->sampleLinearClamp(1, u, v);
+    color.z = img->sampleLinearClamp(2, u, v);
+
+    return color;
+}
+
+// @@ Not tested!
+CubeSurface CubeSurface::fastResample(int size, EdgeFixup fixupMethod) const
+{
+    // Allocate output cube.
+    CubeSurface resampledCube;
+    resampledCube.m->allocate(size);
+
+    // For each texel of the output cube.
+    for (uint f = 0; f < 6; f++) {
+        nvtt::Surface resampledFace = resampledCube.m->face[f];
+        FloatImage * resampledImage = resampledFace.m->image;
+
+        for (uint y = 0; y < uint(size); y++) {
+            for (uint x = 0; x < uint(size); x++) {
+
+                const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod);
+
+                Vector3 color = m->sample(filterDir);
+
+                resampledImage->pixel(0, x, y, 0) = color.x;
+                resampledImage->pixel(1, x, y, 0) = color.y;
+                resampledImage->pixel(2, x, y, 0) = color.z;
+            }
+        }
+    }
+
+    // @@ Implement edge averaging. Share this code with cosinePowerFilter
+    if (fixupMethod == EdgeFixup_Average) {
+    }
+
+    return resampledCube;
+}
+
+
 void CubeSurface::toLinear(float gamma)
 {
    if (isNull()) return;
--- a/src/nvtt/CubeSurface.h
+++ b/src/nvtt/CubeSurface.h
@ -94,8 +94,11 @@ namespace nvtt
        }

        // Filtering helpers:
+        nv::Vector3 applyAngularFilter(const nv::Vector3 & dir, float coneAngle, float * filterTable, int tableSize);
        nv::Vector3 applyCosinePowerFilter(const nv::Vector3 & dir, float coneAngle, float cosinePower);

+        nv::Vector3 sample(const nv::Vector3 & dir);
+
        uint edgeLength;
        Surface face[6];
        TexelTable * texelTable;
--- a/src/nvtt/InputOptions.h
+++ b/src/nvtt/InputOptions.h
@ -1,82 +1,82 @@
-// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
-// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NVTT_INPUTOPTIONS_H
-#define NVTT_INPUTOPTIONS_H
-
-#include "nvtt.h"
-
-#include "nvmath/Vector.h"
-
-
-namespace nvtt
-{
-
-    struct InputOptions::Private
-    {
-        Private() : images(NULL) {}
-
-        WrapMode wrapMode;
-        TextureType textureType;
-        InputFormat inputFormat;
-        AlphaMode alphaMode;
-
-        uint width;
-        uint height;
-        uint depth;
-        uint faceCount;
-        uint mipmapCount;
-        uint imageCount;
-
-        void ** images;
-
-        // Gamma conversion.
-        float inputGamma;
-        float outputGamma;
-
-        // Mipmap generation options.
-        bool generateMipmaps;
-        int maxLevel;
-        MipmapFilter mipmapFilter;
-
-        // Kaiser filter parameters.
-        float kaiserWidth;
-        float kaiserAlpha;
-        float kaiserStretch;
-
-        // Normal map options.
-        bool isNormalMap;
-        bool normalizeMipmaps;
-        bool convertToNormalMap;
-        nv::Vector4 heightFactors;
-        nv::Vector4 bumpFrequencyScale;
-
-        // Adjust extents.
-        uint maxExtent;
-        RoundMode roundMode;
-    };
-
-} // nvtt namespace
-
-#endif // NVTT_INPUTOPTIONS_H
+// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
+// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
+// 
+// Permission is hereby granted, free of charge, to any person
+// obtaining a copy of this software and associated documentation
+// files (the "Software"), to deal in the Software without
+// restriction, including without limitation the rights to use,
+// copy, modify, merge, publish, distribute, sublicense, and/or sell
+// copies of the Software, and to permit persons to whom the
+// Software is furnished to do so, subject to the following
+// conditions:
+// 
+// The above copyright notice and this permission notice shall be
+// included in all copies or substantial portions of the Software.
+// 
+// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+// OTHER DEALINGS IN THE SOFTWARE.
+
+#ifndef NVTT_INPUTOPTIONS_H
+#define NVTT_INPUTOPTIONS_H
+
+#include "nvtt.h"
+
+#include "nvmath/Vector.h"
+
+
+namespace nvtt
+{
+
+    // Private data of InputOptions.
+    // The constructor only nulls the image array pointer; it leaves every other
+    // member uninitialized.
+    // NOTE(review): presumably the owning InputOptions assigns the remaining fields
+    // before they are read -- confirm in InputOptions.cpp.
+    struct InputOptions::Private
+    {
+        Private() : images(NULL) {}
+
+        // Texture description.
+        WrapMode wrapMode;
+        TextureType textureType;
+        InputFormat inputFormat;
+        AlphaMode alphaMode;
+
+        // Extents and counts of the input.
+        uint width;
+        uint height;
+        uint depth;
+        uint faceCount;
+        uint mipmapCount;
+        uint imageCount;
+
+        // Array of image pointers; element layout and ownership are not visible here.
+        void ** images;
+
+        // Gamma conversion.
+        float inputGamma;
+        float outputGamma;
+
+        // Mipmap generation options.
+        bool generateMipmaps;
+        int maxLevel;
+        MipmapFilter mipmapFilter;
+
+        // Kaiser filter parameters.
+        float kaiserWidth;
+        float kaiserAlpha;
+        float kaiserStretch;
+
+        // Normal map options.
+        bool isNormalMap;
+        bool normalizeMipmaps;
+        bool convertToNormalMap;
+        nv::Vector4 heightFactors;
+        nv::Vector4 bumpFrequencyScale;
+
+        // Adjust extents.
+        uint maxExtent;
+        RoundMode roundMode;
+    };
+
+} // nvtt namespace
+
+#endif // NVTT_INPUTOPTIONS_H
--- a/src/nvtt/Surface.cpp
+++ b/src/nvtt/Surface.cpp
@ -40,6 +40,10 @@
 #include <float.h>
 #include <string.h> // memset, memcpy

+#if NV_CC_GNUC
+#include <math.h> // exp2f and log2f
+#endif
+
 using namespace nv;
 using namespace nvtt;

@ -101,6 +105,20 @@ namespace
    }*/
 }

+// Returns true when an image of w x h x d texels may still be reduced to a smaller
+// mip level given the minimum extent min_size.
+//  - min_size <= 1: the chain only stops at 1x1x1. A min_size of 0 is treated like 1;
+//    otherwise no extent could ever satisfy "<= 0" and callers that loop on this
+//    function would never terminate.
+//  - min_size > 1: an image with d == 1 stops as soon as its width or height is at or
+//    below min_size. Images with d != 1 are not limited by min_size here.
+bool nv::canMakeNextMipmap(uint w, uint h, uint d, uint min_size)
+{
+    if (min_size <= 1u) {
+        return !(w == 1u && h == 1u && d == 1u);
+    }
+
+    if ((w <= min_size || h <= min_size) && d == 1u) {
+        return false;
+    }
+
+    return true;
+}
+
 uint nv::countMipmaps(uint w)
 {
    uint mipmap = 0;
@ -127,6 +145,21 @@ uint nv::countMipmaps(uint w, uint h, uint d)
    return mipmap + 1;
 }

+// Counts the mip levels of a w x h x d image, base level included, halving each
+// extent (never below 1) for as long as canMakeNextMipmap allows another level.
+uint nv::countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size)
+{
+    uint mipmap = 0;
+
+    // The extents stop changing once they reach 1x1x1, so always stop there; this keeps
+    // the loop finite even for a min_size that canMakeNextMipmap never rejects.
+    while ((w > 1 || h > 1 || d > 1) && canMakeNextMipmap(w, h, d, min_size)) {
+        w = max(1U, w / 2);
+        h = max(1U, h / 2);
+        d = max(1U, d / 2);
+        mipmap++;
+    }
+
+    return mipmap + 1;
+}
+
+
 uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlignmentInBytes, Format format)
 {
    if (format == Format_RGBA) {
@ -308,10 +341,18 @@ int Surface::countMipmaps() const
    return ::countMipmaps(m->image->width(), m->image->height(), 1);
 }

+// Number of mip levels of this surface (base level included) when the chain is cut off
+// at min_size; 0 for a null surface. The depth passed on is always 1.
+int Surface::countMipmaps(int min_size) const
+{
+    if (m->image == NULL) return 0;
+
+    // min_size is signed here but unsigned in the helper: a negative value would wrap to
+    // a huge minimum and 0 can never be reached by an extent, so treat both as 1.
+    const uint minSize = (min_size > 1) ? uint(min_size) : 1u;
+
+    return ::countMipmapsWithMinSize(m->image->width(), m->image->height(), 1, minSize);
+}
+
+// Returns FloatImage::alphaTestCoverage for channel 3 with the given reference value,
+// or 0 when the surface has no image.
 float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/) const
 {
    if (m->image == NULL) return 0.0f;

+    // Limit the reference to [1/256, 255/256] before it is handed to the image.
+    alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
+
    return m->image->alphaTestCoverage(alphaRef, 3);
 }

@ -348,7 +389,7 @@ float Surface::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2
    // Avoid division by zero.
    if (denom == 0.0f) return 0.0f;

-    return sum / denom;
+    return powf(sum / denom, 1.0f/gamma);
 }

 const float * Surface::data() const
@ -356,6 +397,13 @@ const float * Surface::data() const
    return m->image->channel(0);
 }

+// Read-only access to the float data of channel i (valid indices are 0..3).
+// Returns NULL for a surface without an image or for an out-of-range index.
+const float * Surface::channel(int i) const
+{
+    // Same null-image guard as the other accessors (countMipmaps, alphaTestCoverage).
+    if (m->image == NULL) return NULL;
+    if (i < 0 || i > 3) return NULL;
+    return m->image->channel(i);
+}
+
+
 void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const
 {
    // We assume it's clear in case we want to accumulate multiple histograms.
@ -378,12 +426,14 @@ void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCoun
    }
 }

-void Surface::range(int channel, float * rangeMin, float * rangeMax) const
+void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_channel/*= -1*/, float alpha_ref/*= 0.f*/) const
 {
    Vector2 range(FLT_MAX, -FLT_MAX);

    FloatImage * img = m->image;

+    if (alpha_channel == -1) { // no alpha channel; just like the original range function
+
    if (m->image != NULL)
    {
        float * c = img->channel(channel);
@ -395,6 +445,25 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax) const
            if (f > range.y) range.y = f;
        }
    }
+    }
+    else { // use alpha test to ignore some pixels
+        //note, it's quite possible to get FLT_MAX,-FLT_MAX back if all pixels fail the test
+
+        if (m->image != NULL)
+        {
+            const float * c = img->channel(channel);
+            const float * a = img->channel(alpha_channel);
+
+            const uint count = img->pixelCount();
+            for (uint p = 0; p < count; p++) {
+                if(a[p]>alpha_ref) {
+                    float f = c[p];
+                    if (f < range.x) range.x = f;
+                    if (f > range.y) range.y = f;
+                }
+            }
+        }
+    }

    *rangeMin = range.x;
    *rangeMax = range.y;
@ -423,16 +492,44 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
    return true;
 }

-bool Surface::save(const char * fileName) const
+// Writes the surface to fileName. Returns false when there is no image; otherwise
+// returns whatever the ImageIO call reports.
+//  - hdr:      save the float data directly through ImageIO::saveFloat (components 0..3).
+//  - hasAlpha: only used on the non-hdr path, where it marks the converted image as ARGB.
+bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) const
 {
-    if (m->image != NULL)
-    {
-        return ImageIO::saveFloat(fileName, m->image, 0, 4);
+    if (m->image == NULL) {
+        return false;
    }

-    return false;
+    if (hdr) {
+        return ImageIO::saveFloat(fileName, m->image, 0, 4);
+    }
+    else {
+        // Convert to an Image first; the AutoPtr owns the temporary.
+        AutoPtr<Image> image(m->image->createImage(0, 4));
+        // NOTE(review): if nvCheck does not abort, a NULL image is dereferenced below -- confirm.
+        nvCheck(image != NULL);
+
+        if (hasAlpha) {
+            image->setFormat(Image::Format_ARGB);
+        }
+
+        return ImageIO::save(fileName, image.ptr());
+    }
 }

+
+// Gives this surface a 4-component image of w x h x d texels, cleared with
+// FloatImage::clear(). The texture type becomes 2D when d is 1 and 3D otherwise.
+// Always returns true.
+bool Surface::setImage(int w, int h, int d)
+{
+    detach();
+
+    // Create the backing image on first use.
+    if (m->image == NULL) m->image = new FloatImage();
+
+    FloatImage * const img = m->image;
+    img->allocate(4, w, h, d);
+    img->clear();
+
+    m->type = (d != 1) ? TextureType_3D : TextureType_2D;
+
+    return true;
+}
+
+
 #if 0 //NV_OS_WIN32

 #include <windows.h>
@ -449,13 +546,14 @@ static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) {
 }

 #define TRY __try
-    
 #define CATCH __except (filter(GetExceptionCode(), GetExceptionInformation()))
-#else
+
+#else // 0
+
 #define TRY if (true)
 #define CATCH else
-#endif

+#endif

 bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data)
 {
@ -553,13 +651,13 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
        const uint8 * bsrc = (const uint8 *)b;
        const uint8 * asrc = (const uint8 *)a;

-        try {
+        TRY {
            for (int i = 0; i < count; i++) rdst[i] = float(rsrc[i]) / 255.0f;
            for (int i = 0; i < count; i++) gdst[i] = float(gsrc[i]) / 255.0f;
            for (int i = 0; i < count; i++) bdst[i] = float(bsrc[i]) / 255.0f;
            for (int i = 0; i < count; i++) adst[i] = float(asrc[i]) / 255.0f;
        }
-        catch(...) {
+        CATCH {
            return false;
        }
    }
@ -570,13 +668,13 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
        const uint16 * bsrc = (const uint16 *)b;
        const uint16 * asrc = (const uint16 *)a;

-        try {
+        TRY {
            for (int i = 0; i < count; i++) ((uint32 *)rdst)[i] = half_to_float(rsrc[i]);
            for (int i = 0; i < count; i++) ((uint32 *)gdst)[i] = half_to_float(gsrc[i]);
            for (int i = 0; i < count; i++) ((uint32 *)bdst)[i] = half_to_float(bsrc[i]);
            for (int i = 0; i < count; i++) ((uint32 *)adst)[i] = half_to_float(asrc[i]);
        }
-        catch(...) {
+        CATCH {
            return false;
        }
    }
@ -587,13 +685,13 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
        const float * bsrc = (const float *)b;
        const float * asrc = (const float *)a;

-        try {
+        TRY {
            memcpy(rdst, rsrc, count * sizeof(float));
            memcpy(gdst, gsrc, count * sizeof(float));
            memcpy(bdst, bsrc, count * sizeof(float));
            memcpy(adst, asrc, count * sizeof(float));
        }
-        catch(...) {
+        CATCH {
            return false;
        }
    }
@ -624,87 +722,87 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi

    const uint8 * ptr = (const uint8 *)data;

-    try {
+    TRY {
        for (int y = 0; y < bh; y++)
        {
            for (int x = 0; x < bw; x++)
            {
                ColorBlock colors;

-		if (format == nvtt::Format_BC1)
-		{
-		    const BlockDXT1 * block = (const BlockDXT1 *)ptr;
+                if (format == nvtt::Format_BC1)
+                {
+                    const BlockDXT1 * block = (const BlockDXT1 *)ptr;

-		    if (decoder == Decoder_D3D10) {
-			    block->decodeBlock(&colors, false);
-		    }
-		    else if (decoder == Decoder_D3D9) {
-			    block->decodeBlock(&colors, false);
-		    }
-		    else if (decoder == Decoder_NV5x) {
-			    block->decodeBlockNV5x(&colors);
-		    }
-		}
-		else if (format == nvtt::Format_BC2)
-		{
-		    const BlockDXT3 * block = (const BlockDXT3 *)ptr;
+                    if (decoder == Decoder_D3D10) {
+	                    block->decodeBlock(&colors, false);
+                    }
+                    else if (decoder == Decoder_D3D9) {
+	                    block->decodeBlock(&colors, false);
+                    }
+                    else if (decoder == Decoder_NV5x) {
+	                    block->decodeBlockNV5x(&colors);
+                    }
+                }
+                else if (format == nvtt::Format_BC2)
+                {
+                    const BlockDXT3 * block = (const BlockDXT3 *)ptr;

-		    if (decoder == Decoder_D3D10) {
-			    block->decodeBlock(&colors, false);
-		    }
-		    else if (decoder == Decoder_D3D9) {
-			    block->decodeBlock(&colors, false);
-		    }
-		    else if (decoder == Decoder_NV5x) {
-			    block->decodeBlockNV5x(&colors);
-		    }
-		}
-		else if (format == nvtt::Format_BC3)
-		{
-		    const BlockDXT5 * block = (const BlockDXT5 *)ptr;
+                    if (decoder == Decoder_D3D10) {
+	                    block->decodeBlock(&colors, false);
+                    }
+                    else if (decoder == Decoder_D3D9) {
+	                    block->decodeBlock(&colors, false);
+                    }
+                    else if (decoder == Decoder_NV5x) {
+	                    block->decodeBlockNV5x(&colors);
+                    }
+                }
+                else if (format == nvtt::Format_BC3)
+                {
+                    const BlockDXT5 * block = (const BlockDXT5 *)ptr;

-		    if (decoder == Decoder_D3D10) {
-			    block->decodeBlock(&colors, false);
-		    }
-		    else if (decoder == Decoder_D3D9) {
-			    block->decodeBlock(&colors, false);
-		    }
-		    else if (decoder == Decoder_NV5x) {
-			    block->decodeBlockNV5x(&colors);
-		    }
-		}
-		else if (format == nvtt::Format_BC4)
-		{
-            const BlockATI1 * block = (const BlockATI1 *)ptr;
-            block->decodeBlock(&colors, decoder == Decoder_D3D9);
+                    if (decoder == Decoder_D3D10) {
+	                    block->decodeBlock(&colors, false);
+                    }
+                    else if (decoder == Decoder_D3D9) {
+	                    block->decodeBlock(&colors, false);
+                    }
+                    else if (decoder == Decoder_NV5x) {
+	                    block->decodeBlockNV5x(&colors);
+                    }
+                }
+                else if (format == nvtt::Format_BC4)
+                {
+                    const BlockATI1 * block = (const BlockATI1 *)ptr;
+                    block->decodeBlock(&colors, decoder == Decoder_D3D9);
+                }
+                else if (format == nvtt::Format_BC5)
+                {
+                    const BlockATI2 * block = (const BlockATI2 *)ptr;
+                    block->decodeBlock(&colors, decoder == Decoder_D3D9);
+                }
+
+                for (int yy = 0; yy < 4; yy++)
+                {
+                    for (int xx = 0; xx < 4; xx++)
+                    {
+                        Color32 c = colors.color(xx, yy);
+
+                        if (x * 4 + xx < w && y * 4 + yy < h)
+                        {
+                            m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
+                            m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
+                            m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
+                            m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
+                        }
+                    }
+                }
+
+                ptr += bs;
+            }
        }
-        else if (format == nvtt::Format_BC5)
-        {
-            const BlockATI2 * block = (const BlockATI2 *)ptr;
-            block->decodeBlock(&colors, decoder == Decoder_D3D9);
-        }
-
-		for (int yy = 0; yy < 4; yy++)
-		{
-		    for (int xx = 0; xx < 4; xx++)
-		    {
-			Color32 c = colors.color(xx, yy);
-
-			if (x * 4 + xx < w && y * 4 + yy < h)
-			{
-			    m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
-			    m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
-			    m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
-			    m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
-			}
-		    }
-		}
-
-		ptr += bs;
-	    }
-	}
    }
-    catch(...) {
+    CATCH {
        return false;
    }

@ -812,6 +910,43 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth
    m->image = img;
 }

+// Resizes the surface to the extents chosen by getTargetExtent, further reduced so that
+// all sides are equal: the smaller of width/height for 2D, the smallest of the three
+// extents for 3D. Cube faces are only checked to be square already. No-op on a null surface.
+void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter)
+{
+    if (isNull()) return;
+
+    float filterWidth;
+    float params[2];
+    getDefaultFilterWidthAndParams(filter, &filterWidth, params);
+
+    int w = m->image->width();
+    int h = m->image->height();
+    int d = m->image->depth();
+    getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type);
+
+    switch (m->type) {
+        case TextureType_2D: {
+            nvDebugCheck(d==1);
+            const int side = nv::min(w,h);
+            w = side;
+            h = side;
+            break;
+        }
+        case TextureType_Cube: {
+            nvDebugCheck(d==1);
+            nvDebugCheck(w==h);
+            break;
+        }
+        case TextureType_3D: {
+            const int side = nv::min(nv::min(w,h),d);
+            w = side;
+            h = side;
+            d = side;
+            break;
+        }
+        default:
+            // Any other type keeps the extents computed above.
+            break;
+    }
+
+    resize(w, h, d, filter, filterWidth, params);
+}
+
 void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter)
 {
    float filterWidth;
@ -834,18 +969,26 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl
    resize(w, h, d, filter, filterWidth, params);
 }

-bool Surface::buildNextMipmap(MipmapFilter filter)
+// Returns false for a null surface; otherwise asks nv::canMakeNextMipmap whether a
+// surface of the current width/height/depth can still produce a smaller level.
+// NOTE(review): min_size is signed here and unsigned in the helper, so a negative
+// value is converted implicitly -- confirm callers never pass one.
+bool Surface::canMakeNextMipmap(int min_size /*= 1*/)
+{
+    if (isNull()) return false;
+
+    return nv::canMakeNextMipmap(width(), height(), depth(), min_size);
+}
+
+
+// Convenience overload: looks up the default width and parameters for the filter and
+// forwards them, together with min_size, to the four-argument buildNextMipmap.
+bool Surface::buildNextMipmap(MipmapFilter filter, int min_size /*= 1*/)
 {
    float filterWidth;
    float params[2];
    getDefaultFilterWidthAndParams(filter, &filterWidth, params);

-    return buildNextMipmap(filter, filterWidth, params);
+    return buildNextMipmap(filter, filterWidth, params, min_size);
 }

-bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params)
+bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params, int min_size /*= 1*/)
 {
-    if (isNull() || (width() == 1 && height() == 1 && depth() == 1)) {
+    if (!canMakeNextMipmap(min_size)) {
        return false;
    }

@ -907,6 +1050,30 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa
    return true;
 }

+// Replaces this surface with its next, half-size mip level filled with a constant color.
+// color_components must point to one value per component of the current image.
+// Returns false, leaving the surface untouched, when the surface is null, is already
+// 1x1x1, or color_components is NULL.
+bool Surface::buildNextMipmapSolidColor(const float * const color_components)
+{
+    if (isNull() || (width() == 1 && height() == 1 && depth() == 1)) {
+        return false;
+    }
+    if (color_components == NULL) {
+        return false;
+    }
+
+    detach();
+
+    FloatImage * img = new FloatImage();
+    const uint w = max(1, m->image->m_width / 2);
+    const uint h = max(1, m->image->m_height / 2);
+
+    // Halve the depth as well, so a 3D surface stays a volume instead of being
+    // collapsed to a single slice. For depth 1 this yields 1, as before.
+    const uint depth = uint(m->image->depth());
+    const uint d = (depth > 1) ? depth / 2 : 1;
+
+    img->allocate(m->image->m_componentCount, w, h, d);
+
+    for(uint c = 0; c < img->m_componentCount; c++)
+    {
+        img->clear(c, color_components[c]);
+    }
+
+    delete m->image;
+    m->image = img;
+
+    return true;
+}
+
 void Surface::canvasSize(int w, int h, int d)
 {
    nvDebugCheck(w > 0 && h > 0 && d > 0);
@ -1083,6 +1250,7 @@ void Surface::transform(const float w0[4], const float w1[4], const float w2[4],
    m->image->transform(0, xform, voffset);
 }

+// R, G, B, A, 1, 0, -1
 void Surface::swizzle(int r, int g, int b, int a)
 {
    if (isNull()) return;
@ -1113,52 +1281,6 @@ void Surface::clamp(int channel, float low, float high)
    m->image->clamp(channel, 1, low, high);
 }

-void Surface::packNormal()
-{
-    if (isNull()) return;
-
-    detach();
-
-    m->image->scaleBias(0, 3, 0.5f, 0.5f);
-}
-
-void Surface::expandNormal()
-{
-    if (isNull()) return;
-
-    detach();
-
-    m->image->scaleBias(0, 3, 2.0f, -1.0f);
-}
-
-// Create a Toksvig map for this normal map.
-// http://blog.selfshadow.com/2011/07/22/specular-showdown/
-// @@ Assumes this is a normal map expanded in the [-1, 1] range.
-Surface Surface::createToksvigMap(float power) const
-{
-    if (isNull()) return Surface();
-
-    // @@ TODO
-
-    return Surface();
-}
-
-// @@ Should I add support for LEAN maps? That requires 5 terms, which would have to be encoded in two textures.
-// There's nothing stopping us from having 5 channels in a surface, and then, let the user swizzle them as they wish.
-// CLEAN maps are probably more practical, though.
-// http://www.cs.umbc.edu/~olano/papers/lean/
-// http://gaim.umbc.edu/2011/07/24/shiny-and-clean/
-// http://gaim.umbc.edu/2011/07/26/on-error/
-NVTT_API Surface Surface::createCleanMap() const
-{
-    if (isNull()) return Surface();
-
-    // @@ TODO
-
-    return Surface();
-}
-
-
 void Surface::blend(float red, float green, float blue, float alpha, float t)
 {
    if (isNull()) return;
@ -1285,13 +1407,10 @@ void Surface::fill(float red, float green, float blue, float alpha)
    float * a = img->channel(3);

    const uint count = img->pixelCount();
-    for (uint i = 0; i < count; i++)
-    {
-        r[i] = red;
-        g[i] = green;
-        b[i] = blue;
-        a[i] = alpha;
-    }
+    for (uint i = 0; i < count; i++) r[i] = red;
+    for (uint i = 0; i < count; i++) g[i] = green;
+    for (uint i = 0; i < count; i++) b[i] = blue;
+    for (uint i = 0; i < count; i++) a[i] = alpha;
 }


@ -1301,6 +1420,8 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)

    detach();

+    alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
+
    m->image->scaleAlphaToCoverage(coverage, alphaRef, 3);
 }

@ -1341,7 +1462,6 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
    detach();

    threshold = ::clamp(threshold, 1e-6f, 1.0f);
-    float irange = 1.0f / range;

    FloatImage * img = m->image;
    float * r = img->channel(0);
@ -1360,6 +1480,7 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
        r[i] = R / M;
        g[i] = G / M;
        b[i] = B / M;
+
        a[i] = (M - threshold) / (1 - threshold);

 #else
@ -1402,6 +1523,7 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
    }
 }

+
 void Surface::fromRGBM(float range/*= 1*/)
 {
    if (isNull()) return;
@ -1425,6 +1547,37 @@ void Surface::fromRGBM(float range/*= 1*/)
    }
 }

+// This is a dumb way to encode luminance-only values.
+// Each pixel's RGB is clamped to [0, 1]; M is the largest of R, G, B and threshold.
+// The three color channels all receive (R + G + B) / 3 divided by M, and alpha receives
+// (M - threshold) / (1 - threshold). The range argument is not used.
+void Surface::toLM(float range/*= 1*/, float threshold/*= 0.25*/)
+{
+    if (isNull()) return;
+
+    detach();
+
+    threshold = ::clamp(threshold, 1e-6f, 1.0f);
+
+    FloatImage * img = m->image;
+    float * red   = img->channel(0);
+    float * green = img->channel(1);
+    float * blue  = img->channel(2);
+    float * alpha = img->channel(3);
+
+    const uint pixelCount = img->pixelCount();
+    for (uint p = 0; p < pixelCount; p++) {
+        const float R = nv::clamp(red[p], 0.0f, 1.0f);
+        const float G = nv::clamp(green[p], 0.0f, 1.0f);
+        const float B = nv::clamp(blue[p], 0.0f, 1.0f);
+
+        const float M = max(max(R, G), max(B, threshold));
+
+        const float L = (R + G + B) / 3;
+        const float encoded = L / M;
+
+        red[p] = encoded;
+        green[p] = encoded;
+        blue[p] = encoded;
+        alpha[p] = (M - threshold) / (1 - threshold);
+    }
+}
+

 static Color32 toRgbe8(float r, float g, float b)
 {
@ -2147,21 +2300,25 @@ void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither)

    FloatImage * img = m->image;

-    float scale, offset;
+    float scale, offset0, offset1;
    if (exactEndPoints) {
+        // floor(x*(range-1) + 0.5) / (range-1)
        scale = float((1 << bits) - 1);
-        offset = 0.0f;
+        offset0 = 0.5f;
+        offset1 = 0.0f;
    }
    else {
+        // (floor(x*range) + 0.5) / range
        scale = float(1 << bits);
-        offset = 0.5f;
+        offset0 = 0.0f;
+        offset1 = 0.5f;
    }

    if (!dither) {
        float * c = img->channel(channel);
        const uint count = img->pixelCount();
        for (uint i = 0; i < count; i++) {
-            c[i] = floorf(c[i] * scale + offset) / scale;
+            c[i] = saturate((floorf(c[i] * scale + offset0) + offset1) / scale);
        }
    }
    else {
@ -2182,7 +2339,7 @@ void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither)
                    float & f = img->pixel(channel, x, y, 0);

                    // Add error and quantize.
-                    float qf = floorf((f + row0[1+x]) * scale + offset) / scale;
+                    float qf = saturate((floorf((f + row0[1+x]) * scale + offset0) + offset1) / scale);

                    // Compute new error:
                    float diff = f - qf;
@ -2221,9 +2378,6 @@ void Surface::toNormalMap(float sm, float medium, float big, float large)
    const FloatImage * img = m->image;
    m->image = nv::createNormalMap(img, (FloatImage::WrapMode)m->wrapMode, filterWeights);

-#pragma NV_MESSAGE("TODO: Pack and expand normals explicitly?")
-    m->image->packNormals(0);
-
    delete img;

    m->isNormalMap = true;
@ -2246,7 +2400,6 @@ void Surface::transformNormals(NormalTransform xform)
    detach();

    FloatImage * img = m->image;
-    img->expandNormals(0);

    const uint count = img->pixelCount();
    for (uint i = 0; i < count; i++) {
@ -2308,8 +2461,6 @@ void Surface::transformNormals(NormalTransform xform)
        y = n.y;
        z = n.z;
    }
-
-    img->packNormals(0);
 }

 void Surface::reconstructNormals(NormalTransform xform)
@ -2319,7 +2470,6 @@ void Surface::reconstructNormals(NormalTransform xform)
    detach();

    FloatImage * img = m->image;
-    img->expandNormals(0);

    const uint count = img->pixelCount();
    for (uint i = 0; i < count; i++) {
@ -2357,8 +2507,6 @@ void Surface::reconstructNormals(NormalTransform xform)
        y = n.y;
        z = n.z;
    }
-
-    img->packNormals(0);
 }

 void Surface::toCleanNormalMap()
@ -2367,8 +2515,6 @@ void Surface::toCleanNormalMap()

    detach();

-    m->image->expandNormals(0);
-
    const uint count = m->image->pixelCount();
    for (uint i = 0; i < count; i++) {
        float x = m->image->pixel(0, i);
@ -2376,22 +2522,48 @@ void Surface::toCleanNormalMap()

        m->image->pixel(2, i) = x*x + y*y;
    }
-
-    m->image->packNormals(0);
 }

 // [-1,1] -> [ 0,1]
-void Surface::packNormals() {
+// Forwards scale and bias to FloatImage::scaleBias for 3 channels starting at channel 0.
+// No-op on a null surface.
+// NOTE(review): how scaleBias combines scale and bias is defined in FloatImage and is
+// not visible here -- confirm the defaults give the mapping stated above.
+void Surface::packNormals(float scale/*= 0.5f*/, float bias/*= 0.5f*/) {
    if (isNull()) return;
    detach();
-    m->image->packNormals(0);
+    m->image->scaleBias(0, 3, scale, bias);
 }

 // [ 0,1] -> [-1,1]
-void Surface::expandNormals() {
+// Forwards scale and bias to FloatImage::scaleBias for 3 channels starting at channel 0.
+// No-op on a null surface.
+// NOTE(review): the default bias quoted in the signature comment is -2 * 127/255 rather
+// than -1, so the default mapping is presumably only approximately [-1,1] -- confirm.
+void Surface::expandNormals(float scale/*= 2.0f*/, float bias/*= - 2.0f * 127.0f / 255.0f*/) {
    if (isNull()) return;
    detach();
-    m->image->expandNormals(0);
+    m->image->scaleBias(0, 3, scale, bias);
+}
+
+
+// Create a Toksvig map for this normal map.
+// http://blog.selfshadow.com/2011/07/22/specular-showdown/
+// @@ Assumes this is a normal map expanded in the [-1, 1] range.
+// Not implemented yet: returns an empty Surface in every case and ignores power.
+Surface Surface::createToksvigMap(float power) const
+{
+    if (isNull()) return Surface();
+
+    // @@ TODO
+
+    return Surface();
+}
+
+// @@ Should I add support for LEAN maps? That requires 5 terms, which would have to be encoded in two textures.
+// There's nothing stopping us from having 5 channels in a surface, and then, let the user swizzle them as they wish.
+// CLEAN maps are probably more practical, though.
+// http://www.cs.umbc.edu/~olano/papers/lean/
+// http://gaim.umbc.edu/2011/07/24/shiny-and-clean/
+// http://gaim.umbc.edu/2011/07/26/on-error/
+// Not implemented yet: returns an empty Surface in every case.
+NVTT_API Surface Surface::createCleanMap() const
+{
+    if (isNull()) return Surface();
+
+    // @@ TODO
+
+    return Surface();
 }


@ -2422,7 +2594,7 @@ void Surface::flipZ()
    m->image->flipZ();
 }

-Surface Surface::subImage(int x0, int x1, int y0, int y1, int z0, int z1) const
+Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const
 {
    Surface s;

@ -2495,9 +2667,6 @@ bool Surface::addChannel(const Surface & srcImage, int srcChannel, int dstChanne

    dst = m->image;

-    const uint w = src->width();
-    const uint h = src->height();
-
    float * d = dst->channel(dstChannel);
    const float * s = src->channel(srcChannel);

@ -2510,6 +2679,38 @@ bool Surface::addChannel(const Surface & srcImage, int srcChannel, int dstChanne
 }


+// Copies the box of xsize * ysize * zsize texels that starts at (xsrc, ysrc, zsrc) in
+// srcImage into this surface at (xdst, ydst, zdst), for all four channels.
+// Returns false, without modifying this surface, when either surface is null, when any
+// offset is negative, or when the box does not fit inside the source or the destination.
+bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst)
+{
+    if (isNull() || srcImage.isNull()) return false;
+
+    if (xsrc < 0 || ysrc < 0 || zsrc < 0) return false;
+    if (xdst < 0 || ydst < 0 || zdst < 0) return false;
+
+    FloatImage * dst = m->image;
+    const FloatImage * src = srcImage.m->image;
+
+    if (toU32(xsrc + xsize) > src->width() || toU32(ysrc + ysize) > src->height() || toU32(zsrc + zsize) > src->depth()) return false;
+    if (toU32(xdst + xsize) > dst->width() || toU32(ydst + ysize) > dst->height() || toU32(zdst + zsize) > dst->depth()) return false;
+
+    detach();
+
+    // Re-read the destination after detach(), as addChannel does: the pointer taken
+    // above may refer to an image that is still shared with other surfaces.
+    dst = m->image;
+
+    // For each channel.
+    for(int i = 0; i < 4; i++) {
+        float * d = dst->channel(i);
+        const float * s = src->channel(i);
+
+        // Copy region from src to dst.
+        for (int z = 0; z < zsize; z++) {
+            for (int y = 0; y < ysize; y++) {
+                for (int x = 0; x < xsize; x++) {
+                    d[dst->index(xdst + x, ydst + y, zdst + z)] = s[src->index(xsrc + x, ysrc + y, zsrc + z)];
+                }
+            }
+        }
+    }
+
+    return true;
+}
+
+

 float nvtt::rmsError(const Surface & reference, const Surface & image)
 {
--- a/src/nvtt/Surface.h
+++ b/src/nvtt/Surface.h
@ -78,8 +78,10 @@ namespace nvtt
 } // nvtt namespace

 namespace nv {
+    bool canMakeNextMipmap(uint w, uint h, uint d, uint min_size);
    uint countMipmaps(uint w);
    uint countMipmaps(uint w, uint h, uint d);
+    uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size);
    uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format);
    void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType);
 }
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@ -454,15 +454,18 @@ namespace nvtt
        NVTT_API AlphaMode alphaMode() const;
        NVTT_API bool isNormalMap() const;
        NVTT_API int countMipmaps() const;
+        NVTT_API int countMipmaps(int min_size) const;
        NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const;
        NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const;
        NVTT_API const float * data() const;
+        NVTT_API const float * channel(int i) const;
        NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const;
-        NVTT_API void range(int channel, float * rangeMin, float * rangeMax) const;
+        NVTT_API void range(int channel, float * rangeMin, float * rangeMax, int alpha_channel = -1, float alpha_ref = 0.f) const;

        // Texture data.
        NVTT_API bool load(const char * fileName, bool * hasAlpha = 0);
-        NVTT_API bool save(const char * fileName) const;
+        NVTT_API bool save(const char * fileName, bool hasAlpha = 0, bool hdr = 0) const;
+        NVTT_API bool setImage(int w, int h, int d);
        NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * data);
        NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a);
        NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, const void * data);
@ -472,9 +475,14 @@ namespace nvtt
        NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0);
        NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
        NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
-        NVTT_API bool buildNextMipmap(MipmapFilter filter);
-        NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0);
+        NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter);
+
+        NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1);
+        NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1);
+        NVTT_API bool buildNextMipmapSolidColor(const float * const color_components);
        NVTT_API void canvasSize(int w, int h, int d);
+        // Companion query for the resize/mipmap calls above. NOTE(review): presumably reports whether buildNextMipmap(filter, min_size) can still produce a level -- confirm against the definition.
+        NVTT_API bool canMakeNextMipmap(int min_size = 1);

        // Color transforms.
        NVTT_API void toLinear(float gamma);
@ -488,17 +496,15 @@ namespace nvtt
        NVTT_API void swizzle(int r, int g, int b, int a);
        NVTT_API void scaleBias(int channel, float scale, float bias);
        NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f);
-        NVTT_API void packNormal();
-        NVTT_API void expandNormal();
        NVTT_API void blend(float r, float g, float b, float a, float t);
        NVTT_API void premultiplyAlpha();
        NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
        NVTT_API void setBorder(float r, float g, float b, float a);
        NVTT_API void fill(float r, float g, float b, float a);
        NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f);
-        //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax);
        NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f);
        NVTT_API void fromRGBM(float range = 1.0f);
+        NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f);
        NVTT_API void toRGBE(int mantissaBits, int exponentBits);
        NVTT_API void fromRGBE(int mantissaBits, int exponentBits);
        NVTT_API void toYCoCg();
@ -519,14 +525,14 @@ namespace nvtt
        NVTT_API void binarize(int channel, float threshold, bool dither);
        NVTT_API void quantize(int channel, int bits, bool exactEndPoints, bool dither);

-        // Normal map transforms. @@ All these methods assume packed normals.
+        // Normal map transforms.
        NVTT_API void toNormalMap(float sm, float medium, float big, float large);
        NVTT_API void normalizeNormalMap();
        NVTT_API void transformNormals(NormalTransform xform);
        NVTT_API void reconstructNormals(NormalTransform xform);
        NVTT_API void toCleanNormalMap();
-        NVTT_API void packNormals();   // [-1,1] -> [ 0,1]
-        NVTT_API void expandNormals(); // [ 0,1] -> [-1,1]
+        NVTT_API void packNormals(float scale = 0.5f, float bias = 0.5f);       // Defaults map [-1,1] -> [ 0,1], assuming v * scale + bias (TODO confirm).
+        NVTT_API void expandNormals(float scale = 2.0f, float bias = -1.0f);    // Defaults map [ 0,1] -> [-1,1], assuming v * scale + bias (TODO confirm).
        NVTT_API Surface createToksvigMap(float power) const;
        NVTT_API Surface createCleanMap() const;

@ -534,7 +540,7 @@ namespace nvtt
        NVTT_API void flipX();
        NVTT_API void flipY();
        NVTT_API void flipZ();
-        NVTT_API Surface subImage(int x0, int x1, int y0, int y1, int z0, int z1) const;
+        NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const;

        // Copy image data.
        NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel);
@ -542,6 +548,9 @@ namespace nvtt

        NVTT_API bool addChannel(const Surface & img, int srcChannel, int dstChannel, float scale);

+        NVTT_API bool copy(const Surface & src, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst);
+
+
    //private:
        void detach();

@ -599,12 +608,15 @@ namespace nvtt

        NVTT_API float average(int channel) const;
        NVTT_API void range(int channel, float * minimum_ptr, float * maximum_ptr) const;
+        NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f);


        // Filtering.
        NVTT_API CubeSurface irradianceFilter(int size, EdgeFixup fixupMethod) const;
        NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const;

+        NVTT_API CubeSurface fastResample(int size, EdgeFixup fixupMethod) const;
+

        /*
        NVTT_API void resize(int w, int h, ResizeFilter filter);
--- a/src/nvtt/tests/testsuite.cpp
+++ b/src/nvtt/tests/testsuite.cpp
@ -856,7 +856,7 @@ int main(int argc, char *argv[])
            outputFileName.stripExtension();
            if (set.type == ImageType_HDR) outputFileName.append(".dds");
            else outputFileName.append(".tga");
-            if (!img_out.save(outputFileName.str()))
+            if (!img_out.save(outputFileName.str(), set.type == ImageType_RGBA, set.type == ImageType_HDR))
            {
                printf("Error saving file '%s'.\n", outputFileName.str());
            }
--- a/src/nvtt/tools/cmdline.h
+++ b/src/nvtt/tools/cmdline.h
@ -58,7 +58,7 @@ struct MyAssertHandler : public nv::AssertHandler {
    }

    // Handler method, note that func might be NULL!
-    virtual int assertion( const char *exp, const char *file, int line, const char *func ) {
+    virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) {
        fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
        nv::debug::dumpInfo();
        exit(1);