From af9ba8ae90570cfd5cc66df77d782dcdbdc32eff Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Tue, 27 Sep 2011 05:17:01 +0000
Subject: [PATCH] Add support for 3D textures. Integrate patch provided in issue
 28 plus some additional changes.

---
 src/nvimage/ColorBlock.cpp           |   2 +-
 src/nvimage/DirectDrawSurface.cpp    |  12 +-
 src/nvimage/ErrorMetric.cpp          |  48 +-
 src/nvimage/FloatImage.cpp           | 758 +++++++++++++++++++--------
 src/nvimage/FloatImage.h             | 271 +++++++---
 src/nvimage/Image.cpp                |  29 +-
 src/nvimage/Image.h                  |   6 +-
 src/nvimage/ImageIO.cpp              |  10 +-
 src/nvimage/NormalMap.cpp            | 246 ++++-----
 src/nvtt/Compressor.h                |   2 +-
 src/nvtt/CompressorDX9.cpp           | 398 ++++++--------
 src/nvtt/CompressorDX9.h             |   8 +-
 src/nvtt/CompressorDXT.cpp           |   8 +-
 src/nvtt/CompressorDXT.h             |   4 +-
 src/nvtt/CompressorRGB.cpp           | 115 ++--
 src/nvtt/CompressorRGB.h             |   2 +-
 src/nvtt/CompressorRGBE.cpp          |  16 +-
 src/nvtt/CompressorRGBE.h            |   6 +-
 src/nvtt/Context.cpp                 |  34 +-
 src/nvtt/InputOptions.cpp            |   3 -
 src/nvtt/TexImage.cpp                | 516 ++++++++++--------
 src/nvtt/cuda/CudaCompressorDXT.cpp  |   3 +-
 src/nvtt/cuda/CudaCompressorDXT.h    |   2 +-
 src/nvtt/cuda/CudaUtils.cpp          |   2 +-
 src/nvtt/nvtt.h                      |  68 ++-
 src/nvtt/tests/imperativeapi.cpp     |   2 +-
 src/nvtt/tests/process_alpha_map.cpp |   8 +-
 src/nvtt/tests/testsuite.cpp         |   6 +-
 28 files changed, 1535 insertions(+), 1050 deletions(-)

diff --git a/src/nvimage/ColorBlock.cpp b/src/nvimage/ColorBlock.cpp
index 7ae90a4..9f18edf 100644
--- a/src/nvimage/ColorBlock.cpp
+++ b/src/nvimage/ColorBlock.cpp
@@ -2,6 +2,7 @@
 
 #include "ColorBlock.h"
 #include "Image.h"
+#include "FloatImage.h"
 
 #include "nvmath/Box.h"
 #include "nvcore/Utils.h" // swap
@@ -458,7 +459,6 @@ float ColorBlock::volume() const
     return bounds.volume();
 }*/
 
-#include "FloatImage.h"
 
 void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y)
 {
diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp
index b947682..f67644b 100644
--- a/src/nvimage/DirectDrawSurface.cpp
+++ b/src/nvimage/DirectDrawSurface.cpp
@@ -983,12 +983,6 @@ bool DirectDrawSurface::isSupported() const
             // Cubemaps must contain all faces.
             return false;
         }
-
-        if (isTexture3D())
-        {
-            // @@ 3D textures not supported yet.
-            return false;
-        }
     }
 
     return true;
@@ -1127,15 +1121,17 @@ void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
 
     uint w = width();
     uint h = height();
+	uint d = depth();
 
     // Compute width and height.
     for (uint m = 0; m < mipmap; m++)
     {
         w = max(1U, w / 2);
         h = max(1U, h / 2);
+		d = max(1U, d / 2);
     }
 
-    img->allocate(w, h);
+    img->allocate(w, h, d);
 
     if (hasAlpha())
     {
@@ -1416,7 +1412,7 @@ uint DirectDrawSurface::mipmapSize(uint mipmap) const
         // @@ How are 3D textures aligned?
         w = (w + 3) / 4;
         h = (h + 3) / 4;
-        return blockSize() * w * h;
+        return blockSize() * w * h * d;
     }
     else
     {
diff --git a/src/nvimage/ErrorMetric.cpp b/src/nvimage/ErrorMetric.cpp
index c259a57..e9e8bc3 100644
--- a/src/nvimage/ErrorMetric.cpp
+++ b/src/nvimage/ErrorMetric.cpp
@@ -11,15 +11,15 @@ using namespace nv;
 
 float nv::rmsColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight)
 {
-    if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
+    if (!sameLayout(img, ref)) {
         return FLT_MAX;
     }
-    nvDebugCheck(img->componentNum() == 4);
-    nvDebugCheck(ref->componentNum() == 4);
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
 
     double mse = 0;
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++)
     {
         float r0 = img->pixel(i + count * 0);
@@ -48,14 +48,14 @@ float nv::rmsColorError(const FloatImage * img, const FloatImage * ref, bool alp
 
 float nv::rmsAlphaError(const FloatImage * img, const FloatImage * ref)
 {
-    if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
+    if (!sameLayout(img, ref)) {
         return FLT_MAX;
     }
-    nvDebugCheck(img->componentNum() == 4 && ref->componentNum() == 4);
+    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
 
     double mse = 0;
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++)
     {
         float a0 = img->pixel(i + count * 3);
@@ -72,15 +72,15 @@ float nv::rmsAlphaError(const FloatImage * img, const FloatImage * ref)
 
 float nv::averageColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight)
 {
-    if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
+    if (!sameLayout(img, ref)) {
         return FLT_MAX;
     }
-    nvDebugCheck(img->componentNum() == 4);
-    nvDebugCheck(ref->componentNum() == 4);
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
 
     double mae = 0;
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++)
     {
         float r0 = img->pixel(i + count * 0);
@@ -112,7 +112,7 @@ float nv::averageAlphaError(const FloatImage * img, const FloatImage * ref)
     if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
         return FLT_MAX;
     }
-    nvDebugCheck(img->componentNum() == 4 && ref->componentNum() == 4);
+    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
 
     double mae = 0;
 
@@ -227,7 +227,7 @@ static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
 {
     nvDebugCheck(rgbImage != NULL && LabImage != NULL);
     nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
-    nvDebugCheck(rgbImage->componentNum() >= 3 && LabImage->componentNum() >= 3);
+    nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
 
     const uint w = rgbImage->width();
     const uint h = LabImage->height();
@@ -254,13 +254,8 @@ static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
 // Assumes input images are in linear sRGB space.
 float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
 {
-    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img0->componentNum() == 4 && img0->componentNum() == 4);
-
-    uint w = img0->width();
-    uint h = img0->height();
+    if (!sameLayout(img0, img1)) return FLT_MAX;
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); // Validate both inputs, not img0 twice.
 
     const float * r0 = img0->channel(0);
     const float * g0 = img0->channel(1);
@@ -272,7 +267,7 @@ float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
 
     double error = 0.0f;
 
-    const uint count = w*h;
+    const uint count = img0->pixelCount();
     for (uint i = 0; i < count; i++)
     {
         Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
@@ -292,14 +287,15 @@ float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
     if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
         return FLT_MAX;
     }
-    nvDebugCheck(img0->componentNum() == 4 && img0->componentNum() == 4);
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); // Validate both inputs, not img0 twice.
 
     uint w = img0->width();
     uint h = img0->height();
+    uint d = img0->depth();
 
     FloatImage lab0, lab1; // Original images in CIE-Lab space.
-    lab0.allocate(3, w, h);
-    lab1.allocate(3, w, h);
+    lab0.allocate(3, w, h, d);
+    lab1.allocate(3, w, h, d);
 
     // Convert input images to CIE-Lab.
     rgbToCieLab(img0, &lab0);
@@ -331,7 +327,7 @@ float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1)
     if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
         return FLT_MAX;
     }
-    nvDebugCheck(img0->componentNum() == 4 && img0->componentNum() == 4);
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); // Validate both inputs, not img0 twice.
 
     uint w = img0->width();
     uint h = img0->height();
@@ -369,7 +365,7 @@ float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
     if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
         return FLT_MAX;
     }
-    nvDebugCheck(img0->componentNum() == 4 && img0->componentNum() == 4);
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); // Validate both inputs, not img0 twice.
 
     uint w = img0->width();
     uint h = img0->height();
diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp
index d789fd0..17dac70 100644
--- a/src/nvimage/FloatImage.cpp
+++ b/src/nvimage/FloatImage.cpp
@@ -17,31 +17,16 @@
 
 using namespace nv;
 
-namespace 
-{
-    static int mirror(int x, int w)
-    {
-        if (w == 1) return 0;
-
-        x = abs(x);
-        while (x >= w) {
-            x = abs(w + w - x - 2);
-        }
-
-        return x;
-    }
-}
-
 
 /// Ctor.
-FloatImage::FloatImage() : m_width(0), m_height(0), 
-    m_componentNum(0), m_count(0), m_mem(NULL)
+FloatImage::FloatImage() : m_componentCount(0), m_width(0), m_height(0), m_depth(0),
+  m_pixelCount(0), m_floatCount(0), m_mem(NULL)
 {
 }
 
 /// Ctor. Init from image.
-FloatImage::FloatImage(const Image * img) : m_width(0), m_height(0), 
-    m_componentNum(0), m_count(0), m_mem(NULL)
+FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0),
+    m_pixelCount(0), m_floatCount(0), m_mem(NULL)
 {
     initFrom(img);
 }
@@ -58,15 +43,15 @@ void FloatImage::initFrom(const Image * img)
 {
     nvCheck(img != NULL);
 
-    allocate(4, img->width(), img->height());
+    allocate(4, img->width(), img->height(), img->depth());
 
     float * red_channel = channel(0);
     float * green_channel = channel(1);
     float * blue_channel = channel(2);
     float * alpha_channel = channel(3);
 
-    const uint count = m_width * m_height;
-    for(uint i = 0; i < count; i++) {
+    const uint count = m_pixelCount;
+    for (uint i = 0; i < count; i++) {
         Color32 pixel = img->pixel(i);
         red_channel[i] = float(pixel.r) / 255.0f;
         green_channel[i] = float(pixel.g) / 255.0f;
@@ -76,22 +61,21 @@ void FloatImage::initFrom(const Image * img)
 }
 
 /// Convert the floating point image to a regular image.
-Image * FloatImage::createImage(uint base_component/*= 0*/, uint num/*= 4*/) const
+Image * FloatImage::createImage(uint baseComponent/*= 0*/, uint num/*= 4*/) const
 {
     nvCheck(num <= 4);
-    nvCheck(base_component + num <= m_componentNum);
+    nvCheck(baseComponent + num <= m_componentCount);
 
     AutoPtr<Image> img(new Image());
-    img->allocate(m_width, m_height);
+    img->allocate(m_width, m_height, m_depth);
 
-    const uint size = m_width * m_height;
-    for(uint i = 0; i < size; i++) {
+    for (uint i = 0; i < m_pixelCount; i++) {
 
         uint c;
         uint8 rgba[4]= {0, 0, 0, 0xff};
 
-        for(c = 0; c < num; c++) {
-            float f = m_mem[size * (base_component + c) + i];
+        for (c = 0; c < num; c++) {
+            float f = pixel(baseComponent + c, i);
             rgba[c] = nv::clamp(int(255.0f * f), 0, 255);
         }
 
@@ -105,18 +89,18 @@ Image * FloatImage::createImage(uint base_component/*= 0*/, uint num/*= 4*/) con
 /// Convert the floating point image to a regular image. Correct gamma of rgb, but not alpha.
 Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const
 {
-    nvCheck(m_componentNum == 4);
+    nvCheck(m_componentCount == 4);
 
     AutoPtr<Image> img(new Image());
-    img->allocate(m_width, m_height);
+    img->allocate(m_width, m_height, m_depth);
 
     const float * rChannel = this->channel(0);
     const float * gChannel = this->channel(1);
     const float * bChannel = this->channel(2);
     const float * aChannel = this->channel(3);
 
-    const uint size = m_width * m_height;
-    for (uint i = 0; i < size; i++)
+    const uint count = m_pixelCount;
+    for (uint i = 0; i < count; i++)
     {
         const uint8 r = nv::clamp(int(255.0f * pow(rChannel[i], 1.0f/gamma)), 0, 255);
         const uint8 g = nv::clamp(int(255.0f * pow(gChannel[i], 1.0f/gamma)), 0, 255);
@@ -130,17 +114,19 @@ Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const
 }
 
-/// Allocate a 2D float image of the given format and the given extents.
+/// Allocate a 2D or 3D float image with the given component count and extents.
-void FloatImage::allocate(uint c, uint w, uint h)
+void FloatImage::allocate(uint c, uint w, uint h, uint d)
 {
-    if (m_componentNum != c || m_width != w || m_height != h)
+    if (m_componentCount != c || m_width != w || m_height != h || m_depth != d)
     {
         free();
 
         m_width = w;
         m_height = h;
-        m_componentNum = c;
-        m_count = w * h * c;
-        m_mem = malloc<float>(m_count);
+        m_depth = d;
+        m_componentCount = c;
+        m_pixelCount = w * h * d;
+        m_floatCount = m_pixelCount * c;
+        m_mem = malloc<float>(m_floatCount);
     }
 }
 
@@ -153,46 +139,46 @@ void FloatImage::free()
 
 void FloatImage::resizeChannelCount(uint c)
 {
-    if (m_componentNum != c) {
-        uint count = m_width * m_height * c;
+    if (m_componentCount != c) {
+        uint count = m_pixelCount * c;
         m_mem = realloc<float>(m_mem, count);
 
-        if (c > m_componentNum) {
-            memset(m_mem + m_count, 0, (count - m_count) * sizeof(float));
+        if (c > m_componentCount) {
+            memset(m_mem + m_floatCount, 0, (count - m_floatCount) * sizeof(float));
         }
 
-        m_componentNum = c;
-        m_count = count;
+        m_componentCount = c;
+        m_floatCount = count;
     }
 }
 
 void FloatImage::clear(float f/*=0.0f*/)
 {
-    for (uint i = 0; i < m_count; i++) {
+    for (uint i = 0; i < m_floatCount; i++) {
         m_mem[i] = f;
     }
 }
 
-void FloatImage::clear(uint component, float f/*= 0.0f*/)
+void FloatImage::clear(uint c, float f/*= 0.0f*/)
 {
-    float * channel = this->channel(component);
+    float * channel = this->channel(c);
 
-    const uint size = m_width * m_height;
-    for(uint i = 0; i < size; i++) {
+    const uint count = m_pixelCount;
+    for (uint i = 0; i < count; i++) {
         channel[i] = f;
     }
 }
 
-void FloatImage::normalize(uint base_component)
+void FloatImage::normalize(uint baseComponent)
 {
-    nvCheck(base_component + 3 <= m_componentNum);
+    nvCheck(baseComponent + 3 <= m_componentCount);
 
-    float * xChannel = this->channel(base_component + 0);
-    float * yChannel = this->channel(base_component + 1);
-    float * zChannel = this->channel(base_component + 2);
+    float * xChannel = this->channel(baseComponent + 0);
+    float * yChannel = this->channel(baseComponent + 1);
+    float * zChannel = this->channel(baseComponent + 2);
 
-    const uint size = m_width * m_height;
-    for(uint i = 0; i < size; i++) {
+    const uint count = m_pixelCount;
+    for (uint i = 0; i < count; i++) {
 
         Vector3 normal(xChannel[i], yChannel[i], zChannel[i]);
         normal = normalizeSafe(normal, Vector3(zero), 0.0f);
@@ -203,62 +189,62 @@ void FloatImage::normalize(uint base_component)
     }
 }
 
-void FloatImage::packNormals(uint base_component)
+void FloatImage::packNormals(uint baseComponent)
 {
-    scaleBias(base_component, 3, 0.5f, 0.5f);
+    scaleBias(baseComponent, 3, 0.5f, 0.5f);
 }
 
-void FloatImage::expandNormals(uint base_component)
+void FloatImage::expandNormals(uint baseComponent)
 {
-    scaleBias(base_component, 3, 2, -1.0);
+    scaleBias(baseComponent, 3, 2, -1.0);
 }
 
-void FloatImage::scaleBias(uint base_component, uint num, float scale, float bias)
+void FloatImage::scaleBias(uint baseComponent, uint num, float scale, float bias)
 {
-    const uint size = m_width * m_height;
+    const uint size = m_pixelCount;
 
-    for(uint c = 0; c < num; c++) {
-        float * ptr = this->channel(base_component + c);
+    for (uint c = 0; c < num; c++) {
+        float * ptr = this->channel(baseComponent + c);
 
-        for(uint i = 0; i < size; i++) {
+        for (uint i = 0; i < size; i++) {
             ptr[i] = scale * ptr[i] + bias;
         }
     }
 }
 
 /// Clamp the elements of the image.
-void FloatImage::clamp(uint base_component, uint num, float low, float high)
+void FloatImage::clamp(uint baseComponent, uint num, float low, float high)
 {
-    const uint size = m_width * m_height;
+    const uint size = m_pixelCount;
 
-    for(uint c = 0; c < num; c++) {
-        float * ptr = this->channel(base_component + c);
+    for (uint c = 0; c < num; c++) {
+        float * ptr = this->channel(baseComponent + c);
 
-        for(uint i = 0; i < size; i++) {
+        for (uint i = 0; i < size; i++) {
             ptr[i] = nv::clamp(ptr[i], low, high);
         }
     }
 }
 
 /// From gamma to linear space.
-void FloatImage::toLinear(uint base_component, uint num, float gamma /*= 2.2f*/)
+void FloatImage::toLinear(uint baseComponent, uint num, float gamma /*= 2.2f*/)
 {
-    exponentiate(base_component, num, gamma);
+    exponentiate(baseComponent, num, gamma);
 }
 
 /// From linear to gamma space.
-void FloatImage::toGamma(uint base_component, uint num, float gamma /*= 2.2f*/)
+void FloatImage::toGamma(uint baseComponent, uint num, float gamma /*= 2.2f*/)
 {
-    exponentiate(base_component, num, 1.0f/gamma);
+    exponentiate(baseComponent, num, 1.0f/gamma);
 }
 
 /// Exponentiate the elements of the image.
-void FloatImage::exponentiate(uint base_component, uint num, float power)
+void FloatImage::exponentiate(uint baseComponent, uint num, float power)
 {
-    const uint size = m_width * m_height;
+    const uint size = m_pixelCount;
 
     for(uint c = 0; c < num; c++) {
-        float * ptr = this->channel(base_component + c);
+        float * ptr = this->channel(baseComponent + c);
 
         for(uint i = 0; i < size; i++) {
             ptr[i] = powf(max(0.0f, ptr[i]), power);
@@ -267,17 +253,16 @@ void FloatImage::exponentiate(uint base_component, uint num, float power)
 }
 
 /// Apply linear transform.
-void FloatImage::transform(uint base_component, const Matrix & m, Vector4::Arg offset)
+void FloatImage::transform(uint baseComponent, const Matrix & m, Vector4::Arg offset)
 {
-    nvCheck(base_component + 4 <= m_componentNum);
+    nvCheck(baseComponent + 4 <= m_componentCount);
 
-    const uint size = m_width * m_height;
-
-    float * r = this->channel(base_component + 0);
-    float * g = this->channel(base_component + 1);
-    float * b = this->channel(base_component + 2);
-    float * a = this->channel(base_component + 3);
+    float * r = this->channel(baseComponent + 0);
+    float * g = this->channel(baseComponent + 1);
+    float * b = this->channel(baseComponent + 2);
+    float * a = this->channel(baseComponent + 3);
 
+    const uint size = m_pixelCount;
     for (uint i = 0; i < size; i++)
     {
         Vector4 color = nv::transform(m, Vector4(*r, *g, *b, *a)) + offset;
@@ -289,23 +274,22 @@ void FloatImage::transform(uint base_component, const Matrix & m, Vector4::Arg o
     }
 }
 
-void FloatImage::swizzle(uint base_component, uint r, uint g, uint b, uint a)
+void FloatImage::swizzle(uint baseComponent, uint r, uint g, uint b, uint a)
 {
-    nvCheck(base_component + 4 <= m_componentNum);
+    nvCheck(baseComponent + 4 <= m_componentCount);
     nvCheck(r < 7 && g < 7 && b < 7 && a < 7);
 
-    const uint size = m_width * m_height;
-
     float consts[] = { 1.0f, 0.0f, -1.0f };
     float * c[7];
-    c[0] = this->channel(base_component + 0);
-    c[1] = this->channel(base_component + 1);
-    c[2] = this->channel(base_component + 2);
-    c[3] = this->channel(base_component + 3);
+    c[0] = this->channel(baseComponent + 0);
+    c[1] = this->channel(baseComponent + 1);
+    c[2] = this->channel(baseComponent + 2);
+    c[3] = this->channel(baseComponent + 3);
     c[4] = consts;
     c[5] = consts + 1;
     c[6] = consts + 2;
 
+    const uint size = m_pixelCount;
     for (uint i = 0; i < size; i++)
     {
         float tmp[4] = { *c[r], *c[g], *c[b], *c[a] };
@@ -317,42 +301,81 @@ void FloatImage::swizzle(uint base_component, uint r, uint g, uint b, uint a)
     }
 }
 
-float FloatImage::sampleNearest(const float x, const float y, const int c, const WrapMode wm) const
+float FloatImage::sampleNearest(uint c, float x, float y, const WrapMode wm) const
 {
-    if( wm == WrapMode_Clamp ) return sampleNearestClamp(x, y, c);
-    else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(x, y, c);
-    else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(x, y, c);
+    if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y);   // Helpers take the channel index first.
+    else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y);
+    else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y);
 }
 
-float FloatImage::sampleLinear(const float x, const float y, const int c, const WrapMode wm) const
+float FloatImage::sampleLinear(uint c, float x, float y, WrapMode wm) const
 {
-    if( wm == WrapMode_Clamp ) return sampleLinearClamp(x, y, c);
-    else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(x, y, c);
-    else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(x, y, c);
+    if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y);   // Helpers take the channel index first.
+    else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y);
+    else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y);
 }
 
-float FloatImage::sampleNearestClamp(const float x, const float y, const int c) const
+float FloatImage::sampleNearest(uint c, float x, float y, float z, WrapMode wm) const
+{
+    if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y, z);   // Helpers take the channel index first.
+    else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y, z);
+    else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y, z);
+}
+
+float FloatImage::sampleLinear(uint c, float x, float y, float z, WrapMode wm) const
+{
+    if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y, z);   // Helpers take the channel index first.
+    else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y, z);
+    else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y, z);
+}
+
+float FloatImage::sampleNearestClamp(uint c, float x, float y) const
 {
     int ix = ::clamp(iround(x * m_width), 0, m_width-1);
     int iy = ::clamp(iround(y * m_height), 0, m_height-1);
-    return pixel(ix, iy, c);
+    return pixel(c, ix, iy, 0);
 }
 
-float FloatImage::sampleNearestRepeat(const float x, const float y, const int c) const
+float FloatImage::sampleNearestRepeat(uint c, float x, float y) const
 {
     int ix = iround(frac(x) * m_width);
     int iy = iround(frac(y) * m_height);
-    return pixel(ix, iy, c);
+    return pixel(c, ix, iy, 0);
 }
 
-float FloatImage::sampleNearestMirror(const float x, const float y, const int c) const
+float FloatImage::sampleNearestMirror(uint c, float x, float y) const
 {
-    int ix = mirror(iround(x * m_width), m_width);
-    int iy = mirror(iround(y * m_height), m_height);
-    return pixel(ix, iy, c);
+    int ix = wrapMirror(iround(x * m_width), m_width);
+    int iy = wrapMirror(iround(y * m_height), m_height);
+    return pixel(c, ix, iy, 0);
 }
 
-float FloatImage::sampleLinearClamp(float x, float y, const int c) const
+float FloatImage::sampleNearestClamp(uint c, float x, float y, float z) const
+{
+    int ix = wrapClamp(iround(x * m_width), m_width);
+    int iy = wrapClamp(iround(y * m_height), m_height);
+    int iz = wrapClamp(iround(z * m_depth), m_depth);
+    return pixel(c, ix, iy, iz);
+}
+
+float FloatImage::sampleNearestRepeat(uint c, float x, float y, float z) const
+{
+    int ix = iround(frac(x) * m_width);     // wrapRepeat(iround(x * m_width), m_width)
+    int iy = iround(frac(y) * m_height);    // wrapRepeat(iround(y * m_height), m_height)
+    int iz = iround(frac(z) * m_depth);     // wrapRepeat(iround(z * m_depth), m_depth)
+    return pixel(c, ix, iy, iz);
+}
+
+float FloatImage::sampleNearestMirror(uint c, float x, float y, float z) const
+{
+    int ix = wrapMirror(iround(x * m_width), m_width);
+    int iy = wrapMirror(iround(y * m_height), m_height);
+    int iz = wrapMirror(iround(z * m_depth), m_depth);
+    return pixel(c, ix, iy, iz);
+}
+
+
+float FloatImage::sampleLinearClamp(uint c, float x, float y) const
 {
     const int w = m_width;
     const int h = m_height;
@@ -368,18 +391,10 @@ float FloatImage::sampleLinearClamp(float x, float y, const int c) const
     const int ix1 = ::clamp(ifloor(x)+1, 0, w-1);
     const int iy1 = ::clamp(ifloor(y)+1, 0, h-1);
 
-    float f1 = pixel(ix0, iy0, c);
-    float f2 = pixel(ix1, iy0, c);
-    float f3 = pixel(ix0, iy1, c);
-    float f4 = pixel(ix1, iy1, c);
-
-    float i1 = lerp(f1, f2, fracX);
-    float i2 = lerp(f3, f4, fracX);
-
-    return lerp(i1, i2, fracY);
+    return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY);
 }
 
-float FloatImage::sampleLinearRepeat(float x, float y, int c) const
+float FloatImage::sampleLinearRepeat(uint c, float x, float y) const
 {
     const int w = m_width;
     const int h = m_height;
@@ -387,47 +402,101 @@ float FloatImage::sampleLinearRepeat(float x, float y, int c) const
     const float fracX = frac(x * w);
     const float fracY = frac(y * h);
 
+    // @@ Using floor in some places, but round in others?
     int ix0 = ifloor(frac(x) * w);
     int iy0 = ifloor(frac(y) * h);
     int ix1 = ifloor(frac(x + 1.0f/w) * w);
     int iy1 = ifloor(frac(y + 1.0f/h) * h);
 
-    float f1 = pixel(ix0, iy0, c);
-    float f2 = pixel(ix1, iy0, c);
-    float f3 = pixel(ix0, iy1, c);
-    float f4 = pixel(ix1, iy1, c);
+    return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY);
+}
 
-    float i1 = lerp(f1, f2, fracX);
-    float i2 = lerp(f3, f4, fracX);
+float FloatImage::sampleLinearMirror(uint c, float x, float y) const
+{
+    const int w = m_width;
+    const int h = m_height;
+
+    x *= w;
+    y *= h;
+
+    const float fracX = frac(x);
+    const float fracY = frac(y);
 
-    return lerp(i1, i2, fracY);
+    int ix0 = wrapMirror(iround(x), w);
+    int iy0 = wrapMirror(iround(y), h);
+    int ix1 = wrapMirror(iround(x) + 1, w);
+    int iy1 = wrapMirror(iround(y) + 1, h);
+
+    return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY);
 }
 
-float FloatImage::sampleLinearMirror(float x, float y, int c) const
+float FloatImage::sampleLinearClamp(uint c, float x, float y, float z) const
 {
     const int w = m_width;
     const int h = m_height;
+    const int d = m_depth;
 
     x *= w;
     y *= h;
+    z *= d;
 
     const float fracX = frac(x);
     const float fracY = frac(y);
+    const float fracZ = frac(z);
+
+    // @@ Using floor in some places, but round in others?
+    const int ix0 = ::clamp(ifloor(x), 0, w-1);
+    const int iy0 = ::clamp(ifloor(y), 0, h-1);
+    const int iz0 = ::clamp(ifloor(z), 0, d-1);   // Depth axis clamps against d, not h.
+    const int ix1 = ::clamp(ifloor(x)+1, 0, w-1);
+    const int iy1 = ::clamp(ifloor(y)+1, 0, h-1);
+    const int iz1 = ::clamp(ifloor(z)+1, 0, d-1);
+
+    return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ);
+}
+
+float FloatImage::sampleLinearRepeat(uint c, float x, float y, float z) const
+{
+    const int w = m_width;
+    const int h = m_height;
+    const int d = m_depth;
+
+    const float fracX = frac(x * w);
+    const float fracY = frac(y * h);
+    const float fracZ = frac(z * d);
+
+    int ix0 = ifloor(frac(x) * w);
+    int iy0 = ifloor(frac(y) * h);
+    int iz0 = ifloor(frac(z) * d);
+    int ix1 = ifloor(frac(x + 1.0f/w) * w);
+    int iy1 = ifloor(frac(y + 1.0f/h) * h);
+    int iz1 = ifloor(frac(z + 1.0f/d) * d);
+
+    return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ);
+}
+
+float FloatImage::sampleLinearMirror(uint c, float x, float y, float z) const
+{
+    const int w = m_width;
+    const int h = m_height;
+    const int d = m_depth;
 
-    int ix0 = mirror(iround(x), w);
-    int iy0 = mirror(iround(y), h);
-    int ix1 = mirror(iround(x) + 1, w);
-    int iy1 = mirror(iround(y) + 1, h);
+    x *= w;
+    y *= h;
+    z *= d;
 
-    float f1 = pixel(ix0, iy0, c);
-    float f2 = pixel(ix1, iy0, c);
-    float f3 = pixel(ix0, iy1, c);
-    float f4 = pixel(ix1, iy1, c);
+    int ix0 = wrapMirror(iround(x), w);
+    int iy0 = wrapMirror(iround(y), h);
+    int iz0 = wrapMirror(iround(z), d);
+    int ix1 = wrapMirror(iround(x) + 1, w);
+    int iy1 = wrapMirror(iround(y) + 1, h);
+    int iz1 = wrapMirror(iround(z) + 1, d);
 
-    float i1 = lerp(f1, f2, fracX);
-    float i2 = lerp(f3, f4, fracX);
+    const float fracX = frac(x);
+    const float fracY = frac(y);
+    const float fracZ = frac(z);
 
-    return lerp(i1, i2, fracY);
+    return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ);
 }
 
 
@@ -440,13 +509,14 @@ float FloatImage::sampleLinearMirror(float x, float y, int c) const
 ///
 FloatImage * FloatImage::fastDownSample() const
 {
+    nvDebugCheck(m_depth == 1);
     nvDebugCheck(m_width != 1 || m_height != 1);
 
     AutoPtr<FloatImage> dst_image( new FloatImage() );
 
     const uint w = max(1, m_width / 2);
     const uint h = max(1, m_height / 2);
-    dst_image->allocate(m_componentNum, w, h);
+    dst_image->allocate(m_componentCount, w, h);
 
     // 1D box filter.
     if (m_width == 1 || m_height == 1)
@@ -457,7 +527,7 @@ FloatImage * FloatImage::fastDownSample() const
         {
             const float scale = 1.0f / (2 * n + 1);
 
-            for(uint c = 0; c < m_componentNum; c++)
+            for(uint c = 0; c < m_componentCount; c++)
             {
                 const float * src = this->channel(c);
                 float * dst = dst_image->channel(c);
@@ -475,7 +545,7 @@ FloatImage * FloatImage::fastDownSample() const
         }
         else
         {
-            for(uint c = 0; c < m_componentNum; c++)
+            for(uint c = 0; c < m_componentCount; c++)
             {
                 const float * src = this->channel(c);
                 float * dst = dst_image->channel(c);
@@ -493,7 +563,7 @@ FloatImage * FloatImage::fastDownSample() const
     // Regular box filter.
     else if ((m_width & 1) == 0 && (m_height & 1) == 0)
     {
-        for(uint c = 0; c < m_componentNum; c++)
+        for(uint c = 0; c < m_componentCount; c++)
         {
             const float * src = this->channel(c);
             float * dst = dst_image->channel(c);
@@ -520,7 +590,7 @@ FloatImage * FloatImage::fastDownSample() const
 
         const float scale = 1.0f / (m_width * m_height);
 
-        for(uint c = 0; c < m_componentNum; c++)
+        for(uint c = 0; c < m_componentCount; c++)
         {
             const float * src = this->channel(c);
             float * dst = dst_image->channel(c);
@@ -555,7 +625,7 @@ FloatImage * FloatImage::fastDownSample() const
         nvDebugCheck(m_width == 2 * w + 1);
         const float scale = 1.0f / (2 * m_width);
 
-        for(uint c = 0; c < m_componentNum; c++)
+        for(uint c = 0; c < m_componentCount; c++)
         {
             const float * src = this->channel(c);
             float * dst = dst_image->channel(c);
@@ -587,7 +657,7 @@ FloatImage * FloatImage::fastDownSample() const
 
         const float scale = 1.0f / (2 * m_height);
 
-        for(uint c = 0; c < m_componentNum; c++)
+        for(uint c = 0; c < m_componentCount; c++)
         {
             const float * src = this->channel(c);
             float * dst = dst_image->channel(c);
@@ -622,8 +692,9 @@ FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const
 {
     const uint w = max(1, m_width / 2);
     const uint h = max(1, m_height / 2);
+    const uint d = max(1, m_depth / 2);
 
-    return resize(filter, w, h, wm);
+    return resize(filter, w, h, d, wm);
 }
 
 /// Downsample applying a 1D kernel separately in each dimension.
@@ -631,8 +702,9 @@ FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alp
 {
     const uint w = max(1, m_width / 2);
     const uint h = max(1, m_height / 2);
+    const uint d = max(1, m_depth / 2);
 
-    return resize(filter, w, h, wm, alpha);
+    return resize(filter, w, h, d, wm, alpha);
 }
 
 
@@ -650,66 +722,110 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
     // @@ Select fastest filtering order:
     //if (w * m_height <= h * m_width)
     {
-        tmp_image->allocate(m_componentNum, w, m_height);
-        dst_image->allocate(m_componentNum, w, h);
+        tmp_image->allocate(m_componentCount, w, m_height, m_depth); // One plane per slice: the loop below addresses plane(c, z) for every z < m_depth.
+        dst_image->allocate(m_componentCount, w, h, m_depth);
 
+        // @@ We could avoid this allocation, write directly to dst_plane.
         Array<float> tmp_column(h);
         tmp_column.resize(h);
 
-        for (uint c = 0; c < m_componentNum; c++)
+        for (uint c = 0; c < m_componentCount; c++)
         {
-            float * tmp_channel = tmp_image->channel(c);
+            for (uint z = 0; z < m_depth; z++)
+            {
+                float * tmp_plane = tmp_image->plane(c, z);
 
-            for (uint y = 0; y < m_height; y++) {
-                this->applyKernelHorizontal(xkernel, y, c, wm, tmp_channel + y * w);
-            }
+                for (uint y = 0; y < m_height; y++) {
+                    this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w);
+                }
 
-            float * dst_channel = dst_image->channel(c);
+                float * dst_plane = dst_image->plane(c, z);
 
-            for (uint x = 0; x < w; x++) {
-                tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.buffer());
+                for (uint x = 0; x < w; x++) {
+                    tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
 
-                for (uint y = 0; y < h; y++) {
-                    dst_channel[y * w + x] = tmp_column[y];
+                    // @@ We could avoid this copy, write directly to dst_plane.
+                    for (uint y = 0; y < h; y++) {
+                        dst_plane[y * w + x] = tmp_column[y];
+                    }
                 }
             }
         }
     }
-    /*else
-    {
-    tmp_image->allocate(m_componentNum, m_width, h);
-    dst_image->allocate(m_componentNum, w, h);
+
+    return dst_image.release();
+}
+
+/// Downsample applying a 1D kernel separately in each dimension. (for 3d textures)
+FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const
+{
+    // @@ Use monophase filters when frac(m_width / w) == 0
+
+    // Use the existing 2d version if we are not resizing in the Z axis:
+    if (m_depth == d) {
+        return resize(filter, w, h, wm);
+    }
+
+    AutoPtr<FloatImage> tmp_image( new FloatImage() );
+    AutoPtr<FloatImage> tmp_image2( new FloatImage() );
+    AutoPtr<FloatImage> dst_image( new FloatImage() );
+
+    PolyphaseKernel xkernel(filter, m_width, w, 32);
+    PolyphaseKernel ykernel(filter, m_height, h, 32);
+    PolyphaseKernel zkernel(filter, m_depth, d, 32);
+
+    tmp_image->allocate(m_componentCount, w, m_height, m_depth);
+    tmp_image2->allocate(m_componentCount, w, m_height, d);
+    dst_image->allocate(m_componentCount, w, h, d);
 
     Array<float> tmp_column(h);
     tmp_column.resize(h);
 
-    for (uint c = 0; c < m_componentNum; c++)
+    for (uint c = 0; c < m_componentCount; c++)
     {
-    float * tmp_channel = tmp_image->channel(c);
+        float * tmp_channel = tmp_image->channel(c);
 
-    for (uint x = 0; x < w; x++) {
-    tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
+        // split width in half
+        for (uint z = 0; z < m_depth; z++ ) {
+            for (uint y = 0; y < m_height; y++) {
+                this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w);
+            }
+        }
 
-    for (uint y = 0; y < h; y++) {
-    tmp_channel[y * w + x] = tmp_column[y];
-    }
-    }
+        // split depth in half
+        float * tmp2_channel = tmp_image2->channel(c);
+        for (uint y = 0; y < m_height; y++) {
+            for (uint x = 0; x < w; x++) {
+                tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() );
 
-    float * dst_channel = dst_image->channel(c);
+                for (uint z = 0; z < d; z++) {
+                    tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z];
+                }
+            }
+        }
 
-    for (uint y = 0; y < m_height; y++) {
-    this->applyKernelHorizontal(xkernel, y, c, wm, dst_channel + y * w);
-    }
+        // split height in half
+        float * dst_channel = dst_image->channel(c);
+
+        for (uint z = 0; z < d; z++ ) {
+            for (uint x = 0; x < w; x++) {
+                tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
+
+                for (uint y = 0; y < h; y++) {
+                    dst_channel[z * h * w + y * w + x] = tmp_column[y];
+                }
+            }
+        }
     }
-    }*/
 
     return dst_image.release();
 }
 
+
 /// Downsample applying a 1D kernel separately in each dimension.
 FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const
 {
-    nvCheck(alpha < m_componentNum);
+    nvCheck(alpha < m_componentCount);
 
     AutoPtr<FloatImage> tmp_image( new FloatImage() );
     AutoPtr<FloatImage> dst_image( new FloatImage() );	
@@ -718,13 +834,13 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
     PolyphaseKernel ykernel(filter, m_height, h, 32);
 
     {
-        tmp_image->allocate(m_componentNum, w, m_height);
-        dst_image->allocate(m_componentNum, w, h);
+        tmp_image->allocate(m_componentCount, w, m_height, m_depth); // One plane per slice: the loop below addresses plane(c, z) for every z < m_depth.
+        dst_image->allocate(m_componentCount, w, h, m_depth);
 
         Array<float> tmp_column(h);
         tmp_column.resize(h);
 
-        for (uint i = 0; i < m_componentNum; i++)
+        for (uint i = 0; i < m_componentCount; i++)
         {
             // Process alpha channel first.
             uint c;
@@ -732,19 +848,92 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
             else if (i > alpha) c = i;
             else c = i - 1;
 
-            float * tmp_channel = tmp_image->channel(c);
+            for (uint z = 0; z < m_depth; z++)
+            {
+                float * tmp_plane = tmp_image->plane(c, z);
+
+                for (uint y = 0; y < m_height; y++) {
+                    this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w);
+                }
+
+                float * dst_plane = dst_image->plane(c, z);
+
+                for (uint x = 0; x < w; x++) {
+                    tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
+
+                    // @@ Avoid this copy, write directly to dst_plane.
+                    for (uint y = 0; y < h; y++) {
+                        dst_plane[y * w + x] = tmp_column[y];
+                    }
+                }
+            }
+        }
+    }
+
+    return dst_image.release();
+}
+
+
+/// Downsample applying a 1D kernel separately in each dimension. (for 3d textures)
+FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const
+{
+    nvCheck(alpha < m_componentCount);
+
+    // use the existing 2d version if we are a 2d image:
+    if (m_depth == d) {
+        return resize( filter, w, h, wm, alpha );
+    }
+
+    AutoPtr<FloatImage> tmp_image( new FloatImage() );
+    AutoPtr<FloatImage> tmp_image2( new FloatImage() );
+    AutoPtr<FloatImage> dst_image( new FloatImage() );
+
+    PolyphaseKernel xkernel(filter, m_width, w, 32);
+    PolyphaseKernel ykernel(filter, m_height, h, 32);
+    PolyphaseKernel zkernel(filter, m_depth, d, 32);
 
+    tmp_image->allocate(m_componentCount, w, m_height, m_depth);
+    tmp_image2->allocate(m_componentCount, w, m_height, d);
+    dst_image->allocate(m_componentCount, w, h, d);
+
+    Array<float> tmp_column(max(h, d)); // Shared by the Z pass (d outputs) and the Y pass (h outputs).
+    tmp_column.resize(max(h, d));
+
+    for (uint i = 0; i < m_componentCount; i++)
+    {
+        // Process alpha channel first.
+        uint c;
+        if (i == 0) c = alpha;
+        else if (i > alpha) c = i;
+        else c = i - 1;
+
+        float * tmp_channel = tmp_image->channel(c);
+
+        for (uint z = 0; z < m_depth; z++ ) {
             for (uint y = 0; y < m_height; y++) {
-                this->applyKernelHorizontal(xkernel, y, c, wm, tmp_channel + y * w);
+                this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w);
+            }
+        }
+
+        float * tmp2_channel = tmp_image2->channel(c);
+        for (uint y = 0; y < m_height; y++) {
+            for (uint x = 0; x < w; x++) {
+                tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() );
+
+                for (uint z = 0; z < d; z++) {
+                    tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z];
+                }
             }
+        }
 
-            float * dst_channel = dst_image->channel(c);
+        float * dst_channel = dst_image->channel(c);
 
+        for (uint z = 0; z < d; z++ ) {
             for (uint x = 0; x < w; x++) {
-                tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.buffer());
+                tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
 
                 for (uint y = 0; y < h; y++) {
-                    dst_channel[y * w + x] = tmp_column[y];
+                    dst_channel[z * h * w + y * w + x] = tmp_column[y];
                 }
             }
         }
@@ -756,25 +945,25 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
 
 
 /// Apply 2D kernel at the given coordinates and return result.
-float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const
+float FloatImage::applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const
 {
     nvDebugCheck(k != NULL);
 
     const uint kernelWindow = k->windowSize();
     const int kernelOffset = int(kernelWindow / 2);
 
-    const float * channel = this->channel(c);
+    const float * channel = this->plane(c, z);
 
     float sum = 0.0f;
     for (uint i = 0; i < kernelWindow; i++)
     {
-        const int src_y = int(y + i) - kernelOffset;
+        int src_y = int(y + i) - kernelOffset;
 
         for (uint e = 0; e < kernelWindow; e++)
         {
-            const int src_x = int(x + e) - kernelOffset;
+            int src_x = int(x + e) - kernelOffset;
 
-            int idx = this->index(src_x, src_y, wm);
+            int idx = this->index(src_x, src_y, z, wm);
 
             sum += k->valueAt(e, i) * channel[idx];
         }
@@ -784,8 +973,30 @@ float FloatImage::applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode
 }
 
 
+/// Apply the 1D kernel along X around (x, y, z) in channel c and return the weighted sum.
+float FloatImage::applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const
+{
+    nvDebugCheck(k != NULL);
+
+    const uint kernelWindow = k->windowSize();
+    const int kernelOffset = int(kernelWindow / 2); // Taps start kernelOffset texels before x.
+
+    const float * channel = this->channel(c);
+
+    float sum = 0.0f;
+    for (uint i = 0; i < kernelWindow; i++)
+    {
+        const int src_x = int(x + i) - kernelOffset;
+        const int idx = this->index(src_x, y, z, wm); // Out-of-range src_x is left to index() and wm.
+
+        sum += k->valueAt(i) * channel[idx];
+    }
+
+    return sum;
+}
+
 /// Apply 1D vertical kernel at the given coordinates and return result.
-float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
+float FloatImage::applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const
 {
     nvDebugCheck(k != NULL);
 
@@ -798,7 +1009,7 @@ float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, W
     for (uint i = 0; i < kernelWindow; i++)
     {
         const int src_y = int(y + i) - kernelOffset;
-        const int idx = this->index(x, src_y, wm);
+        const int idx = this->index(x, src_y, z, wm);
 
         sum += k->valueAt(i) * channel[idx];
     }
@@ -806,8 +1017,8 @@ float FloatImage::applyKernelVertical(const Kernel1 * k, int x, int y, uint c, W
     return sum;
 }
 
-/// Apply 1D horizontal kernel at the given coordinates and return result.
-float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const
+/// Apply 1D kernel in the z direction at the given coordinates and return result.
+float FloatImage::applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const
 {
     nvDebugCheck(k != NULL);
 
@@ -817,20 +1028,52 @@ float FloatImage::applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c,
     const float * channel = this->channel(c);
 
     float sum = 0.0f;
-    for (uint e = 0; e < kernelWindow; e++)
+    for (uint i = 0; i < kernelWindow; i++)
     {
-        const int src_x = int(x + e) - kernelOffset;
-        const int idx = this->index(src_x, y, wm);
+        const int src_z = int(z + i) - kernelOffset;
+        const int idx = this->index(x, y, src_z, wm);
 
-        sum += k->valueAt(e) * channel[idx];
+        sum += k->valueAt(i) * channel[idx];
     }
 
     return sum;
 }
 
 
+/// Filter row (y, z) of channel c along X with the polyphase kernel, writing k.length() samples to output.
+void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * __restrict output) const
+{
+    const uint length = k.length();
+    const float scale = float(length) / float(m_width); // Output samples per source texel along X.
+    const float iscale = 1.0f / scale; // Source texels per output sample.
+
+    const float width = k.width();
+    const int windowSize = k.windowSize();
+
+    const float * channel = this->channel(c);
+
+    for (uint i = 0; i < length; i++)
+    {
+        const float center = (0.5f + i) * iscale; // Center of output sample i in source texel coordinates.
+
+        const int left = (int)floorf(center - width); // First source texel read for this sample.
+        const int right = (int)ceilf(center + width); // Only used by the check below.
+        nvDebugCheck(right - left <= windowSize);
+
+        float sum = 0;
+        for (int j = 0; j < windowSize; ++j)
+        {
+            const int idx = this->index(left + j, y, z, wm); // Out-of-range columns are left to index() and wm.
+
+            sum += k.valueAt(i, j) * channel[idx];
+        }
+
+        output[i] = sum;
+    }
+}
+
 /// Apply 1D vertical kernel at the given coordinates and return result.
-void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * __restrict output) const
+void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output) const
 {
     const uint length = k.length();
     const float scale = float(length) / float(m_height);
@@ -852,7 +1095,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, W
         float sum = 0;
         for (int j = 0; j < windowSize; ++j)
         {
-            const int idx = this->index(x, j+left, wm);
+            const int idx = this->index(x, j+left, z, wm);
 
             sum += k.valueAt(i, j) * channel[idx];
         }
@@ -861,11 +1104,11 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, W
     }
 }
 
-/// Apply 1D horizontal kernel at the given coordinates and return result.
-void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * __restrict output) const
+/// Apply 1D kernel in the Z direction at the given coordinates and return result.
+void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * __restrict output) const
 {
     const uint length = k.length();
-    const float scale = float(length) / float(m_width);
+    const float scale = float(length) / float(m_depth); // Z pass: output slices per source slice.
     const float iscale = 1.0f / scale;
 
     const float width = k.width();
@@ -879,12 +1122,12 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c,
 
         const int left = (int)floorf(center - width);
         const int right = (int)ceilf(center + width);
-        nvDebugCheck(right - left <= windowSize);
+        nvCheck(right - left <= windowSize);
 
         float sum = 0;
         for (int j = 0; j < windowSize; ++j)
         {
-            const int idx = this->index(left + j, y, wm);
+            const int idx = this->index(x, y, j+left, wm);
 
             sum += k.valueAt(i, j) * channel[idx];
         }
@@ -894,8 +1137,44 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c,
 }
 
 
+/// Filter row (y, z) of channel c along X, weighting each tap by channel a, and write k.length() normalized samples to output.
+void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * __restrict output) const
+{
+    const uint length = k.length();
+    const float scale = float(length) / float(m_width); // Output samples per source texel along X.
+    const float iscale = 1.0f / scale; // Source texels per output sample.
+
+    const float width = k.width();
+    const int windowSize = k.windowSize();
+
+    const float * channel = this->channel(c);
+    const float * alpha = this->channel(a);
+
+    for (uint i = 0; i < length; i++)
+    {
+        const float center = (0.5f + i) * iscale; // Center of output sample i in source texel coordinates.
+
+        const int left = (int)floorf(center - width); // First source texel read for this sample.
+        const int right = (int)ceilf(center + width); // Only used by the check below.
+        nvDebugCheck(right - left <= windowSize);
+
+        float norm = 0.0f; // Sum of the alpha-scaled weights, used to renormalize.
+        float sum = 0;
+        for (int j = 0; j < windowSize; ++j)
+        {
+            const int idx = this->index(left + j, y, z, wm);
+
+            float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); // NOTE(review): the 1/256 bias presumably keeps norm non-zero where alpha is 0 - confirm.
+            norm += w;
+            sum += w * channel[idx];
+        }
+
+        output[i] = sum / norm;
+    }
+}
+
 /// Apply 1D vertical kernel at the given coordinates and return result.
-void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * __restrict output) const
+void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output) const
 {
     const uint length = k.length();
     const float scale = float(length) / float(m_height);
@@ -919,7 +1198,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, u
         float sum = 0;
         for (int j = 0; j < windowSize; ++j)
         {
-            const int idx = this->index(x, j+left, wm);
+            const int idx = this->index(x, j+left, z, wm);
 
             float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
             norm += w;
@@ -931,7 +1210,7 @@ void FloatImage::applyKernelVertical(const PolyphaseKernel & k, int x, uint c, u
 }
 
 /// Apply 1D horizontal kernel at the given coordinates and return result.
-void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * __restrict output) const
+void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * __restrict output) const
 {
     const uint length = k.length();
     const float scale = float(length) / float(m_width);
@@ -955,7 +1234,7 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c,
         float sum = 0;
         for (int j = 0; j < windowSize; ++j)
         {
-            const int idx = this->index(left + j, y, wm);
+            const int idx = this->index(x, y, left + j, wm);
 
             float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f));
             norm += w;
@@ -966,25 +1245,66 @@ void FloatImage::applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c,
     }
 }
 
-// Vertical flip in place.
-void FloatImage::flip()
+/// Reverse every scanline in place (mirror along X).
+void FloatImage::flipX()
 {
     const uint w = m_width;
     const uint h = m_height;
+    const uint d = m_depth;
+    const uint w2 = w / 2; // Swap pairs up to the midpoint; the middle texel of an odd width stays put.
+
+    for (uint c = 0; c < m_componentCount; c++) {
+        for (uint z = 0; z < d; z++) {
+            for (uint y = 0; y < h; y++) {
+                float * line = scanline(c, y, z);
+                for (uint x = 0; x < w2; x++) {
+                    swap(line[x], line[w - 1 - x]);
+                }
+            }
+        }
+    }
+}
+
+void FloatImage::flipY()
+{
+    const uint w = m_width;
+    const uint h = m_height;
+    const uint d = m_depth;
     const uint h2 = h / 2;
 
-    for (uint c = 0; c < m_componentNum; c++) {
-        for (uint y = 0; y < h2; y++) {
-            float * src = scanline(y, c);
-            float * dst = scanline(h - 1 - y, c);
-            for (uint x = 0; x < w; x++) {
-                swap(src[x], dst[x]);
+    for (uint c = 0; c < m_componentCount; c++) {
+        for (uint z = 0; z < d; z++) {
+            for (uint y = 0; y < h2; y++) {
+                float * src = scanline(c, y, z);
+                float * dst = scanline(c, h - 1 - y, z);
+                for (uint x = 0; x < w; x++) {
+                    swap(src[x], dst[x]);
+                }
+            }
+        }
+    }
+}
+
+void FloatImage::flipZ()
+{
+    const uint w = m_width;
+    const uint h = m_height;
+    const uint d = m_depth;
+    const uint d2 = d / 2;
+
+    for (uint c = 0; c < m_componentCount; c++) {
+        for (uint z = 0; z < d2; z++) { // Swap slice pairs up to the midpoint; a middle slice stays put.
+            float * src = plane(c, z);
+            float * dst = plane(c, d - 1 - z);
+            for (uint i = 0; i < w*h; i++) {
+                swap(src[i], dst[i]);
             }
         }
     }
 }
 
 
+
 float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel) const
 {
     const uint w = m_width;
@@ -992,11 +1312,11 @@ float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel) const
 
     float coverage = 0.0f;
 
-    for (uint y = 0; y < h; y++) {
-        const float * alpha = scanline(y, alphaChannel);
-        for (uint x = 0; x < w; x++) {
-            if (alpha[x] > alphaRef) coverage += 1.0f; // @@ gt or lt?
-        }
+    const float * alpha = channel(alphaChannel);
+
+    const uint count = m_pixelCount;
+    for (uint i = 0; i < count; i++) {
+        if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt?
     }
 
     return coverage / float(w * h);
@@ -1038,8 +1358,8 @@ FloatImage* FloatImage::clone() const
 {
     FloatImage* copy = new FloatImage();
 
-    copy->allocate(m_componentNum, m_width, m_height);
-    memcpy(copy->m_mem, m_mem, m_count * sizeof(float));
+    copy->allocate(m_componentCount, m_width, m_height, m_depth); // Must match m_floatCount, which the memcpy below copies.
+    memcpy(copy->m_mem, m_mem, m_floatCount * sizeof(float));
 
     return copy;
 }
diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h
index 9e8d7b6..f22c6c2 100644
--- a/src/nvimage/FloatImage.h
+++ b/src/nvimage/FloatImage.h
@@ -5,6 +5,8 @@
 
 #include "nvimage.h"
 
+#include "nvmath/nvmath.h" // lerp
+
 #include "nvcore/Debug.h"
 #include "nvcore/Utils.h" // clamp
 
@@ -45,7 +47,7 @@ namespace nv
 
         /** @name Allocation. */
         //@{
-        NVIMAGE_API void allocate(uint c, uint w, uint h);
+        NVIMAGE_API void allocate(uint c, uint w, uint h, uint d = 1);
         NVIMAGE_API void free(); // Does not clear members.
         NVIMAGE_API void resizeChannelCount(uint c);
         //@}
@@ -74,21 +76,30 @@ namespace nv
         NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm) const;
         NVIMAGE_API FloatImage * downSample(const Filter & filter, WrapMode wm, uint alpha) const;
         NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm) const;
+        NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const;
         NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const;
+        NVIMAGE_API FloatImage * resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const;
+
 
         //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, WrapMode wm) const;
         //NVIMAGE_API FloatImage * downSample(const Kernel1 & filter, uint w, uint h, WrapMode wm) const;
         //@}
 
-        NVIMAGE_API float applyKernel(const Kernel2 * k, int x, int y, uint c, WrapMode wm) const;
-        NVIMAGE_API float applyKernelVertical(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
-        NVIMAGE_API float applyKernelHorizontal(const Kernel1 * k, int x, int y, uint c, WrapMode wm) const;
-        NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, WrapMode wm, float * output) const;
-        NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, WrapMode wm, float * output) const;
-        NVIMAGE_API void applyKernelVertical(const PolyphaseKernel & k, int x, uint c, uint a, WrapMode wm, float * output) const;
-        NVIMAGE_API void applyKernelHorizontal(const PolyphaseKernel & k, int y, uint c, uint a, WrapMode wm, float * output) const;
+        NVIMAGE_API float applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const;
+        NVIMAGE_API float applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
+        NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
+        NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
+        NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const;
+        NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output) const;
+        NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const;
+        NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const;
+        NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const;
+        NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const;
+
 
-        NVIMAGE_API void flip();
+        NVIMAGE_API void flipX();
+        NVIMAGE_API void flipY();
+        NVIMAGE_API void flipZ();
 
         NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel) const;
         NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel);
@@ -96,37 +107,58 @@ namespace nv
 
         uint width() const { return m_width; }
         uint height() const { return m_height; }
-        uint componentNum() const { return m_componentNum; }
-        uint count() const { return m_count; }
+        uint depth() const { return m_depth; }
+        uint componentCount() const { return m_componentCount; }
+        uint floatCount() const { return m_floatCount; }
+        uint pixelCount() const { return m_pixelCount; }
 
 
+        // @@ It would make sense to swap the order of the arguments so that 'c' is always first.
+
         /** @name Pixel access. */
         //@{
         const float * channel(uint c) const;
         float * channel(uint c);
 
-        const float * scanline(uint y, uint c) const;
-        float * scanline(uint y, uint c);
+        const float * plane(uint c, uint z) const;
+        float * plane(uint c, uint z);
+
+        const float * scanline(uint c, uint y, uint z) const;
+        float * scanline(uint c, uint y, uint z);
 
-        float pixel(uint x, uint y, uint c) const;
-        float & pixel(uint x, uint y, uint c);
+        //float pixel(uint c, uint x, uint y) const;
+        //float & pixel(uint c, uint x, uint y);
 
-        float pixel(uint idx, uint c) const;
-        float & pixel(uint idx, uint c);
+        float pixel(uint c, uint x, uint y, uint z) const;
+        float & pixel(uint c, uint x, uint y, uint z);
+
+        float pixel(uint c, uint idx) const;
+        float & pixel(uint c, uint idx);
 
         float pixel(uint idx) const;
         float & pixel(uint idx);
 
-        float sampleNearest(float x, float y, int c, WrapMode wm) const;
-        float sampleLinear(float x, float y, int c, WrapMode wm) const;
+        float sampleNearest(uint c, float x, float y, WrapMode wm) const;
+        float sampleLinear(uint c, float x, float y, WrapMode wm) const;
+
+        float sampleNearest(uint c, float x, float y, float z, WrapMode wm) const;
+        float sampleLinear(uint c, float x, float y, float z, WrapMode wm) const;
+
+        float sampleNearestClamp(uint c, float x, float y) const;
+        float sampleNearestRepeat(uint c, float x, float y) const;
+        float sampleNearestMirror(uint c, float x, float y) const;
 
-        float sampleNearestClamp(float x, float y, int c) const;
-        float sampleNearestRepeat(float x, float y, int c) const;
-        float sampleNearestMirror(float x, float y, int c) const;
+        float sampleNearestClamp(uint c, float x, float y, float z) const;
+        float sampleNearestRepeat(uint c, float x, float y, float z) const;
+        float sampleNearestMirror(uint c, float x, float y, float z) const;
 
-        float sampleLinearClamp(float x, float y, int c) const;
-        float sampleLinearRepeat(float x, float y, int c) const;
-        float sampleLinearMirror(float x, float y, int c) const;
+        float sampleLinearClamp(uint c, float x, float y) const;
+        float sampleLinearRepeat(uint c, float x, float y) const;
+        float sampleLinearMirror(uint c, float x, float y) const;
+
+        float sampleLinearClamp(uint c, float x, float y, float z) const;
+        float sampleLinearRepeat(uint c, float x, float y, float z) const;
+        float sampleLinearMirror(uint c, float x, float y, float z) const;
         //@}
 
 
@@ -134,18 +166,23 @@ namespace nv
 
     public:
 
-        uint index(uint x, uint y) const;
-        uint indexClamp(int x, int y) const;
-        uint indexRepeat(int x, int y) const;
-        uint indexMirror(int x, int y) const;
-        uint index(int x, int y, WrapMode wm) const;
+        uint index(uint x, uint y, uint z) const;
+        uint indexClamp(int x, int y, int z) const;
+        uint indexRepeat(int x, int y, int z) const;
+        uint indexMirror(int x, int y, int z) const;
+        uint index(int x, int y, int z, WrapMode wm) const;
+
+        float bilerp(uint c, int ix0, int iy0, int ix1, int iy1, float fx, float fy) const;
+        float trilerp(uint c, int ix0, int iy0, int iz0, int ix1, int iy1, int iz1, float fx, float fy, float fz) const;
 
     public:
 
-        uint16 m_width;			///< Width of the texture.
-        uint16 m_height;		///< Height of the texture.
-        uint32 m_componentNum;	///< Number of components.
-        uint32 m_count;			///< Image pixel count.
+        uint16 m_componentCount;
+        uint16 m_width;
+        uint16 m_height;
+        uint16 m_depth;
+        uint32 m_pixelCount;
+        uint32 m_floatCount;
         float * m_mem;
 
     };
@@ -155,132 +192,202 @@ namespace nv
     inline const float * FloatImage::channel(uint c) const
     {
         nvDebugCheck(m_mem != NULL);
-        nvDebugCheck(c < m_componentNum);
-        return m_mem + c * m_width * m_height;
+        nvDebugCheck(c < m_componentCount);
+        return m_mem + c * m_pixelCount;
     }
 
     /// Get channel pointer.
     inline float * FloatImage::channel(uint c) {
         nvDebugCheck(m_mem != NULL);
-        nvDebugCheck(c < m_componentNum);
-        return m_mem + c * m_width * m_height;
+        nvDebugCheck(c < m_componentCount);
+        return m_mem + c * m_pixelCount;
+    }
+
+    inline const float * FloatImage::plane(uint c, uint z) const { // Read-only pointer to depth slice z of channel c.
+        nvDebugCheck(z < m_depth);
+        return channel(c) + z * m_width * m_height; // Each slice spans m_width * m_height floats.
+    }
+
+    inline float * FloatImage::plane(uint c, uint z) { // Mutable pointer to depth slice z of channel c.
+        nvDebugCheck(z < m_depth);
+        return channel(c) + z * m_width * m_height; // Each slice spans m_width * m_height floats.
     }
 
     /// Get const scanline pointer.
-    inline const float * FloatImage::scanline(uint y, uint c) const
+    inline const float * FloatImage::scanline(uint c, uint y, uint z) const
     {
         nvDebugCheck(y < m_height);
-        return channel(c) + y * m_width;
+        return plane(c, z) + y * m_width;
     }
 
     /// Get scanline pointer.
-    inline float * FloatImage::scanline(uint y, uint c)
+    inline float * FloatImage::scanline(uint c, uint y, uint z) // (channel, row, slice): same order as the const overload.
     {
         nvDebugCheck(y < m_height);
-        return channel(c) + y * m_width;
+        return plane(c, z) + y * m_width; // Row y inside depth slice z of channel c.
     }
 
     /// Get pixel component.
-    inline float FloatImage::pixel(uint x, uint y, uint c) const
+    inline float FloatImage::pixel(uint c, uint x, uint y, uint z) const
     {
         nvDebugCheck(m_mem != NULL);
+        nvDebugCheck(c < m_componentCount);
         nvDebugCheck(x < m_width);
         nvDebugCheck(y < m_height);
-        nvDebugCheck(c < m_componentNum);
-        return m_mem[(c * m_height + y) * m_width + x];
+        nvDebugCheck(z < m_depth);
+        return m_mem[((c * m_depth + z) * m_height + y) * m_width + x];
     }
 
     /// Get pixel component.
-    inline float & FloatImage::pixel(uint x, uint y, uint c)
+    inline float & FloatImage::pixel(uint c, uint x, uint y, uint z)
     {
         nvDebugCheck(m_mem != NULL);
+        nvDebugCheck(c < m_componentCount);
         nvDebugCheck(x < m_width);
         nvDebugCheck(y < m_height);
-        nvDebugCheck(c < m_componentNum);
-        return m_mem[(c * m_height + y) * m_width + x];
+        nvDebugCheck(z < m_depth);
+        return m_mem[((c * m_depth + z) * m_height + y) * m_width + x];
     }
 
     /// Get pixel component.
-    inline float FloatImage::pixel(uint idx, uint c) const
+    inline float FloatImage::pixel(uint c, uint idx) const
     {
         nvDebugCheck(m_mem != NULL);
-        nvDebugCheck(idx < uint(m_width*m_height));
-        nvDebugCheck(c < m_componentNum);
+        nvDebugCheck(c < m_componentCount);
+        nvDebugCheck(idx < m_pixelCount);
-        return m_mem[c * m_height * m_width + idx];
+        return m_mem[c * m_pixelCount + idx]; // Same channel stride as channel(); width * height alone ignores depth.
     }
 
     /// Get pixel component.
-    inline float & FloatImage::pixel(uint idx, uint c)
+    inline float & FloatImage::pixel(uint c, uint idx)
     {
         nvDebugCheck(m_mem != NULL);
-        nvDebugCheck(idx < uint(m_width*m_height));
-        nvDebugCheck(c < m_componentNum);
+        nvDebugCheck(c < m_componentCount);
+        nvDebugCheck(idx < m_pixelCount);
-        return m_mem[c * m_height * m_width + idx];
+        return m_mem[c * m_pixelCount + idx]; // Same channel stride as channel(); width * height alone ignores depth.
     }
 
     /// Get pixel component.
     inline float FloatImage::pixel(uint idx) const
     {
-        nvDebugCheck(idx < m_count);
+        nvDebugCheck(m_mem != NULL);
+        nvDebugCheck(idx < m_floatCount);
         return m_mem[idx];
     }
 
     /// Get pixel component.
     inline float & FloatImage::pixel(uint idx)
     {
-        nvDebugCheck(idx < m_count);
+        nvDebugCheck(m_mem != NULL);
+        nvDebugCheck(idx < m_floatCount);
         return m_mem[idx];
     }
 
-    inline uint FloatImage::index(uint x, uint y) const
+    inline uint FloatImage::index(uint x, uint y, uint z) const
     {
         nvDebugCheck(x < m_width);
         nvDebugCheck(y < m_height);
-        return y * m_width + x;
+        nvDebugCheck(z < m_depth);
+        return (z * m_height + y) * m_width + x;
     }
 
-    inline uint FloatImage::indexClamp(int x, int y) const
+
+    inline int wrapClamp(int x, int w) // Clamp coordinate x to [0, w - 1], where w is the axis size.
+    {
+        return nv::clamp(x, 0, w - 1);
+    }
+    inline int wrapRepeat(int x, int w) // Wrap coordinate x around an axis of size w (tiling).
     {
-        return nv::clamp(y, int(0), int(m_height-1)) * m_width + nv::clamp(x, int(0), int(m_width-1));
+        if (x >= 0) return x % w;
+        else return (x + 1) % w + w - 1; // Shifted so negatives land in [0, w - 1]: -1 -> w - 1, -w -> 0.
     }
+    inline int wrapMirror(int x, int w) // Reflect coordinate x back into [0, w - 1].
+    {
+        if (w == 1) x = 0; // Single-texel axis: without this the loop below never terminates for x != 0.
 
-    inline int repeat_remainder(int a, int b)
+        x = abs(x);
+        while (x >= w) {
+            x = abs(w + w - x - 2); // Reflect about the last texel: x -> 2 * (w - 1) - x.
+        }
+
+        return x;
+    }
+
+
+
+    inline uint FloatImage::indexClamp(int x, int y, int z) const // Linear index with coordinates clamped to the image bounds.
     {
-        if (a >= 0) return a % b;
-        else return (a + 1) % b + b - 1;
+        x = wrapClamp(x, m_width);  // wrapClamp takes the axis size and subtracts 1 itself.
+        y = wrapClamp(y, m_height);
+        z = wrapClamp(z, m_depth);
+        return index(x, y, z);
     }
 
-    inline uint FloatImage::indexRepeat(int x, int y) const
+
+    inline uint FloatImage::indexRepeat(int x, int y, int z) const // Linear index with each coordinate wrapped around its axis.
     {
-        return repeat_remainder(y, m_height) * m_width + repeat_remainder(x, m_width);
+        x = wrapRepeat(x, m_width);
+        y = wrapRepeat(y, m_height);
+        z = wrapRepeat(z, m_depth);
+        return index(x, y, z);
+   }
+
+    inline uint FloatImage::indexMirror(int x, int y, int z) const // Linear index with each coordinate mirrored back into range.
+    {
+        x = wrapMirror(x, m_width);
+        y = wrapMirror(y, m_height);
+        z = wrapMirror(z, m_depth);
+        return index(x, y, z);
     }
 
-    inline uint FloatImage::indexMirror(int x, int y) const
+    inline uint FloatImage::index(int x, int y, int z, WrapMode wm) const // Dispatch to the index helper matching wrap mode wm.
     {
-        if (m_width == 1) x = 0;
+        if (wm == WrapMode_Clamp) return indexClamp(x, y, z);
+        if (wm == WrapMode_Repeat) return indexRepeat(x, y, z);
+        /*if (wm == WrapMode_Mirror)*/ return indexMirror(x, y, z); // Any other mode falls through to mirror.
+    }
 
-        x = abs(x);
-        while (x >= m_width) {
-            x = abs(m_width + m_width - x - 2);
-        }
+    inline float FloatImage::bilerp(uint c, int ix0, int iy0, int ix1, int iy1, float fx, float fy) const { // Bilinear blend of four texels of channel c.
+        int iz = 0; // Always samples the first depth slice.
+        float f1 = pixel(c, ix0, iy0, iz);
+        float f2 = pixel(c, ix1, iy0, iz);
+        float f3 = pixel(c, ix0, iy1, iz);
+        float f4 = pixel(c, ix1, iy1, iz);
 
-        if (m_height == 1) y = 0;
+        float i1 = lerp(f1, f2, fx); // Blend along x on row iy0.
+        float i2 = lerp(f3, f4, fx); // Blend along x on row iy1.
 
-        y = abs(y);
-        while (y >= m_height) {
-            y = abs(m_height + m_height - y - 2);
-        }
+        return lerp(i1, i2, fy); // Blend the two rows along y.
+    }
 
-        return index(x, y);
+    inline float FloatImage::trilerp(uint c, int ix0, int iy0, int iz0, int ix1, int iy1, int iz1, float fx, float fy, float fz) const { // Trilinear blend of the eight corner texels of channel c.
+        float f000 = pixel(c, ix0, iy0, iz0); // Suffix digits are the x, y, z corner selectors (0 -> i?0, 1 -> i?1).
+        float f100 = pixel(c, ix1, iy0, iz0);
+        float f010 = pixel(c, ix0, iy1, iz0);
+        float f110 = pixel(c, ix1, iy1, iz0);
+        float f001 = pixel(c, ix0, iy0, iz1);
+        float f101 = pixel(c, ix1, iy0, iz1);
+        float f011 = pixel(c, ix0, iy1, iz1);
+        float f111 = pixel(c, ix1, iy1, iz1);
+
+        float i1 = lerp(f000, f001, fz); // First collapse the z axis...
+        float i2 = lerp(f010, f011, fz);
+        float j1 = lerp(f100, f101, fz);
+        float j2 = lerp(f110, f111, fz);
+
+        float w1 = lerp(i1, i2, fy); // ...then the y axis...
+        float w2 = lerp(j1, j2, fy);
+
+        return lerp(w1, w2, fx); // ...and finally the x axis.
     }
 
-    inline uint FloatImage::index(int x, int y, WrapMode wm) const
-    {
-        if (wm == WrapMode_Clamp) return indexClamp(x, y);
-        if (wm == WrapMode_Repeat) return indexRepeat(x, y);
-        /*if (wm == WrapMode_Mirror)*/ return indexMirror(x, y);
+    // True when both images exist and share width, height and depth. Does not compare channel count.
+    inline bool sameLayout(const FloatImage * img0, const FloatImage * img1) {
+        if (img0 == NULL || img1 == NULL) return false; // A missing image never matches.
+        return img0->width() == img1->width() && img0->height() == img1->height() && img0->depth() == img1->depth();
     }
 
+
 } // nv namespace
 
 
diff --git a/src/nvimage/Image.cpp b/src/nvimage/Image.cpp
index 2b4bfbe..006c324 100644
--- a/src/nvimage/Image.cpp
+++ b/src/nvimage/Image.cpp
@@ -18,9 +18,9 @@ Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL)
 
 Image::Image(const Image & img) : m_data(NULL)
 {
-    allocate(img.m_width, img.m_height);
+	allocate(img.m_width, img.m_height, img.m_depth); // Sets m_width, m_height and m_depth and reserves the texel buffer.
     m_format = img.m_format;
-    memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height);
+    memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); // Copy every slice of the source.
 }
 
 Image::~Image()
@@ -30,19 +30,20 @@ Image::~Image()
 
 const Image & Image::operator=(const Image & img)
 {
-    allocate(img.m_width, img.m_height);
+    allocate(img.m_width, img.m_height, img.m_depth); // Take the depth from the source, not this image's old value.
     m_format = img.m_format;
-    memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height);
+    memcpy(m_data, img.m_data, sizeof(Color32) * m_width * m_height * m_depth); // Sizes now match the allocation above.
     return *this;
 }
 
 
-void Image::allocate(uint w, uint h)
+void Image::allocate(uint w, uint h, uint d) // Frees the current pixels, then reserves storage for w * h * d texels.
 {
     free();
     m_width = w;
     m_height = h;
-    m_data = realloc<Color32>(m_data, w * h);
+	m_depth = d;
+    m_data = realloc<Color32>(m_data, w * h * d); // One Color32 per texel across all slices.
 }
 
 bool Image::load(const char * name)
@@ -56,18 +57,20 @@ bool Image::load(const char * name)
 
     swap(m_width, img->m_width);
     swap(m_height, img->m_height);
+	swap(m_depth, img->m_depth);
     swap(m_format, img->m_format);
     swap(m_data, img->m_data);
 
     return true;
 }
 
-void Image::wrap(void * data, uint w, uint h)
+void Image::wrap(void * data, uint w, uint h, uint d) // Frees the current pixels and points the image at caller-provided data of size w x h x d.
 {
     free();
     m_data = (Color32 *)data;
     m_width = w;
     m_height = h;
+	m_depth = d; // Number of slices in the wrapped buffer.
 }
 
 void Image::unwrap()
@@ -75,6 +78,7 @@ void Image::unwrap()
     m_data = NULL;
     m_width = 0;
     m_height = 0;
+	m_depth = 0;
 }
 
 
@@ -95,6 +99,11 @@ uint Image::height() const
     return m_height;
 }
 
+uint Image::depth() const // Returns the depth (slice count) stored in m_depth.
+{
+	return m_depth;
+}
+
 const Color32 * Image::scanline(uint h) const
 {
     nvDebugCheck(h < m_height);
@@ -119,13 +128,13 @@ Color32 * Image::pixels()
 
 const Color32 & Image::pixel(uint idx) const
 {
-    nvDebugCheck(idx < m_width * m_height);
+    nvDebugCheck(idx < m_width * m_height * m_depth);
     return m_data[idx];
 }
 
 Color32 & Image::pixel(uint idx)
 {
-    nvDebugCheck(idx < m_width * m_height);
+    nvDebugCheck(idx < m_width * m_height * m_depth);
     return m_data[idx];
 }
 
@@ -142,7 +151,7 @@ void Image::setFormat(Image::Format f)
 
 void Image::fill(Color32 c)
 {
-    const uint size = m_width * m_height;
+    const uint size = m_width * m_height * m_depth;
     for (uint i = 0; i < size; ++i)
     {
         m_data[i] = c;
diff --git a/src/nvimage/Image.h b/src/nvimage/Image.h
index e576079..7d44a4a 100644
--- a/src/nvimage/Image.h
+++ b/src/nvimage/Image.h
@@ -29,14 +29,15 @@ namespace nv
         const Image & operator=(const Image & img);
 
 
-        void allocate(uint w, uint h);
+        void allocate(uint w, uint h, uint d = 1);
         bool load(const char * name);
 
-        void wrap(void * data, uint w, uint h);
+        void wrap(void * data, uint w, uint h, uint d = 1);
         void unwrap();
 
         uint width() const;
         uint height() const;
+        uint depth() const;
 
         const Color32 * scanline(uint h) const;
         Color32 * scanline(uint h);
@@ -61,6 +62,7 @@ namespace nv
     private:
         uint m_width;
         uint m_height;
+        uint m_depth;
         Format m_format;
         Color32 * m_data;
     };
diff --git a/src/nvimage/ImageIO.cpp b/src/nvimage/ImageIO.cpp
index f73aa84..b80bc3e 100644
--- a/src/nvimage/ImageIO.cpp
+++ b/src/nvimage/ImageIO.cpp
@@ -275,9 +275,9 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
 {
     if (componentCount == 0)
     {
-        componentCount = fimage->componentNum() - baseComponent;
+        componentCount = fimage->componentCount() - baseComponent;
     }
-    if (baseComponent + componentCount < fimage->componentNum())
+    if (baseComponent + componentCount < fimage->componentCount())
     {
         return false;
     }
@@ -430,7 +430,7 @@ Image * nv::ImageIO::loadFreeImage(FREE_IMAGE_FORMAT fif, Stream & s)
 
 
     Image * image = new Image();
-    image->allocate(w, h);
+    image->allocate(w, h, 1); // freeimage can only load 2d images:
 
     // Copy the image over to our internal format, FreeImage has the scanlines bottom to top though.
     for (int y=0; y < h; y++)
@@ -1575,7 +1575,7 @@ bool nv::ImageIO::saveFloatTIFF(const char * fileName, const FloatImage * fimage
 {
     nvCheck(fileName != NULL);
     nvCheck(fimage != NULL);
-    nvCheck(base_component + num_components <= fimage->componentNum());
+    nvCheck(base_component + num_components <= fimage->componentCount());
 
     const int iW = fimage->width();
     const int iH = fimage->height();
@@ -1740,7 +1740,7 @@ bool nv::ImageIO::saveFloatEXR(const char * fileName, const FloatImage * fimage,
 {
     nvCheck(fileName != NULL);
     nvCheck(fimage != NULL);
-    nvCheck(base_component + num_components <= fimage->componentNum());
+    nvCheck(base_component + num_components <= fimage->componentCount());
     nvCheck(num_components > 0 && num_components <= 4);
 
     const int w = fimage->width();
diff --git a/src/nvimage/NormalMap.cpp b/src/nvimage/NormalMap.cpp
index c4981c0..7461a80 100644
--- a/src/nvimage/NormalMap.cpp
+++ b/src/nvimage/NormalMap.cpp
@@ -36,172 +36,172 @@ using namespace nv;
 // Create normal map using the given kernels.
 static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv)
 {
-	nvDebugCheck(kdu != NULL);
-	nvDebugCheck(kdv != NULL);
-	nvDebugCheck(img != NULL);
-	
-	const uint w = img->width();
-	const uint h = img->height();
-	
-	AutoPtr<FloatImage> fimage(new FloatImage());
-	fimage->allocate(4, w, h);
-	
-	// Compute height and store in alpha channel:
-	float * alphaChannel = fimage->channel(3);
-	for(uint i = 0; i < w*h; i++)
-	{
-		Vector4 color = toVector4(img->pixel(i));
-		alphaChannel[i] = dot(color, heightWeights);
-	}
-	
-	float heightScale = 1.0f / 16.0f;	// @@ Use a user defined factor.
-	
-	for(uint y = 0; y < h; y++)
-	{
-		for(uint x = 0; x < w; x++)
-		{
-			const float du = fimage->applyKernel(kdu, x, y, 3, wm);
-			const float dv = fimage->applyKernel(kdv, x, y, 3, wm);
-			
-			Vector3 n = normalize(Vector3(du, dv, heightScale));
-			
-			fimage->pixel(x, y, 0) = 0.5f * n.x + 0.5f;
-			fimage->pixel(x, y, 1) = 0.5f * n.y + 0.5f;
-			fimage->pixel(x, y, 2) = 0.5f * n.z + 0.5f;
-		}
-	}
-	
-	return fimage.release();
+    nvDebugCheck(kdu != NULL);
+    nvDebugCheck(kdv != NULL);
+    nvDebugCheck(img != NULL);
+
+    const uint w = img->width();
+    const uint h = img->height();
+
+    AutoPtr<FloatImage> fimage(new FloatImage());
+    fimage->allocate(4, w, h);
+
+    // Compute height and store in alpha channel:
+    float * alphaChannel = fimage->channel(3);
+    for(uint i = 0; i < w * h; i++)
+    {
+        Vector4 color = toVector4(img->pixel(i));
+        alphaChannel[i] = dot(color, heightWeights);
+    }
+
+    float heightScale = 1.0f / 16.0f;	// @@ Use a user defined factor.
+
+    for(uint y = 0; y < h; y++)
+    {
+        for(uint x = 0; x < w; x++)
+        {
+            const float du = fimage->applyKernelXY(kdu, x, y, 0, 3, wm);
+            const float dv = fimage->applyKernelXY(kdv, x, y, 0, 3, wm);
+
+            Vector3 n = normalize(Vector3(du, dv, heightScale));
+
+            fimage->pixel(0, x, y, 0) = 0.5f * n.x + 0.5f; // pixel() takes (channel, x, y, z); normals are packed into [0, 1].
+            fimage->pixel(1, x, y, 0) = 0.5f * n.y + 0.5f;
+            fimage->pixel(2, x, y, 0) = 0.5f * n.z + 0.5f;
+        }
+    }
+
+    return fimage.release();
 }
 
 
 // Create normal map using the given kernels.
 static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv)
 {
-	nvDebugCheck(kdu != NULL);
-	nvDebugCheck(kdv != NULL);
-	nvDebugCheck(img != NULL);
+    nvDebugCheck(kdu != NULL);
+    nvDebugCheck(kdv != NULL);
+    nvDebugCheck(img != NULL);
 
 #pragma NV_MESSAGE("FIXME: Height scale parameter should go away. It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.")
-	const float heightScale = 1.0f / 16.0f;
+    const float heightScale = 1.0f / 16.0f;
 
-	const uint w = img->width();
-	const uint h = img->height();
+    const uint w = img->width();
+    const uint h = img->height();
 
-	AutoPtr<FloatImage> img_out(new FloatImage());
-	img_out->allocate(4, w, h);
+    AutoPtr<FloatImage> img_out(new FloatImage());
+    img_out->allocate(4, w, h);
 
-	for (uint y = 0; y < h; y++)
-	{
-		for (uint x = 0; x < w; x++)
-		{
-			const float du = img->applyKernel(kdu, x, y, 3, wm);
-			const float dv = img->applyKernel(kdv, x, y, 3, wm);
+    for (uint y = 0; y < h; y++)
+    {
+        for (uint x = 0; x < w; x++)
+        {
+            const float du = img->applyKernelXY(kdu, x, y, 0, 3, wm);
+            const float dv = img->applyKernelXY(kdv, x, y, 0, 3, wm);
 
-			Vector3 n = normalize(Vector3(du, dv, heightScale));
+            Vector3 n = normalize(Vector3(du, dv, heightScale));
 
-			img_out->pixel(x, y, 0) = n.x;
-			img_out->pixel(x, y, 1) = n.y;
-			img_out->pixel(x, y, 2) = n.z;
-		}
+            img_out->pixel(0, x, y, 0) = n.x; // pixel() takes (channel, x, y, z).
+            img_out->pixel(1, x, y, 0) = n.y;
+            img_out->pixel(2, x, y, 0) = n.z;
 	}
-
-	// Copy alpha channel.
-	for (uint y = 0; y < h; y++)
-	{
-		for (uint x = 0; x < w; x++)
-		{
-			img_out->pixel(x, y, 3) = img->pixel(x, y, 3);
-		}
-	}
-
-	return img_out.release();
+    }
+
+    // Copy alpha channel (channel 3) of slice 0; pixel() takes (channel, x, y, z).
+    for (uint y = 0; y < h; y++)
+    {
+        for (uint x = 0; x < w; x++)
+        {
+            img_out->pixel(3, x, y, 0) = img->pixel(3, x, y, 0);
+        }
+    }
+
+    return img_out.release();
 }
 
 
 /// Create normal map using the given filter.
 FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/)
 {
-	nvDebugCheck(img != NULL);
-	
-	// Init the kernels.
-	Kernel2 * kdu = NULL;
-	Kernel2 * kdv = NULL;
-
-	switch(filter)
-	{
-		case NormalMapFilter_Sobel3x3:
-			kdu = new Kernel2(3);
-			break;
-		case NormalMapFilter_Sobel5x5:
-			kdu = new Kernel2(5);
-			break;
-		case NormalMapFilter_Sobel7x7:
-			kdu = new Kernel2(7);
-			break;
-		case NormalMapFilter_Sobel9x9:
-			kdu = new Kernel2(9);
-			break;
-		default:
-			nvDebugCheck(false);
-	};
-
-	kdu->initSobel();
-	kdu->normalize();
-
-	kdv = new Kernel2(*kdu);
-	kdv->transpose();
-
-	return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
+    nvDebugCheck(img != NULL);
+
+    // Init the kernels.
+    Kernel2 * kdu = NULL;
+    Kernel2 * kdv = NULL;
+
+    switch(filter)
+    {
+        case NormalMapFilter_Sobel3x3:
+            kdu = new Kernel2(3);
+            break;
+        case NormalMapFilter_Sobel5x5:
+            kdu = new Kernel2(5);
+            break;
+        case NormalMapFilter_Sobel7x7:
+            kdu = new Kernel2(7);
+            break;
+        case NormalMapFilter_Sobel9x9:
+            kdu = new Kernel2(9);
+            break;
+        default:
+            nvDebugCheck(false);
+    };
+
+    kdu->initSobel();
+    kdu->normalize();
+
+    kdv = new Kernel2(*kdu);
+    kdv->transpose();
+
+    return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
 }
 
 
 /// Create normal map combining multiple sobel filters.
 FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights)
 {
-	nvDebugCheck(img != NULL);
-
-	Kernel2 * kdu = NULL;
-	Kernel2 * kdv = NULL;
-
-	kdu = new Kernel2(9);
-	kdu->initBlendedSobel(filterWeights);
-	kdu->normalize();
-	
-	kdv = new Kernel2(*kdu);
-	kdv->transpose();
-	
-	return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
+    nvDebugCheck(img != NULL);
+
+    Kernel2 * kdu = NULL;
+    Kernel2 * kdv = NULL;
+
+    kdu = new Kernel2(9);
+    kdu->initBlendedSobel(filterWeights);
+    kdu->normalize();
+
+    kdv = new Kernel2(*kdu);
+    kdv->transpose();
+
+    return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
 }
 
 
 FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights)
 {
-	nvDebugCheck(img != NULL);
+    nvDebugCheck(img != NULL);
 
-	Kernel2 * kdu = NULL;
-	Kernel2 * kdv = NULL;
+    Kernel2 * kdu = NULL;
+    Kernel2 * kdv = NULL;
 
-	kdu = new Kernel2(9);
-	kdu->initBlendedSobel(filterWeights);
-	kdu->normalize();
+    kdu = new Kernel2(9);
+    kdu->initBlendedSobel(filterWeights);
+    kdu->normalize();
 
-	kdv = new Kernel2(*kdu);
-	kdv->transpose();
+    kdv = new Kernel2(*kdu);
+    kdv->transpose();
 
-	return ::createNormalMap(img, wm, kdu, kdv);
+    return ::createNormalMap(img, wm, kdu, kdv);
 }
 
 
 /// Normalize the given image in place.
 void nv::normalizeNormalMap(FloatImage * img)
 {
-	nvDebugCheck(img != NULL);
+    nvDebugCheck(img != NULL);
 
 #pragma NV_MESSAGE("TODO: Pack and expand normals explicitly?")
 
-	img->expandNormals(0);
-	img->normalize(0);
-	img->packNormals(0);
+    img->expandNormals(0);
+    img->normalize(0);
+    img->packNormals(0);
 }
 
diff --git a/src/nvtt/Compressor.h b/src/nvtt/Compressor.h
index 6ad84c1..f55d94c 100644
--- a/src/nvtt/Compressor.h
+++ b/src/nvtt/Compressor.h
@@ -33,7 +33,7 @@ namespace nv
     struct CompressorInterface
     {
         virtual ~CompressorInterface() {}
-        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0;
+        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) = 0;
     };
 
 } // nv namespace
diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp
index 60f19ae..532d03b 100644
--- a/src/nvtt/CompressorDX9.cpp
+++ b/src/nvtt/CompressorDX9.cpp
@@ -301,214 +301,132 @@ void NormalCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alp
 }
 
 
-#if defined(HAVE_S3QUANT)
-
-void S3CompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-	float error = 0.0f;
-
-	BlockDXT1 dxtBlock3;
-	BlockDXT1 dxtBlock4;
-	ColorBlock block;
-
-	for (uint y = 0; y < h; y += 4) {
-		for (uint x = 0; x < w; x += 4) {
-			block.init(inputFormat, w, h, data, x, y);
-
-			// Init rgb block.
-			RGBBlock rgbBlock;
-			rgbBlock.n = 16;
-			for (uint i = 0; i < 16; i++) {
-				rgbBlock.colorChannel[i][0] = clamp(float(block.color(i).r) / 255.0f, 0.0f, 1.0f);
-				rgbBlock.colorChannel[i][1] = clamp(float(block.color(i).g) / 255.0f, 0.0f, 1.0f);
-				rgbBlock.colorChannel[i][2] = clamp(float(block.color(i).b) / 255.0f, 0.0f, 1.0f);
-			}
-			rgbBlock.weight[0] = 1.0f;
-			rgbBlock.weight[1] = 1.0f;
-			rgbBlock.weight[2] = 1.0f;
-
-			rgbBlock.inLevel = 4;
-			CodeRGBBlock(&rgbBlock);
-
-			// Copy results to DXT block.
-			dxtBlock4.col0.r = rgbBlock.endPoint[0][0];
-			dxtBlock4.col0.g = rgbBlock.endPoint[0][1];
-			dxtBlock4.col0.b = rgbBlock.endPoint[0][2];
-
-			dxtBlock4.col1.r = rgbBlock.endPoint[1][0];
-			dxtBlock4.col1.g = rgbBlock.endPoint[1][1];
-			dxtBlock4.col1.b = rgbBlock.endPoint[1][2];
-
-			dxtBlock4.setIndices(rgbBlock.index);
-
-			if (dxtBlock4.col0.u < dxtBlock4.col1.u) {
-				swap(dxtBlock4.col0.u, dxtBlock4.col1.u);
-				dxtBlock4.indices ^= 0x55555555;
-			}
-
-			uint error4 = blockError(block, dxtBlock4);
-
-			rgbBlock.inLevel = 3;
-
-			CodeRGBBlock(&rgbBlock);
-
-			// Copy results to DXT block.
-			dxtBlock3.col0.r = rgbBlock.endPoint[0][0];
-			dxtBlock3.col0.g = rgbBlock.endPoint[0][1];
-			dxtBlock3.col0.b = rgbBlock.endPoint[0][2];
-
-			dxtBlock3.col1.r = rgbBlock.endPoint[1][0];
-			dxtBlock3.col1.g = rgbBlock.endPoint[1][1];
-			dxtBlock3.col1.b = rgbBlock.endPoint[1][2];
-
-			dxtBlock3.setIndices(rgbBlock.index);
-
-			if (dxtBlock3.col0.u > dxtBlock3.col1.u) {
-				swap(dxtBlock3.col0.u, dxtBlock3.col1.u);
-				dxtBlock3.indices ^= (~dxtBlock3.indices  >> 1) & 0x55555555;
-			}
-
-			uint error3 = blockError(block, dxtBlock3);
-
-			if (error3 < error4) {
-				error += error3;
-
-				if (outputOptions.outputHandler != NULL) {
-					outputOptions.outputHandler->writeData(&dxtBlock3, sizeof(dxtBlock3));
-				}
-			}
-			else {
-				error += error4;
-
-				if (outputOptions.outputHandler != NULL) {
-					outputOptions.outputHandler->writeData(&dxtBlock4, sizeof(dxtBlock4));
-				}
-			}
-		}
-	}
-}
-
-#endif // defined(HAVE_S3QUANT)
-
-
 #if defined(HAVE_ATITC)
 
-void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
-	// Init source texture
-	ATI_TC_Texture srcTexture;
-	srcTexture.dwSize = sizeof(srcTexture);
-	srcTexture.dwWidth = w;
-	srcTexture.dwHeight = h;
-	if (inputFormat == nvtt::InputFormat_BGRA_8UB)
-	{
-		srcTexture.dwPitch = w * 4;
-		srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
-	}
-	else
-	{
-		srcTexture.dwPitch = w * 16;
-		srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
-	}
-	srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
-	srcTexture.pData = (ATI_TC_BYTE*) data;
-
-	// Init dest texture
-	ATI_TC_Texture destTexture;
-	destTexture.dwSize = sizeof(destTexture);
-	destTexture.dwWidth = w;
-	destTexture.dwHeight = h;
-	destTexture.dwPitch = 0;
-	destTexture.format = ATI_TC_FORMAT_DXT1;
-	destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
-	destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
-
-	ATI_TC_CompressOptions options;
-	options.dwSize = sizeof(options);
-	options.bUseChannelWeighting = false;
-	options.bUseAdaptiveWeighting = false;
-	options.bDXT1UseAlpha = false;
-	options.nCompressionSpeed = ATI_TC_Speed_Normal;
-	options.bDisableMultiThreading = false;
-	//options.bDisableMultiThreading = true;
-
-	// Compress
-	ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
-	}
-
-	mem::free(destTexture.pData);
+    nvDebugCheck(d == 1);
+
+    // Init source texture
+    ATI_TC_Texture srcTexture;
+    srcTexture.dwSize = sizeof(srcTexture);
+    srcTexture.dwWidth = w;
+    srcTexture.dwHeight = h;
+    if (inputFormat == nvtt::InputFormat_BGRA_8UB)
+    {
+        srcTexture.dwPitch = w * 4;
+        srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
+    }
+    else
+    {
+        // @@ Floating point input is not swizzled.
+        srcTexture.dwPitch = w * 16;
+        srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
+    }
+    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
+    srcTexture.pData = (ATI_TC_BYTE*) data;
+
+    // Init dest texture
+    ATI_TC_Texture destTexture;
+    destTexture.dwSize = sizeof(destTexture);
+    destTexture.dwWidth = w;
+    destTexture.dwHeight = h;
+    destTexture.dwPitch = 0;
+    destTexture.format = ATI_TC_FORMAT_DXT1;
+    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
+    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
+
+    ATI_TC_CompressOptions options;
+    options.dwSize = sizeof(options);
+    options.bUseChannelWeighting = false;
+    options.bUseAdaptiveWeighting = false;
+    options.bDXT1UseAlpha = false;
+    options.nCompressionSpeed = ATI_TC_Speed_Normal;
+    options.bDisableMultiThreading = false;
+    //options.bDisableMultiThreading = true;
+
+    // Compress
+    ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
+
+    if (outputOptions.outputHandler != NULL) {
+            outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
+    }
+
+    mem::free(destTexture.pData);
 }
 
-void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
-	// Init source texture
-	ATI_TC_Texture srcTexture;
-	srcTexture.dwSize = sizeof(srcTexture);
-	srcTexture.dwWidth = w;
-	srcTexture.dwHeight = h;
-	if (inputFormat == nvtt::InputFormat_BGRA_8UB)
-	{
-		srcTexture.dwPitch = w * 4;
-		srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
-	}
-	else
-	{
-		srcTexture.dwPitch = w * 16;
-		srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
-	}
-	srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
-	srcTexture.pData = (ATI_TC_BYTE*) data;
-
-	// Init dest texture
-	ATI_TC_Texture destTexture;
-	destTexture.dwSize = sizeof(destTexture);
-	destTexture.dwWidth = w;
-	destTexture.dwHeight = h;
-	destTexture.dwPitch = 0;
-	destTexture.format = ATI_TC_FORMAT_DXT5;
-	destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
-	destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
-
-	// Compress
-	ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
-	}
-
-	mem::free(destTexture.pData);
+    nvDebugCheck(d == 1);
+
+    // Init source texture
+    ATI_TC_Texture srcTexture;
+    srcTexture.dwSize = sizeof(srcTexture);
+    srcTexture.dwWidth = w;
+    srcTexture.dwHeight = h;
+    if (inputFormat == nvtt::InputFormat_BGRA_8UB)
+    {
+        srcTexture.dwPitch = w * 4;
+        srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
+    }
+    else
+    {
+        srcTexture.dwPitch = w * 16;
+        srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
+    }
+    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
+    srcTexture.pData = (ATI_TC_BYTE*) data;
+
+    // Init dest texture
+    ATI_TC_Texture destTexture;
+    destTexture.dwSize = sizeof(destTexture);
+    destTexture.dwWidth = w;
+    destTexture.dwHeight = h;
+    destTexture.dwPitch = 0;
+    destTexture.format = ATI_TC_FORMAT_DXT5;
+    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
+    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
+
+    // Compress
+    ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
+
+    if (outputOptions.outputHandler != NULL) {
+        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
+    }
+
+    mem::free(destTexture.pData);
 }
 
 #endif // defined(HAVE_ATITC)
 
 #if defined(HAVE_SQUISH)
 
-void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
-#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
-	/*
-	Image img(*image);
-	int count = img.width() * img.height();
-	for (int i = 0; i < count; i++)
-	{
-		Color32 c = img.pixel(i);
-		img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
-	}
-
-	int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
-	void * blocks = mem::malloc(size);
+    nvDebugCheck(d == 1);
+    nvDebugCheck(false);
 
-	squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
-
-	if (outputOptions.outputHandler != NULL) {
-		outputOptions.outputHandler->writeData(blocks, size);
-	}
-
-	mem::free(blocks);
-	*/
+#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
+    /*
+    Image img(*image);
+    int count = img.width() * img.height();
+    for (int i = 0; i < count; i++)
+    {
+            Color32 c = img.pixel(i);
+            img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
+    }
+
+    int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
+    void * blocks = mem::malloc(size);
+
+    squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
+
+    if (outputOptions.outputHandler != NULL) {
+            outputOptions.outputHandler->writeData(blocks, size);
+    }
+
+    mem::free(blocks);
+    */
 }
 
 #endif // defined(HAVE_SQUISH)
@@ -516,62 +434,65 @@ void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMo
 
 #if defined(HAVE_D3DX)
 
-void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
-	IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
+    nvDebugCheck(d == 1);
 
-	D3DPRESENT_PARAMETERS presentParams;
-	ZeroMemory(&presentParams, sizeof(presentParams));
-	presentParams.Windowed = TRUE;
-	presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
-	presentParams.BackBufferWidth = 8;
-	presentParams.BackBufferHeight = 8;
-	presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
+    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
 
-	HRESULT err;
+    D3DPRESENT_PARAMETERS presentParams;
+    ZeroMemory(&presentParams, sizeof(presentParams));
+    presentParams.Windowed = TRUE;
+    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
+    presentParams.BackBufferWidth = 8;
+    presentParams.BackBufferHeight = 8;
+    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
 
-	IDirect3DDevice9 * device = NULL;
-	err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
+    HRESULT err;
 
-	IDirect3DTexture9 * texture = NULL;
-	err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
-	
-	IDirect3DSurface9 * surface = NULL;
-	err = texture->GetSurfaceLevel(0, &surface);
+    IDirect3DDevice9 * device = NULL;
+    err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
 
-	RECT rect;
-	rect.left = 0; 
-	rect.top = 0; 
-	rect.bottom = h;
-	rect.right = w;
+    IDirect3DTexture9 * texture = NULL;
+    err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
 
-	if (inputFormat == nvtt::InputFormat_BGRA_8UB)
-	{
-		err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
-	}
-	else
-	{
-		err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
-	}
+    IDirect3DSurface9 * surface = NULL;
+    err = texture->GetSurfaceLevel(0, &surface);
 
-	if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
-	{
-		D3DLOCKED_RECT rect;
-		ZeroMemory(&rect, sizeof(rect));
+    RECT rect;
+    rect.left = 0;
+    rect.top = 0;
+    rect.bottom = h;
+    rect.right = w;
 
-		err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
+    if (inputFormat == nvtt::InputFormat_BGRA_8UB)
+    {
+        err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
+    }
+    else
+    {
+        err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
+    }
 
-		if (outputOptions.outputHandler != NULL) {
-			int size = rect.Pitch * ((h + 3) / 4);
-			outputOptions.outputHandler->writeData(rect.pBits, size);
-		}
+    if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
+    {
+        D3DLOCKED_RECT rect;
+        ZeroMemory(&rect, sizeof(rect));
+
+        err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
 
-		err = surface->UnlockRect();
+        if (outputOptions.outputHandler != NULL) {
+            int size = rect.Pitch * ((h + 3) / 4);
+            outputOptions.outputHandler->writeData(rect.pBits, size);
-	}
+        }
 
-	surface->Release();
-	device->Release();
-	d3d->Release();
+        err = surface->UnlockRect();
+    }
+
+    surface->Release();
+    texture->Release(); // Drop the reference returned by D3DXCreateTexture; it was leaked before.
+    device->Release();
+    d3d->Release();
 }
 
 #endif // defined(HAVE_D3DX)
@@ -581,8 +502,8 @@ void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode
 
 void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
 {
-	rgba.swizzle(2, 1, 0, 3); // Swap R and B
-	stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
+    rgba.swizzle(2, 1, 0, 3); // Swap R and B
+    stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
 }
 
 
diff --git a/src/nvtt/CompressorDX9.h b/src/nvtt/CompressorDX9.h
index ab33c7f..9f81e14 100644
--- a/src/nvtt/CompressorDX9.h
+++ b/src/nvtt/CompressorDX9.h
@@ -107,26 +107,26 @@ namespace nv
 #if defined(HAVE_ATITC)
     struct AtiCompressorDXT1 : public CompressorInterface
     {
-        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
     };
 
     struct AtiCompressorDXT5 : public CompressorInterface
     {
-        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
     };
 #endif
 
 #if defined(HAVE_SQUISH)
     struct SquishCompressorDXT1 : public CompressorInterface
     {
-        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
     };
 #endif
 
 #if defined(HAVE_D3DX)
     struct D3DXCompressorDXT1 : public CompressorInterface
     {
-        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
     };
 #endif
 
diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/CompressorDXT.cpp
index 3697464..10c9100 100644
--- a/src/nvtt/CompressorDXT.cpp
+++ b/src/nvtt/CompressorDXT.cpp
@@ -143,8 +143,10 @@ void CompressorTask(void * data, int i)
     }
 }
 
-void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
+    nvDebugCheck(d == 1);
+
     CompressorContext context;
     context.alphaMode = alphaMode;
     context.w = w;
@@ -177,8 +179,10 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
 
 
 
-void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
+void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
 {
+    nvDebugCheck(d == 1);
+
     const uint bs = blockSize();
     const uint bw = (w + 3) / 4;
     const uint bh = (h + 3) / 4;
diff --git a/src/nvtt/CompressorDXT.h b/src/nvtt/CompressorDXT.h
index b3b5b55..871eb94 100644
--- a/src/nvtt/CompressorDXT.h
+++ b/src/nvtt/CompressorDXT.h
@@ -35,7 +35,7 @@ namespace nv
 
     struct FixedBlockCompressor : public CompressorInterface
     {
-        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
 
         virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
         virtual uint blockSize() const = 0;
@@ -43,7 +43,7 @@ namespace nv
 
     struct ColorSetCompressor : public CompressorInterface
     {
-        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
 
         virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
         virtual uint blockSize() const = 0;
diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp
index 4bc59ba..612c955 100644
--- a/src/nvtt/CompressorRGB.cpp
+++ b/src/nvtt/CompressorRGB.cpp
@@ -26,14 +26,14 @@
 #include "CompressionOptions.h"
 #include "OutputOptions.h"
 
-#include <nvimage/Image.h>
-#include <nvimage/FloatImage.h>
-#include <nvimage/PixelFormat.h>
+#include "nvimage/Image.h"
+#include "nvimage/FloatImage.h"
+#include "nvimage/PixelFormat.h"
 
-#include <nvmath/Color.h>
-#include <nvmath/Half.h>
+#include "nvmath/Color.h"
+#include "nvmath/Half.h"
 
-#include <nvcore/Debug.h>
+#include "nvcore/Debug.h"
 
 using namespace nv;
 using namespace nvtt;
@@ -125,7 +125,7 @@ namespace
 
 
 
-void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
     nvDebugCheck (compressionOptions.format == nvtt::Format_RGBA);
 
@@ -189,77 +189,68 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
     }
 
     const uint pitch = computeBytePitch(w, bitCount, compressionOptions.pitchAlignment);
-    const uint wh = w * h;
+    const uint whd = w * h * d;
 
     // Allocate output scanline.
     uint8 * const dst = malloc<uint8>(pitch);
 
-    for (uint y = 0; y < h; y++)
+    for (uint z = 0; z < d; z++)
     {
-        const uint * src = (const uint *)data + y * w;
-        const float * fsrc = (const float *)data + y * w;
-
-        BitStream stream(dst);
-
-        for (uint x = 0; x < w; x++)
+        for (uint y = 0; y < h; y++)
         {
-            float r = fsrc[x + 0 * wh];
-            float g = fsrc[x + 1 * wh];
-            float b = fsrc[x + 2 * wh];
-            float a = fsrc[x + 3 * wh];
-
-            if (compressionOptions.pixelType == nvtt::PixelType_Float)
-            {
-                if (rsize == 32) stream.putFloat(r);
-                else if (rsize == 16) stream.putHalf(r);
+            const float * src = (const float *)data + (z * h + y) * w; // Row y of slice z; channel planes are whd floats apart.
 
-                if (gsize == 32) stream.putFloat(g);
-                else if (gsize == 16) stream.putHalf(g);
+            BitStream stream(dst);
 
-                if (bsize == 32) stream.putFloat(b);
-                else if (bsize == 16) stream.putHalf(b);
-
-                if (asize == 32) stream.putFloat(a);
-                else if (asize == 16) stream.putHalf(a);
-            }
-            else
+            for (uint x = 0; x < w; x++)
             {
-                Color32 c;
-                if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
-                    c.r = uint8(clamp(r * 255, 0.0f, 255.0f));
-                    c.g = uint8(clamp(g * 255, 0.0f, 255.0f));
-                    c.b = uint8(clamp(b * 255, 0.0f, 255.0f));
-                    c.a = uint8(clamp(a * 255, 0.0f, 255.0f));
-                }
-                // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt
+                float r = src[x + 0 * whd];
+                float g = src[x + 1 * whd];
+                float b = src[x + 2 * whd];
+                float a = src[x + 3 * whd];
 
-                uint p = 0;
-                p |= PixelFormat::convert(c.r, 8, rsize) << rshift;
-                p |= PixelFormat::convert(c.g, 8, gsize) << gshift;
-                p |= PixelFormat::convert(c.b, 8, bsize) << bshift;
-                p |= PixelFormat::convert(c.a, 8, asize) << ashift;
+                if (compressionOptions.pixelType == nvtt::PixelType_Float)
+                {
+                    if (rsize == 32) stream.putFloat(r);
+                    else if (rsize == 16) stream.putHalf(r);
 
-                stream.putBits(p, bitCount);
+                    if (gsize == 32) stream.putFloat(g);
+                    else if (gsize == 16) stream.putHalf(g);
 
-                // Output one byte at a time.
-                /*for (uint i = 0; i < byteCount; i++)
+                    if (bsize == 32) stream.putFloat(b);
+                    else if (bsize == 16) stream.putHalf(b);
+
+                    if (asize == 32) stream.putFloat(a);
+                    else if (asize == 16) stream.putHalf(a);
+                }
+                else
                 {
-                        *(dst + x * byteCount + i) = (p >> (i * 8)) & 0xFF;
-                }*/
+                    Color32 c;
+                    if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
+                        c.r = uint8(clamp(r * 255, 0.0f, 255.0f));
+                        c.g = uint8(clamp(g * 255, 0.0f, 255.0f));
+                        c.b = uint8(clamp(b * 255, 0.0f, 255.0f));
+                        c.a = uint8(clamp(a * 255, 0.0f, 255.0f));
+                    }
+                    // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt
+
+                    uint p = 0;
+                    p |= PixelFormat::convert(c.r, 8, rsize) << rshift;
+                    p |= PixelFormat::convert(c.g, 8, gsize) << gshift;
+                    p |= PixelFormat::convert(c.b, 8, bsize) << bshift;
+                    p |= PixelFormat::convert(c.a, 8, asize) << ashift;
+
+                    stream.putBits(p, bitCount);
+                }
             }
-        }
 
-        // Zero padding.
-        stream.align(compressionOptions.pitchAlignment);
-        nvDebugCheck(stream.ptr == dst + pitch);
+            // Zero padding.
+            stream.align(compressionOptions.pitchAlignment);
+            nvDebugCheck(stream.ptr == dst + pitch);
 
-        /*for (uint x = w * byteCount; x < pitch; x++)
-        {
-                *(dst + x) = 0;
-        }*/
-
-        // @@ This code does not truly support less than byte-aligned textures.
-        outputOptions.writeData(dst, pitch);
+            // Scanlines are always byte-aligned.
+            outputOptions.writeData(dst, pitch);
+        }
     }
 
     free(dst);
diff --git a/src/nvtt/CompressorRGB.h b/src/nvtt/CompressorRGB.h
index bdfea0e..90cac48 100644
--- a/src/nvtt/CompressorRGB.h
+++ b/src/nvtt/CompressorRGB.h
@@ -31,7 +31,7 @@ namespace nv
 {
     struct PixelFormatConverter : public CompressorInterface
     {
-        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
     };
 
 } // nv namespace
diff --git a/src/nvtt/CompressorRGBE.cpp b/src/nvtt/CompressorRGBE.cpp
index 232a939..3dccbac 100644
--- a/src/nvtt/CompressorRGBE.cpp
+++ b/src/nvtt/CompressorRGBE.cpp
@@ -56,17 +56,17 @@ static Color32 toRgbe8(float r, float g, float b)
 }
 
 
-void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
     nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE);
 
     uint srcPitch = w;
-    uint srcPlane = w * h;
+    uint srcPlane = w * h * d;
 
     // Allocate output scanline.
     Color32 * dst = new Color32[w];
 
-    for (uint y = 0; y < h; y++)
+    for (uint y = 0; y < h*d; y++)
     {
         const float * src = (const float *)data + y * srcPitch;
 
@@ -75,14 +75,14 @@ void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, con
             float r = src[x + 0 * srcPlane];
             float g = src[x + 1 * srcPlane];
             float b = src[x + 2 * srcPlane];
-            
+
             dst[x] = toRgbe8(r, g, b);
         }
 
-	if (outputOptions.outputHandler != NULL)
-	{
-	    outputOptions.outputHandler->writeData(dst, w * 4);
-	}
+        if (outputOptions.outputHandler != NULL)
+        {
+            outputOptions.outputHandler->writeData(dst, w * 4);
+        }
     }
 
     delete [] dst;
diff --git a/src/nvtt/CompressorRGBE.h b/src/nvtt/CompressorRGBE.h
index bbb625a..f2905bc 100644
--- a/src/nvtt/CompressorRGBE.h
+++ b/src/nvtt/CompressorRGBE.h
@@ -29,9 +29,9 @@
 namespace nv
 {
     struct CompressorRGBE : public CompressorInterface
-	{
-		virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-	};
+    {
+        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+    };
 
 } // nv namespace
 
diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp
index 2c991d7..4c6801f 100644
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
@@ -122,7 +122,12 @@ int Compressor::estimateSize(const InputOptions & inputOptions, const Compressio
     int d = inputOptions.m.depth;
     getTargetExtent(w, h, d, inputOptions.m.maxExtent, inputOptions.m.roundMode, inputOptions.m.textureType);
 
-    int mipmapCount = countMipmaps(w, h, d);
+    int mipmapCount = 1;
+    if (inputOptions.m.generateMipmaps) {
+        mipmapCount = countMipmaps(w, h, d);
+        if (inputOptions.m.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.m.maxLevel);
+    }
+
     return inputOptions.m.faceCount * estimateSize(w, h, d, mipmapCount, compressionOptions);
 }
 
@@ -130,7 +135,7 @@ int Compressor::estimateSize(const InputOptions & inputOptions, const Compressio
 // TexImage API.
 bool Compressor::outputHeader(const TexImage & tex, int mipmapCount, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
 {
-    return m.outputHeader(TextureType_2D, tex.width(), tex.height(), tex.depth(), mipmapCount, tex.isNormalMap(), compressionOptions.m, outputOptions.m);
+    return m.outputHeader(tex.type(), tex.width(), tex.height(), tex.depth(), mipmapCount, tex.isNormalMap(), compressionOptions.m, outputOptions.m);
 }
 
 bool Compressor::compress(const TexImage & tex, int face, int mipmap, const CompressionOptions & compressionOptions, const OutputOptions & outputOptions) const
@@ -228,7 +233,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
     // Output images.
     for (int f = 0; f < faceCount; f++)
     {
-        img.setImage2D(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.images[f]);
+        img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
 
         // To normal map.
         if (inputOptions.convertToNormalMap) {
@@ -242,7 +247,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
         }
 
         // Resize input.
-        img.resize(w, h, ResizeFilter_Box);
+        img.resize(w, h, d, ResizeFilter_Box);
 
         nvtt::TexImage tmp = img;
         if (!img.isNormalMap()) {
@@ -269,7 +274,12 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
             }
 
             if (useSourceImages) {
-                img.setImage2D(inputOptions.inputFormat, w, h, inputOptions.images[idx]);
+                img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
+
+                // For already generated mipmaps, we need to convert to linear.
+                if (!img.isNormalMap()) {
+                    img.toLinear(inputOptions.inputGamma);
+                }
             }
             else {
                 if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
@@ -282,6 +292,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
             }
             nvDebugCheck(img.width() == w);
             nvDebugCheck(img.height() == h);
+            nvDebugCheck(img.depth() == d);
 
             if (img.isNormalMap()) {
                 if (inputOptions.normalizeMipmaps) {
@@ -313,7 +324,7 @@ bool Compressor::Private::compress(const TexImage & tex, int face, int mipmap, c
 
 bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int face, int mipmap, const float * rgba, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
 {
-    int size = computeImageSize(w, h, d, compressionOptions.bitcount, compressionOptions.pitchAlignment, compressionOptions.format);
+    int size = computeImageSize(w, h, d, compressionOptions.getBitCount(), compressionOptions.pitchAlignment, compressionOptions.format);
     outputOptions.beginImage(size, w, h, d, face, mipmap);
 
     // Decide what compressor to use.
@@ -335,7 +346,7 @@ bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int
     }
     else
     {
-        compressor->compress(alphaMode, w, h, rgba, dispatcher, compressionOptions, outputOptions);
+        compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions);
     }
 
     return true;
@@ -393,10 +404,10 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
         else if (textureType == TextureType_Cube) {
             header.setTextureCube();
         }
-        /*else if (textureType == TextureType_3D) {
+        else if (textureType == TextureType_3D) {
             header.setTexture3D();
             header.setDepth(d);
-        }*/
+        }
 
         header.setWidth(w);
         header.setHeight(h);
@@ -408,10 +419,7 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
         {
             if (compressionOptions.format == Format_RGBA)
             {
-                int bitcount = compressionOptions.bitcount;
-                if (bitcount == 0) {
-                    bitcount = compressionOptions.rsize + compressionOptions.gsize + compressionOptions.bsize + compressionOptions.asize;
-                }
+                const uint bitcount = compressionOptions.getBitCount();
 
                 if (bitcount == 16)
                 {
diff --git a/src/nvtt/InputOptions.cpp b/src/nvtt/InputOptions.cpp
index a0c2159..0ff1ad6 100644
--- a/src/nvtt/InputOptions.cpp
+++ b/src/nvtt/InputOptions.cpp
@@ -176,9 +176,6 @@ void InputOptions::resetTextureLayout()
 // Copies the data to our internal structures.
 bool InputOptions::setMipmapData(const void * data, int width, int height, int depth /*= 1*/, int face /*= 0*/, int mipLevel /*= 0*/)
 {
-    if (depth != 1) {
-        return false;
-    }
     if (uint(face) >= m.faceCount) {
         return false;
     }
diff --git a/src/nvtt/TexImage.cpp b/src/nvtt/TexImage.cpp
index fd06564..7a08593 100644
--- a/src/nvtt/TexImage.cpp
+++ b/src/nvtt/TexImage.cpp
@@ -115,6 +115,7 @@ uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlign
         return d * h * computeBytePitch(w, bitCount, pitchAlignmentInBytes);
     }
     else {
+        nvDebugCheck(d == 1);
         // @@ Handle 3D textures. DXT and VTC have different behaviors.
         return ((w + 3) / 4) * ((h + 3) / 4) * blockSize(format);
     }
@@ -253,7 +254,7 @@ int TexImage::height() const
 
 int TexImage::depth() const
 {
-    if (m->image != NULL) return 1;
+    if (m->image != NULL) return m->image->depth();
     return 0;
 }
 
@@ -272,6 +273,11 @@ bool TexImage::isNormalMap() const
     return m->isNormalMap;
 }
 
+TextureType TexImage::type() const
+{
+    return m->type;
+}
+
 int TexImage::countMipmaps() const
 {
     if (m->image == NULL) return 0;
@@ -338,7 +344,7 @@ void TexImage::histogram(int channel, float rangeMin, float rangeMax, int binCou
     float scale = float(binCount) / rangeMax;
     float bias = - scale * rangeMin;
 
-    const uint count = m->image->width() * m->image->height();
+    const uint count = m->image->pixelCount();
     for (uint i = 0; i < count; i++) {
         float f = c[i] * scale + bias;
         int idx = ifloor(f);
@@ -355,7 +361,7 @@ void TexImage::range(int channel, float * rangeMin, float * rangeMax)
     FloatImage * img = m->image;
     float * c = img->channel(channel);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint p = 0; p < count; p++) {
         float f = c[p];
         if (f < range.x) range.x = f;
@@ -378,7 +384,7 @@ bool TexImage::load(const char * fileName, bool * hasAlpha/*= NULL*/)
     detach();
 
     if (hasAlpha != NULL) {
-        *hasAlpha = (img->componentNum() == 4);
+        *hasAlpha = (img->componentCount() == 4);
     }
 
     // @@ Have loadFloat allocate the image with the desired number of channels.
@@ -400,16 +406,17 @@ bool TexImage::save(const char * fileName) const
     return false;
 }
 
-bool TexImage::setImage2D(nvtt::InputFormat format, int w, int h, const void * data)
+bool TexImage::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data)
 {
     detach();
 
     if (m->image == NULL) {
         m->image = new FloatImage();
     }
-    m->image->allocate(4, w, h);
+    m->image->allocate(4, w, h, d);
+    m->type = (d == 1) ? TextureType_2D : TextureType_3D;
 
-    const int count = w * h;
+    const int count = m->image->pixelCount();
 
     float * rdst = m->image->channel(0);
     float * gdst = m->image->channel(1);
@@ -471,16 +478,17 @@ bool TexImage::setImage2D(nvtt::InputFormat format, int w, int h, const void * d
     return true;
 }
 
-bool TexImage::setImage2D(InputFormat format, int w, int h, const void * r, const void * g, const void * b, const void * a)
+bool TexImage::setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a)
 {
     detach();
 
     if (m->image == NULL) {
         m->image = new FloatImage();
     }
-    m->image->allocate(4, w, h);
+    m->image->allocate(4, w, h, d);
+    m->type = (d == 1) ? TextureType_2D : TextureType_3D;
 
-    const int count = w * h;
+    const int count = m->image->pixelCount();
 
     float * rdst = m->image->channel(0);
     float * gdst = m->image->channel(1);
@@ -542,6 +550,7 @@ bool TexImage::setImage2D(InputFormat format, int w, int h, const void * r, cons
     return true;
 }
 
+// @@ Add support for compressed 3D textures.
 bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
 {
     if (format != nvtt::Format_BC1 && format != nvtt::Format_BC2 && format != nvtt::Format_BC3 && format != nvtt::Format_BC4 && format != nvtt::Format_BC5)
@@ -554,7 +563,8 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
     if (m->image == NULL) {
         m->image = new FloatImage();
     }
-    m->image->allocate(4, w, h);
+    m->image->allocate(4, w, h, 1);
+    m->type = TextureType_2D;
 
     const int bw = (w + 3) / 4;
     const int bh = (h + 3) / 4;
@@ -570,50 +580,50 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
             {
                 ColorBlock colors;
 
-		        if (format == nvtt::Format_BC1)
-		        {
-		            const BlockDXT1 * block = (const BlockDXT1 *)ptr;
-
-		            if (decoder == Decoder_D3D10) {
-			            block->decodeBlock(&colors, false);
-		            }
-		            else if (decoder == Decoder_D3D9) {
-			            block->decodeBlock(&colors, false);
-		            }
-		            else if (decoder == Decoder_NV5x) {
-			            block->decodeBlockNV5x(&colors);
-		            }
-		        }
-		        else if (format == nvtt::Format_BC2)
-		        {
-		            const BlockDXT3 * block = (const BlockDXT3 *)ptr;
-
-		            if (decoder == Decoder_D3D10) {
-			            block->decodeBlock(&colors, false);
-		            }
-		            else if (decoder == Decoder_D3D9) {
-			            block->decodeBlock(&colors, false);
-		            }
-		            else if (decoder == Decoder_NV5x) {
-		    	        block->decodeBlockNV5x(&colors);
-		            }
-		        }
-		        else if (format == nvtt::Format_BC3)
-		        {
-		            const BlockDXT5 * block = (const BlockDXT5 *)ptr;
-
-		            if (decoder == Decoder_D3D10) {
-			            block->decodeBlock(&colors, false);
-		            }
-		            else if (decoder == Decoder_D3D9) {
-			            block->decodeBlock(&colors, false);
-		            }
-		            else if (decoder == Decoder_NV5x) {
-    			        block->decodeBlockNV5x(&colors);
-		            }
-		        }
-		        else if (format == nvtt::Format_BC4)
-		        {
+		if (format == nvtt::Format_BC1)
+		{
+		    const BlockDXT1 * block = (const BlockDXT1 *)ptr;
+
+		    if (decoder == Decoder_D3D10) {
+			    block->decodeBlock(&colors, false);
+		    }
+		    else if (decoder == Decoder_D3D9) {
+			    block->decodeBlock(&colors, false);
+		    }
+		    else if (decoder == Decoder_NV5x) {
+			    block->decodeBlockNV5x(&colors);
+		    }
+		}
+		else if (format == nvtt::Format_BC2)
+		{
+		    const BlockDXT3 * block = (const BlockDXT3 *)ptr;
+
+		    if (decoder == Decoder_D3D10) {
+			    block->decodeBlock(&colors, false);
+		    }
+		    else if (decoder == Decoder_D3D9) {
+			    block->decodeBlock(&colors, false);
+		    }
+		    else if (decoder == Decoder_NV5x) {
+			block->decodeBlockNV5x(&colors);
+		    }
+		}
+		else if (format == nvtt::Format_BC3)
+		{
+		    const BlockDXT5 * block = (const BlockDXT5 *)ptr;
+
+		    if (decoder == Decoder_D3D10) {
+			    block->decodeBlock(&colors, false);
+		    }
+		    else if (decoder == Decoder_D3D9) {
+			    block->decodeBlock(&colors, false);
+		    }
+		    else if (decoder == Decoder_NV5x) {
+			block->decodeBlockNV5x(&colors);
+		    }
+		}
+		else if (format == nvtt::Format_BC4)
+		{
                     const BlockATI1 * block = (const BlockATI1 *)ptr;
                     block->decodeBlock(&colors, decoder == Decoder_D3D9);
                 }
@@ -623,25 +633,25 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
                     block->decodeBlock(&colors, decoder == Decoder_D3D9);
                 }
 
-		        for (int yy = 0; yy < 4; yy++)
-		        {
-		            for (int xx = 0; xx < 4; xx++)
-		            {
-			            Color32 c = colors.color(xx, yy);
-
-			            if (x * 4 + xx < w && y * 4 + yy < h)
-			            {
-			                m->image->pixel(x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
-			                m->image->pixel(x*4 + xx, y*4 + yy, 1) = float(c.g) * 1.0f/255.0f;
-			                m->image->pixel(x*4 + xx, y*4 + yy, 2) = float(c.b) * 1.0f/255.0f;
-			                m->image->pixel(x*4 + xx, y*4 + yy, 3) = float(c.a) * 1.0f/255.0f;
-			            }
-		            }
-		        }
-
-		        ptr += bs;
-	        }
+		for (int yy = 0; yy < 4; yy++)
+		{
+		    for (int xx = 0; xx < 4; xx++)
+		    {
+			Color32 c = colors.color(xx, yy);
+
+			if (x * 4 + xx < w && y * 4 + yy < h)
+			{
+			    m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
+			    m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
+			    m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
+			    m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
+			}
+		    }
+		}
+
+		ptr += bs;
 	    }
+	}
     }
     catch(...) {
         return false;
@@ -673,19 +683,19 @@ static void getDefaultFilterWidthAndParams(int filter, float * filterWidth, floa
     }
 }
 
-void TexImage::resize(int w, int h, ResizeFilter filter)
+void TexImage::resize(int w, int h, int d, ResizeFilter filter)
 {
     float filterWidth;
     float params[2];
     getDefaultFilterWidthAndParams(filter, &filterWidth, params);
 
-    resize(w, h, filter, filterWidth, params);
+    resize(w, h, d, filter, filterWidth, params);
 }
 
-void TexImage::resize(int w, int h, ResizeFilter filter, float filterWidth, const float * params)
+void TexImage::resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params)
 {
     FloatImage * img = m->image;
-    if (img == NULL || (w == img->width() && h == img->height())) {
+    if (img == NULL || (w == img->width() && h == img->height() && d == img->depth())) {
         return;
     }
 
@@ -698,25 +708,25 @@ void TexImage::resize(int w, int h, ResizeFilter filter, float filterWidth, cons
         if (filter == ResizeFilter_Box)
         {
             BoxFilter filter(filterWidth);
-            img = img->resize(filter, w, h, wrapMode, 3);
+            img = img->resize(filter, w, h, d, wrapMode, 3);
         }
         else if (filter == ResizeFilter_Triangle)
         {
             TriangleFilter filter(filterWidth);
-            img = img->resize(filter, w, h, wrapMode, 3);
+            img = img->resize(filter, w, h, d, wrapMode, 3);
         }
         else if (filter == ResizeFilter_Kaiser)
         {
             KaiserFilter filter(filterWidth);
             if (params != NULL) filter.setParameters(params[0], params[1]);
-            img = img->resize(filter, w, h, wrapMode, 3);
+            img = img->resize(filter, w, h, d, wrapMode, 3);
         }
         else //if (filter == ResizeFilter_Mitchell)
         {
             nvDebugCheck(filter == ResizeFilter_Mitchell);
             MitchellFilter filter;
             if (params != NULL) filter.setParameters(params[0], params[1]);
-            img = img->resize(filter, w, h, wrapMode, 3);
+            img = img->resize(filter, w, h, d, wrapMode, 3);
         }
     }
     else
@@ -724,25 +734,25 @@ void TexImage::resize(int w, int h, ResizeFilter filter, float filterWidth, cons
         if (filter == ResizeFilter_Box)
         {
             BoxFilter filter(filterWidth);
-            img = img->resize(filter, w, h, wrapMode);
+            img = img->resize(filter, w, h, d, wrapMode);
         }
         else if (filter == ResizeFilter_Triangle)
         {
             TriangleFilter filter(filterWidth);
-            img = img->resize(filter, w, h, wrapMode);
+            img = img->resize(filter, w, h, d, wrapMode);
         }
         else if (filter == ResizeFilter_Kaiser)
         {
             KaiserFilter filter(filterWidth);
             if (params != NULL) filter.setParameters(params[0], params[1]);
-            img = img->resize(filter, w, h, wrapMode);
+            img = img->resize(filter, w, h, d, wrapMode);
         }
         else //if (filter == ResizeFilter_Mitchell)
         {
             nvDebugCheck(filter == ResizeFilter_Mitchell);
             MitchellFilter filter;
             if (params != NULL) filter.setParameters(params[0], params[1]);
-            img = img->resize(filter, w, h, wrapMode);
+            img = img->resize(filter, w, h, d, wrapMode);
         }
     }
 
@@ -765,11 +775,11 @@ void TexImage::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, f
 
     int w = m->image->width();
     int h = m->image->height();
-    int d = 1;
+    int d = m->image->depth();
 
-    getTargetExtent(w, h, d, maxExtent, roundMode, nvtt::TextureType_2D);
+    getTargetExtent(w, h, d, maxExtent, roundMode, m->type);
 
-    resize(w, h, filter, filterWidth, params);
+    resize(w, h, d, filter, filterWidth, params);
 }
 
 bool TexImage::buildNextMipmap(MipmapFilter filter)
@@ -784,7 +794,7 @@ bool TexImage::buildNextMipmap(MipmapFilter filter)
 bool TexImage::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params)
 {
     FloatImage * img = m->image;
-    if (img == NULL || (img->width() == 1 && img->height() == 1)) {
+    if (img == NULL || (img->width() == 1 && img->height() == 1 && img->depth() == 1)) {
         return false;
     }
 
@@ -816,7 +826,7 @@ bool TexImage::buildNextMipmap(MipmapFilter filter, float filterWidth, const flo
     {
         if (filter == MipmapFilter_Box)
         {
-            if (filterWidth == 0.5f) {
+            if (filterWidth == 0.5f && img->depth() == 1) {
                 img = img->fastDownSample();
             }
             else {
@@ -939,8 +949,8 @@ void TexImage::blend(float red, float green, float blue, float alpha, float t)
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const int count = img->width() * img->height();
-    for (int i = 0; i < count; i++)
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
     {
         r[i] = lerp(r[i], red, t);
         g[i] = lerp(g[i], green, t);
@@ -961,8 +971,8 @@ void TexImage::premultiplyAlpha()
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const int count = img->width() * img->height();
-    for (int i = 0; i < count; i++)
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
     {
         r[i] *= a[i];
         g[i] *= a[i];
@@ -989,8 +999,8 @@ void TexImage::toGreyScale(float redScale, float greenScale, float blueScale, fl
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const int count = img->width() * img->height();
-    for (int i = 0; i < count; i++)
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
     {
         float grey = r[i] * redScale + g[i] * greenScale + b[i] * blueScale + a[i] * alphaScale;
         a[i] = b[i] = g[i] = r[i] = grey;
@@ -1005,33 +1015,37 @@ void TexImage::setBorder(float r, float g, float b, float a)
     detach();
 
     FloatImage * img = m->image;
-    const int w = img->width();
-    const int h = img->height();
-
-    for (int i = 0; i < w; i++)
-    {
-        img->pixel(i, 0, 0) = r;
-        img->pixel(i, 0, 1) = g;
-        img->pixel(i, 0, 2) = b;
-        img->pixel(i, 0, 3) = a;
-
-        img->pixel(i, h-1, 0) = r;
-        img->pixel(i, h-1, 1) = g;
-        img->pixel(i, h-1, 2) = b;
-        img->pixel(i, h-1, 3) = a;
-    }
+    const uint w = img->width();
+    const uint h = img->height();
+    const uint d = img->depth();
 
-    for (int i = 0; i < h; i++)
+    for (uint z = 0; z < d; z++)    // Apply the same 2D border to every depth slice.
     {
-        img->pixel(0, i, 0) = r;
-        img->pixel(0, i, 1) = g;
-        img->pixel(0, i, 2) = b;
-        img->pixel(0, i, 3) = a;
+        for (uint i = 0; i < w; i++)    // First (y = 0) and last (y = h-1) rows of slice z.
+        {
+            img->pixel(0, i, 0, z) = r;
+            img->pixel(1, i, 0, z) = g;
+            img->pixel(2, i, 0, z) = b;
+            img->pixel(3, i, 0, z) = a;
+
+            img->pixel(0, i, h-1, z) = r;
+            img->pixel(1, i, h-1, z) = g;
+            img->pixel(2, i, h-1, z) = b;
+            img->pixel(3, i, h-1, z) = a;
+        }
 
-        img->pixel(w-1, i, 0) = r;
-        img->pixel(w-1, i, 1) = g;
-        img->pixel(w-1, i, 2) = b;
-        img->pixel(w-1, i, 3) = a;
+        for (uint i = 0; i < h; i++)    // First (x = 0) and last (x = w-1) columns of slice z.
+        {
+            img->pixel(0, 0, i, z) = r;
+            img->pixel(1, 0, i, z) = g;
+            img->pixel(2, 0, i, z) = b;
+            img->pixel(3, 0, i, z) = a;
+
+            img->pixel(0, w-1, i, z) = r;
+            img->pixel(1, w-1, i, z) = g;
+            img->pixel(2, w-1, i, z) = b;
+            img->pixel(3, w-1, i, z) = a;
+        }
     }
 }
 
@@ -1048,8 +1062,8 @@ void TexImage::fill(float red, float green, float blue, float alpha)
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const int count = img->width() * img->height();
-    for (int i = 0; i < count; i++)
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
     {
         r[i] = red;
         g[i] = green;
@@ -1112,7 +1126,7 @@ void TexImage::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
         float R = nv::clamp(r[i] * irange, 0.0f, 1.0f);
         float G = nv::clamp(g[i] * irange, 0.0f, 1.0f);
@@ -1139,7 +1153,7 @@ void TexImage::fromRGBM(float range/*= 1*/)
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
         float M = a[i] * range;
 
@@ -1164,7 +1178,7 @@ void TexImage::toYCoCg()
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
         float R = r[i];
         float G = g[i];
@@ -1191,7 +1205,7 @@ void TexImage::toYCoCg()
 // and minimize bilinear interpolation artifacts.
 void TexImage::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/)
 {
-    if (m->image == NULL) return;
+    if (m->image == NULL || m->image->depth() != 1) return;
 
     detach();
 
@@ -1214,8 +1228,8 @@ void TexImage::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/)
                     const uint x = bi*4 + i;
                     if (x >= w) continue;
 
-                    float Co = img->pixel(x, y, 0);
-                    float Cg = img->pixel(x, y, 1);
+                    float Co = img->pixel(0, x, y, 0);
+                    float Cg = img->pixel(1, x, y, 0);
 
                     m = max(m, fabsf(Co));
                     m = max(m, fabsf(Cg));
@@ -1231,8 +1245,8 @@ void TexImage::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/)
                     uint x = min(bi*4 + i, w);
                     uint y = min(bj*4 + j, h);
 
-                    float & Co = img->pixel(x, y, 0);
-                    float & Cg = img->pixel(x, y, 1);
+                    float & Co = img->pixel(0, x, y, 0);
+                    float & Cg = img->pixel(1, x, y, 0);
 
                     Co /= scale;
                     nvDebugCheck(fabsf(Co) <= 1.0f);
@@ -1240,7 +1254,7 @@ void TexImage::blockScaleCoCg(int bits/*= 5*/, float threshold/*= 0.0*/)
                     Cg /= scale;
                     nvDebugCheck(fabsf(Cg) <= 1.0f);
 
-                    img->pixel(x, y, 2) = scale;
+                    img->pixel(2, x, y, 0) = scale;
                 }
             }
         }
@@ -1259,7 +1273,7 @@ void TexImage::fromYCoCg()
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
         float Co = r[i];
         float Cg = g[i];
@@ -1294,7 +1308,7 @@ void TexImage::toLUVW(float range/*= 1.0f*/)
     float * b = img->channel(2);
     float * a = img->channel(3);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
         float R = nv::clamp(r[i] * irange, 0.0f, 1.0f);
         float G = nv::clamp(g[i] * irange, 0.0f, 1.0f);
@@ -1324,7 +1338,7 @@ void TexImage::abs(int channel)
     FloatImage * img = m->image;
     float * c = img->channel(channel);
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
         c[i] = fabsf(c[i]);
     }
@@ -1454,6 +1468,7 @@ void TexImage::fromJPEGLS()
 */
 
 
+// If dither is true, this uses the Floyd-Steinberg dithering method.
 void TexImage::binarize(int channel, float threshold, bool dither)
 {
     if (m->image == NULL) return;
@@ -1461,45 +1476,51 @@ void TexImage::binarize(int channel, float threshold, bool dither)
     detach();
 
     FloatImage * img = m->image;
-    const uint w = img->width();
-    const uint h = img->height();
 
     if (!dither) {
         float * c = img->channel(channel);
-        const uint count = w * h;
+        const uint count = img->pixelCount();
         for (uint i = 0; i < count; i++) {
             c[i] = float(c[i] > threshold);
         }
     }
     else {
+        const uint w = img->width();
+        const uint h = img->height();
+        const uint d = img->depth();
+
         float * row0 = new float[(w+2)];
         float * row1 = new float[(w+2)];
-        memset(row0, 0, sizeof(float)*(w+2));
-        memset(row1, 0, sizeof(float)*(w+2));
 
-        for (uint y = 0; y < h; y++) {
-            for (uint x = 0; x < w; x++) {
+        // @@ Extend Floyd-Steinberg dithering to 3D properly. For now each depth slice is dithered independently.
+        for (uint z = 0; z < d; z++) {
+            memset(row0, 0, sizeof(float)*(w+2));
+            memset(row1, 0, sizeof(float)*(w+2));
+
+            for (uint y = 0; y < h; y++) {
+                for (uint x = 0; x < w; x++) {
 
-                float & f = img->pixel(x, y, channel);
+                    float & f = img->pixel(channel, x, y, z);
 
-                // Add error and quantize.
-                float qf = float(f + row0[1+x] > threshold);
+                    // Add error and quantize.
+                    float qf = float(f + row0[1+x] > threshold);
 
-                // Compute new error:
-                float diff = f - qf;
+                    // Compute new error:
+                    float diff = f - qf;
 
-                // Store color.
-                f = qf;
+                    // Store color.
+                    f = qf;
 
-                // Propagate new error.
-                row0[1+x+1] += (7.0f / 16.0f) * diff;
-                row1[1+x-1] += (3.0f / 16.0f) * diff;
-                row1[1+x+0] += (5.0f / 16.0f) * diff;
-                row1[1+x+1] += (1.0f / 16.0f) * diff;
-            }
+                    // Propagate new error.
+                    row0[1+x+1] += (7.0f / 16.0f) * diff;
+                    row1[1+x-1] += (3.0f / 16.0f) * diff;
+                    row1[1+x+0] += (5.0f / 16.0f) * diff;
+                    row1[1+x+1] += (1.0f / 16.0f) * diff;
+                }
 
-            swap(row0, row1);
-            memset(row1, 0, sizeof(float)*(w+2));
+                swap(row0, row1);
+                memset(row1, 0, sizeof(float)*(w+2));
+            }
         }
 
         delete [] row0;
@@ -1510,6 +1531,7 @@ void TexImage::binarize(int channel, float threshold, bool dither)
 // Uniform quantizer.
 // Assumes input is in [0, 1] range. Output is in the [0, 1] range, but rounded to the middle of each bin.
 // If exactEndPoints is true, [0, 1] are represented exactly, and the correponding bins are half the size, so quantization is not truly uniform.
+// When dither is true, this uses Floyd-Steinberg dithering.
 void TexImage::quantize(int channel, int bits, bool exactEndPoints, bool dither)
 {
     if (m->image == NULL) return;
@@ -1517,11 +1539,8 @@ void TexImage::quantize(int channel, int bits, bool exactEndPoints, bool dither)
     detach();
 
     FloatImage * img = m->image;
-    const uint w = img->width();
-    const uint h = img->height();
 
     float scale, offset;
-
     if (exactEndPoints) {
         scale = float((1 << bits) - 1);
         offset = 0.0f;
@@ -1533,40 +1552,47 @@ void TexImage::quantize(int channel, int bits, bool exactEndPoints, bool dither)
 
     if (!dither) {
         float * c = img->channel(channel);
-        const uint count = w * h;
+        const uint count = img->pixelCount();
         for (uint i = 0; i < count; i++) {
             c[i] = floorf(c[i] * scale + offset) / scale;
         }
     }
     else {
+        const uint w = img->width();
+        const uint h = img->height();
+        const uint d = img->depth();
+        // Error rows are reset per depth slice, so each slice is dithered independently.
         float * row0 = new float[(w+2)];
         float * row1 = new float[(w+2)];
-        memset(row0, 0, sizeof(float)*(w+2));
-        memset(row1, 0, sizeof(float)*(w+2));
 
-        for (uint y = 0; y < h; y++) {
-            for (uint x = 0; x < w; x++) {
+        for (uint z = 0; z < d; z++) {
+            memset(row0, 0, sizeof(float)*(w+2));
+            memset(row1, 0, sizeof(float)*(w+2));
 
-                float & f = img->pixel(x, y, channel);
+            for (uint y = 0; y < h; y++) {
+                for (uint x = 0; x < w; x++) {
 
-                // Add error and quantize.
-                float qf = floorf((f + row0[1+x]) * scale + offset) / scale;
+                    float & f = img->pixel(channel, x, y, z);
 
-                // Compute new error:
-                float diff = f - qf;
+                    // Add error and quantize.
+                    float qf = floorf((f + row0[1+x]) * scale + offset) / scale;
 
-                // Store color.
-                f = qf;
+                    // Compute new error:
+                    float diff = f - qf;
 
-                // Propagate new error.
-                row0[1+x+1] += (7.0f / 16.0f) * diff;
-                row1[1+x-1] += (3.0f / 16.0f) * diff;
-                row1[1+x+0] += (5.0f / 16.0f) * diff;
-                row1[1+x+1] += (1.0f / 16.0f) * diff;
-            }
+                    // Store color.
+                    f = qf;
 
-            swap(row0, row1);
-            memset(row1, 0, sizeof(float)*(w+2));
+                    // Propagate new error.
+                    row0[1+x+1] += (7.0f / 16.0f) * diff;
+                    row1[1+x-1] += (3.0f / 16.0f) * diff;
+                    row1[1+x+0] += (5.0f / 16.0f) * diff;
+                    row1[1+x+1] += (1.0f / 16.0f) * diff;
+                }
+
+                swap(row0, row1);
+                memset(row1, 0, sizeof(float)*(w+2));
+            }
         }
 
         delete [] row0;
@@ -1612,13 +1638,14 @@ void TexImage::transformNormals(NormalTransform xform)
 
     detach();
 
-    m->image->expandNormals(0);
+    FloatImage * img = m->image;
+    img->expandNormals(0);
 
-    const uint count = m->image->width() * m->image->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
-        float & x = m->image->pixel(i, 0);
-        float & y = m->image->pixel(i, 1);
-        float & z = m->image->pixel(i, 2);
+        float & x = img->pixel(0, i);
+        float & y = img->pixel(1, i);
+        float & z = img->pixel(2, i);
         Vector3 n(x, y, z);
 
         n = normalizeSafe(n, Vector3(0.0f), 0.0f);
@@ -1675,7 +1702,7 @@ void TexImage::transformNormals(NormalTransform xform)
         z = n.z;
     }
 
-    m->image->packNormals(0);
+    img->packNormals(0);
 }
 
 void TexImage::reconstructNormals(NormalTransform xform)
@@ -1684,13 +1711,14 @@ void TexImage::reconstructNormals(NormalTransform xform)
 
     detach();
 
-    m->image->expandNormals(0);
+    FloatImage * img = m->image;
+    img->expandNormals(0);
 
-    const uint count = m->image->width() * m->image->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++) {
-        float & x = m->image->pixel(i, 0);
-        float & y = m->image->pixel(i, 1);
-        float & z = m->image->pixel(i, 2);
+        float & x = img->pixel(0, i);
+        float & y = img->pixel(1, i);
+        float & z = img->pixel(2, i);
         Vector3 n(x, y, z);
 
         if (xform == NormalTransform_Orthographic) {
@@ -1723,16 +1751,68 @@ void TexImage::reconstructNormals(NormalTransform xform)
         z = n.z;
     }
 
+    img->packNormals(0);
+}
+
+void TexImage::toCleanNormalMap()
+{
+    if (m->image == NULL) return;
+
+    detach();
+
+    m->image->expandNormals(0);
+
+    const uint count = m->image->pixelCount();
+    for (uint i = 0; i < count; i++) {
+        float x = m->image->pixel(0, i);
+        float y = m->image->pixel(1, i);
+
+        m->image->pixel(2, i) = x*x + y*y;
+    }
+
+    m->image->packNormals(0);
+}
+
+// [-1,1] -> [ 0,1]
+void TexImage::packNormals() {
+    if (m->image == NULL) return;
+    detach();
     m->image->packNormals(0);
 }
 
-void TexImage::flipVertically()
+// [ 0,1] -> [-1,1]
+void TexImage::expandNormals() {
+    if (m->image == NULL) return;
+    detach();
+    m->image->expandNormals(0);
+}
+
+
+void TexImage::flipX()
+{
+    if (m->image == NULL) return;
+
+    detach();
+
+    m->image->flipX();
+}
+
+void TexImage::flipY()
+{
+    if (m->image == NULL) return;
+
+    detach();
+
+    m->image->flipY();
+}
+
+void TexImage::flipZ()
 {
     if (m->image == NULL) return;
 
     detach();
 
-    m->image->flip();
+    m->image->flipZ();
 }
 
 bool TexImage::copyChannel(const TexImage & srcImage, int srcChannel)
@@ -1747,17 +1827,14 @@ bool TexImage::copyChannel(const TexImage & srcImage, int srcChannel, int dstCha
     FloatImage * dst = m->image;
     const FloatImage * src = srcImage.m->image;
 
-    if (dst == NULL || src == NULL || dst->width() != src->width() || dst->height() != src->height()) {
+    if (!sameLayout(dst, src)) {
         return false;
     }
-    nvDebugCheck(dst->componentNum() == 4 && src->componentNum() == 4);
+    nvDebugCheck(dst->componentCount() == 4 && src->componentCount() == 4);
 
     detach();
 
-    const uint w = src->width();
-    const uint h = src->height();
-
-    memcpy(dst->channel(dstChannel), src->channel(srcChannel), w*h*sizeof(float));
+    memcpy(dst->channel(dstChannel), src->channel(srcChannel), dst->pixelCount()*sizeof(float));
 
     return true;
 }
@@ -1769,10 +1846,10 @@ bool TexImage::addChannel(const TexImage & srcImage, int srcChannel, int dstChan
     FloatImage * dst = m->image;
     const FloatImage * src = srcImage.m->image;
 
-    if (dst == NULL || src == NULL || dst->width() != src->width() || dst->height() != src->height()) {
+    if (!sameLayout(dst, src)) {
         return false;
     }
-    nvDebugCheck(dst->componentNum() == 4 && src->componentNum() == 4);
+    nvDebugCheck(dst->componentCount() == 4 && src->componentCount() == 4);
 
     detach();
 
@@ -1781,7 +1858,9 @@ bool TexImage::addChannel(const TexImage & srcImage, int srcChannel, int dstChan
 
     float * d = dst->channel(dstChannel);
     const float * s = src->channel(srcChannel);
-    for (uint i = 0; i < w*h; i++) {
+
+    const uint count = src->pixelCount();
+    for (uint i = 0; i < count; i++) {
         d[i] += s[i] * scale;
     }
 
@@ -1819,27 +1898,28 @@ TexImage nvtt::diff(const TexImage & reference, const TexImage & image, float sc
     const FloatImage * ref = reference.m->image;
     const FloatImage * img = image.m->image;
 
-    if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
+    if (!sameLayout(img, ref)) {
         return TexImage();
     }
-    nvDebugCheck(img->componentNum() == 4);
-    nvDebugCheck(ref->componentNum() == 4);
+
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
 
     nvtt::TexImage diffImage;
     FloatImage * diff = diffImage.m->image = new FloatImage;
-    diff->allocate(4, img->width(), img->height());
+    diff->allocate(4, img->width(), img->height(), img->depth());
 
-    const uint count = img->width() * img->height();
+    const uint count = img->pixelCount();
     for (uint i = 0; i < count; i++)
     {
-        float r0 = img->pixel(i, 0);
-        float g0 = img->pixel(i, 1);
-        float b0 = img->pixel(i, 2);
-        //float a0 = img->pixel(i, 3);
-        float r1 = ref->pixel(i, 0);
-        float g1 = ref->pixel(i, 1);
-        float b1 = ref->pixel(i, 2);
-        float a1 = ref->pixel(i, 3);
+        float r0 = img->pixel(0, i);
+        float g0 = img->pixel(1, i);
+        float b0 = img->pixel(2, i);
+        //float a0 = img->pixel(3, i);
+        float r1 = ref->pixel(0, i);
+        float g1 = ref->pixel(1, i);
+        float b1 = ref->pixel(2, i);
+        float a1 = ref->pixel(3, i);
 
         float dr = r0 - r1;
         float dg = g0 - g1;
@@ -1853,10 +1933,10 @@ TexImage nvtt::diff(const TexImage & reference, const TexImage & image, float sc
             db *= a1;
         }
 
-        diff->pixel(i, 0) = dr * scale;
-        diff->pixel(i, 1) = dg * scale;
-        diff->pixel(i, 2) = db * scale;
-        diff->pixel(i, 3) = a1;
+        diff->pixel(0, i) = dr * scale;
+        diff->pixel(1, i) = dg * scale;
+        diff->pixel(2, i) = db * scale;
+        diff->pixel(3, i) = a1;
     }
 
     return diffImage;
diff --git a/src/nvtt/cuda/CudaCompressorDXT.cpp b/src/nvtt/cuda/CudaCompressorDXT.cpp
index 50c5143..51a44f2 100644
--- a/src/nvtt/cuda/CudaCompressorDXT.cpp
+++ b/src/nvtt/cuda/CudaCompressorDXT.cpp
@@ -125,8 +125,9 @@ CudaCompressor::CudaCompressor(CudaContext & ctx) : m_ctx(ctx)
 
 }
 
-void CudaCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
+void CudaCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
 {
+    nvDebugCheck(d == 1);
     nvDebugCheck(cuda::isHardwarePresent());
 
 #if defined HAVE_CUDA
diff --git a/src/nvtt/cuda/CudaCompressorDXT.h b/src/nvtt/cuda/CudaCompressorDXT.h
index cc08759..e40fa9b 100644
--- a/src/nvtt/cuda/CudaCompressorDXT.h
+++ b/src/nvtt/cuda/CudaCompressorDXT.h
@@ -54,7 +54,7 @@ namespace nv
     {
         CudaCompressor(CudaContext & ctx);
 
-        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
+        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
 
         virtual void setup(cudaArray * image, const nvtt::CompressionOptions::Private & compressionOptions) = 0;
         virtual void compressBlocks(uint first, uint count, uint w, uint h, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
diff --git a/src/nvtt/cuda/CudaUtils.cpp b/src/nvtt/cuda/CudaUtils.cpp
index 2ef9b66..e880f8b 100644
--- a/src/nvtt/cuda/CudaUtils.cpp
+++ b/src/nvtt/cuda/CudaUtils.cpp
@@ -23,7 +23,7 @@
 // OTHER DEALINGS IN THE SOFTWARE.
 
 #include "nvcore/Debug.h"
-//#include "nvcore/Library.h"
+#include "nvcore/Library.h"
 #include "CudaUtils.h"
 
 #if defined HAVE_CUDA
diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h
index 0ca09d3..a8c6942 100644
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@@ -190,7 +190,7 @@ namespace nvtt
     {
         TextureType_2D,
         TextureType_Cube,
-    //  TextureType_3D,
+        TextureType_3D,
     };
 
     /// Input formats.
@@ -415,6 +415,7 @@ namespace nvtt
         NVTT_API int width() const;
         NVTT_API int height() const;
         NVTT_API int depth() const;
+        NVTT_API TextureType type() const;
         NVTT_API WrapMode wrapMode() const;
         NVTT_API AlphaMode alphaMode() const;
         NVTT_API bool isNormalMap() const;
@@ -428,13 +429,13 @@ namespace nvtt
         // Texture data.
         NVTT_API bool load(const char * fileName, bool * hasAlpha = 0);
         NVTT_API bool save(const char * fileName) const;
-        NVTT_API bool setImage2D(InputFormat format, int w, int h, const void * data);
-        NVTT_API bool setImage2D(InputFormat format, int w, int h, const void * r, const void * g, const void * b, const void * a);
+        NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * data);
+        NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a);
         NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, const void * data);
 
         // Resizing methods.
-        NVTT_API void resize(int w, int h, ResizeFilter filter);
-        NVTT_API void resize(int w, int h, ResizeFilter filter, float filterWidth, const float * params = 0);
+        NVTT_API void resize(int w, int h, int d, ResizeFilter filter);
+        NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0);
         NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
         NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
         NVTT_API bool buildNextMipmap(MipmapFilter filter);
@@ -471,14 +472,19 @@ namespace nvtt
         NVTT_API void binarize(int channel, float threshold, bool dither);
         NVTT_API void quantize(int channel, int bits, bool exactEndPoints, bool dither);
 
-        // Normal map transforms.
+        // Normal map transforms. @@ All these methods assume packed normals.
         NVTT_API void toNormalMap(float sm, float medium, float big, float large);
         NVTT_API void normalizeNormalMap();
         NVTT_API void transformNormals(NormalTransform xform);
         NVTT_API void reconstructNormals(NormalTransform xform);
+        NVTT_API void toCleanNormalMap();
+        NVTT_API void packNormals();   // [-1,1] -> [ 0,1]
+        NVTT_API void expandNormals(); // [ 0,1] -> [-1,1]
 
         // Geometric transforms.
-        NVTT_API void flipVertically();
+        NVTT_API void flipX();
+        NVTT_API void flipY();
+        NVTT_API void flipZ();
 
         // Copy image data.
         NVTT_API bool copyChannel(const TexImage & srcImage, int srcChannel);
@@ -501,6 +507,54 @@ namespace nvtt
     };
 
 
+    /// A cube map texture. Individual faces are exposed as TexImage objects through face().
+    struct CubeImage
+    {
+        NVTT_API CubeImage();
+        NVTT_API CubeImage(const CubeImage & tex);
+        NVTT_API ~CubeImage();
+
+        NVTT_API void operator=(const CubeImage & tex);
+
+        // Queries.
+        NVTT_API bool isNull() const;
+        NVTT_API int size() const;  // NOTE(review): presumably the edge length of a face -- confirm in the implementation.
+        NVTT_API int countMipmaps() const;
+        NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const;
+
+        // Texture data. Every setImage2D overload takes a 'face' argument (NOTE(review): valid index range is not visible here).
+        NVTT_API bool load(const char * fileName);
+        NVTT_API bool save(const char * fileName) const;
+        NVTT_API bool setImage2D(InputFormat format, int face, int w, int h, const void * data);
+        NVTT_API bool setImage2D(InputFormat format, int face, int w, int h, const void * r, const void * g, const void * b, const void * a);
+        NVTT_API bool setImage2D(Format format, Decoder decoder, int face, int w, int h, const void * data);
+
+        // Face access. Carries NVTT_API like every other public member so that it is exported from the library as well.
+        NVTT_API TexImage & face(int face);
+
+        // @@ Add resizing methods.
+        // The declarations below are commented out, so they are not part of the interface.
+        /*
+        NVTT_API void resize(int w, int h, ResizeFilter filter);
+        NVTT_API void resize(int w, int h, ResizeFilter filter, float filterWidth, const float * params = 0);
+        NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
+        NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
+        NVTT_API bool buildNextMipmap(MipmapFilter filter);
+        NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0);
+        */
+
+        // Color transforms.
+        NVTT_API void toLinear(float gamma);
+        NVTT_API void toGamma(float gamma);
+
+    private:
+        void detach();  // NOTE(review): presumably un-shares 'm' before a mutation -- confirm in the implementation.
+
+        struct Private; // Implementation type, only forward-declared here.
+        Private * m;    // Pointer to the implementation state.
+    };
+
+
     // Return string for the given error code.
     NVTT_API const char * errorString(Error e);
 
diff --git a/src/nvtt/tests/imperativeapi.cpp b/src/nvtt/tests/imperativeapi.cpp
index 98a29bf..bd319aa 100644
--- a/src/nvtt/tests/imperativeapi.cpp
+++ b/src/nvtt/tests/imperativeapi.cpp
@@ -60,7 +60,7 @@ int main(int argc, char *argv[])
     // Output compressed image.
     context.outputHeader(image, image.countMipmaps(), compressionOptions, outputOptions);
 
-    image.flipVertically();
+    image.flipY();
     image.setAlphaMode(nvtt::AlphaMode_Transparency);
 
     // Output first mipmap.
diff --git a/src/nvtt/tests/process_alpha_map.cpp b/src/nvtt/tests/process_alpha_map.cpp
index 58a9557..6452d11 100644
--- a/src/nvtt/tests/process_alpha_map.cpp
+++ b/src/nvtt/tests/process_alpha_map.cpp
@@ -95,7 +95,7 @@ int main(int argc, char *argv[])
     // Output header and first mipmap.
     context.outputHeader(colorMap, colorMap.countMipmaps(), colorCompressionOptions, colorOutputOptions);
 
-    colorMap.flipVertically();
+    colorMap.flipY();
     colorMap.setAlphaMode(nvtt::AlphaMode_Transparency);
 
     context.compress(colorMap, 0, 0, colorCompressionOptions, colorOutputOptions);
@@ -103,7 +103,7 @@ int main(int argc, char *argv[])
     if (inputFileNameNormal != NULL) {
         context.outputHeader(normalMap, normalMap.countMipmaps(), normalCompressionOptions, normalOutputOptions);
 
-        normalMap.flipVertically();
+        normalMap.flipY();
         normalMap.setAlphaMode(nvtt::AlphaMode_Transparency);
         normalMap.normalizeNormalMap();
         normalMap.copyChannel(colorMap, 3); // Copy alpha channel from color to normal map.
@@ -118,7 +118,7 @@ int main(int argc, char *argv[])
     const float coverage = colorMap.alphaTestCoverage(alphaRef);
 
     // Build and output mipmaps.
-	int m = 1;
+    int m = 1;
     while (colorMap.buildNextMipmap(nvtt::MipmapFilter_Kaiser))
     {
         colorMap.scaleAlphaToCoverage(coverage, alphaRef);
@@ -136,7 +136,7 @@ int main(int argc, char *argv[])
             context.compress(normalMap, 0, m, normalCompressionOptions, normalOutputOptions);            
         }
 
-		m++;
+        m++;
     }
 
     return EXIT_SUCCESS;
diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp
index 73c272c..9645238 100644
--- a/src/nvtt/tests/testsuite.cpp
+++ b/src/nvtt/tests/testsuite.cpp
@@ -839,7 +839,7 @@ int main(int argc, char *argv[])
                 outputFileName.format("%s/%s", outPath, set.fileNames[i]);
                 outputFileName.stripExtension();
                 if (set.type == ImageType_HDR) outputFileName.append(".dds");
-                else outputFileName.append(".png");
+                else outputFileName.append(".tga");
                 if (!img.save(outputFileName.str()))
                 {
                     printf("Error saving file '%s'.\n", outputFileName.str());
@@ -851,7 +851,7 @@ int main(int argc, char *argv[])
             outputFileName.format("%s/%s", outputFilePath.str(), set.fileNames[i]);
             outputFileName.stripExtension();
             if (set.type == ImageType_HDR) outputFileName.append(".dds");
-            else outputFileName.append(".png");
+            else outputFileName.append(".tga");
             if (!img_out.save(outputFileName.str()))
             {
                 printf("Error saving file '%s'.\n", outputFileName.str());
@@ -886,7 +886,7 @@ int main(int argc, char *argv[])
 
             outputFileName.format("%s/%s", outputFilePath.str(), set.fileNames[i]);
             outputFileName.stripExtension();
-            outputFileName.append("_diff.png");
+            outputFileName.append("_diff.tga");
             diff.save(outputFileName.str());