Add BC6 support to nvtt lib and utils.

- Use 3x3 eigensolver for initial fit in ZOH. Slightly better perf and RMSE than power method.
- Remove use of double precision in ZOH - speeds up by 12%.
- Fixed RGBM encoding that was broken for HDR images.
- Use gamma-2.0 space for RGBM for HDR images (improves precision in darks).
- Use UNORM instead of TYPELESS formats when saving a DX10 .dds file. The TYPELESS formats break most viewers.
- Cleaned up warnings in ZOH code.
- Command-line utils will warn if you give them an unrecognized parameter.
- Added VS2010 profiling results.
2013-10-25 17:30:55 +00:00
parent 77188a42ac
commit 474239c784
43 changed files with 1610 additions and 1161 deletions
--- a/src/nvimage/BlockDXT.cpp
+++ b/src/nvimage/BlockDXT.cpp
@ -26,6 +26,10 @@

 #include "nvcore/Stream.h"
 #include "nvcore/Utils.h" // swap
+#include "nvmath/Half.h"
+
+#include "nvtt/bc6h/zoh.h"
+#include "nvtt/bc6h/utils.h"


 using namespace nv;
@ -610,6 +614,33 @@ void BlockCTX1::setIndices(int * idx)
 }


+/// Decode BC6 block.
+/// Decompresses the 16 bytes in `data` with ZOH::decompress into a 4x4 tile and
+/// writes the result to `set` as 16 colors stored row by row (index y * 4 + x).
+/// The w component of every color is set to 1.0f.
+void BlockBC6::decodeBlock(ColorSet * set) const
+{
+	Tile tile(4, 4);
+	ZOH::decompress((const char *)data, tile);
+
+	// Convert ZOH's tile struct back to NVTT's, and convert half to float.
+	set->allocate(4, 4);
+	for (uint y = 0; y < 4; ++y)
+	{
+		for (uint x = 0; x < 4; ++x)
+		{
+			// Each channel goes through Tile::float2half and then to_float.
+			// NOTE(review): presumably the tile keeps half-precision values in a float container - confirm against Tile.
+			uint16 rHalf = Tile::float2half(tile.data[y][x].x);
+			uint16 gHalf = Tile::float2half(tile.data[y][x].y);
+			uint16 bHalf = Tile::float2half(tile.data[y][x].z);
+			set->colors[y * 4 + x].x = to_float(rHalf);
+			set->colors[y * 4 + x].y = to_float(gHalf);
+			set->colors[y * 4 + x].z = to_float(bHalf);
+			set->colors[y * 4 + x].w = 1.0f;
+
+			// Set indices in case someone uses them
+			set->indices[y * 4 + x] = y * 4 + x;
+		}
+	}
+}
+
+
 /// Flip CTX1 block vertically.
 inline void BlockCTX1::flip4()
 {
@ -671,3 +702,8 @@ Stream & nv::operator<<(Stream & stream, BlockCTX1 & block)
    return stream;
 }

+/// Serialize a BC6 block: passes the block to the stream as sizeof(block) raw bytes.
+Stream & nv::operator<<(Stream & stream, BlockBC6 & block)
+{
+    stream.serialize(&block, sizeof(block));
+    return stream;
+}
--- a/src/nvimage/BlockDXT.h
+++ b/src/nvimage/BlockDXT.h
@ -32,6 +32,7 @@
 namespace nv
 {
    struct ColorBlock;
+	struct ColorSet;
    class Stream;


@ -212,6 +213,15 @@ namespace nv
        void flip2();
    };

+	/// BC6 block.
+	/// Holds the 16 raw bytes of one compressed block; no bit fields are exposed.
+	struct BlockBC6
+	{
+		uint8 data[16];		// Not even going to try to write a union for this thing.
+		/// Decodes this block into `set` (implemented in BlockDXT.cpp).
+		void decodeBlock(ColorSet * set) const;
+	};
+
+	/// !!!UNDONE: BC7 block
+

    // Serialization functions.
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockDXT1 & block);
@ -222,6 +232,7 @@ namespace nv
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI1 & block);
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block);
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block);
+    NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block);

 } // nv namespace

--- a/src/nvimage/DirectDrawSurface.cpp
+++ b/src/nvimage/DirectDrawSurface.cpp
@ -950,7 +950,8 @@ bool DirectDrawSurface::isSupported() const
            header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM ||
            header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM ||
            header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM ||
-            header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM)
+            header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM ||
+			header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16)
        {
            return true;
        }
@ -1340,13 +1341,12 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
    if (header.hasDX10Header())
    {
        if (header.header10.dxgiFormat == DXGI_FORMAT_BC1_UNORM) fourcc = FOURCC_DXT1;
-        if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3;
-        if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5;
-        if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1;
-        if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2;
+        else if (header.header10.dxgiFormat == DXGI_FORMAT_BC2_UNORM) fourcc = FOURCC_DXT3;
+        else if (header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM) fourcc = FOURCC_DXT5;
+        else if (header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM) fourcc = FOURCC_ATI1;
+        else if (header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM) fourcc = FOURCC_ATI2;
    }

-
    if (fourcc == FOURCC_DXT1)
    {
        BlockDXT1 block;
@ -1389,6 +1389,31 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
        *stream << block;
        block.decodeBlock(rgba);
    }
+	else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16)
+	{
+		BlockBC6 block;
+		*stream << block;
+		ColorSet set;
+		block.decodeBlock(&set);
+
+		// Clamp to [0, 1] and round to 8-bit
+		for (int y = 0; y < 4; ++y)
+		{
+			for (int x = 0; x < 4; ++x)
+			{
+				Vector4 px = set.colors[y*4 + x];
+				rgba->color(x, y).setRGBA(
+									uint8(clamp(px.x, 0.0f, 1.0f) * 255.0f + 0.5f),
+									uint8(clamp(px.y, 0.0f, 1.0f) * 255.0f + 0.5f),
+									uint8(clamp(px.z, 0.0f, 1.0f) * 255.0f + 0.5f),
+									uint8(clamp(px.w, 0.0f, 1.0f) * 255.0f + 0.5f));
+			}
+		}
+	}
+	else
+	{
+		nvDebugCheck(false);
+	}

    // If normal flag set, convert to normal.
    if (header.pf.flags & DDPF_NORMAL)
--- a/src/nvimage/ErrorMetric.cpp
+++ b/src/nvimage/ErrorMetric.cpp
@ -1,294 +1,294 @@
-
-#include "ErrorMetric.h"
-#include "FloatImage.h"
-#include "Filter.h"
-
-#include "nvmath/Matrix.h"
-#include "nvmath/Vector.inl"
-
-#include <float.h> // FLT_MAX
-
-using namespace nv;
-
-float nv::rmsColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight)
-{
-    if (!sameLayout(img, ref)) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img->componentCount() == 4);
-    nvDebugCheck(ref->componentCount() == 4);
-
-    double mse = 0;
-
-    const uint count = img->pixelCount();
-    for (uint i = 0; i < count; i++)
-    {
-        float r0 = img->pixel(i + count * 0);
-        float g0 = img->pixel(i + count * 1);
-        float b0 = img->pixel(i + count * 2);
-        //float a0 = img->pixel(i + count * 3);
-        float r1 = ref->pixel(i + count * 0);
-        float g1 = ref->pixel(i + count * 1);
-        float b1 = ref->pixel(i + count * 2);
-        float a1 = ref->pixel(i + count * 3);
-
-        float r = r0 - r1;
-        float g = g0 - g1;
-        float b = b0 - b1;
-
-        float a = 1;
-        if (alphaWeight) a = a1;
-
-        mse += r * r * a;
-        mse += g * g * a;
-        mse += b * b * a;
-    }
-
-    return float(sqrt(mse / count));
-}
-
-float nv::rmsAlphaError(const FloatImage * img, const FloatImage * ref)
-{
-    if (!sameLayout(img, ref)) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
-
-    double mse = 0;
-
-    const uint count = img->pixelCount();
-    for (uint i = 0; i < count; i++)
-    {
-        float a0 = img->pixel(i + count * 3);
-        float a1 = ref->pixel(i + count * 3);
-
-        float a = a0 - a1;
-
-        mse += a * a;
-    }
-
-    return float(sqrt(mse / count));
-}
-
-
-float nv::averageColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight)
-{
-    if (!sameLayout(img, ref)) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img->componentCount() == 4);
-    nvDebugCheck(ref->componentCount() == 4);
-
-    double mae = 0;
-
-    const uint count = img->pixelCount();
-    for (uint i = 0; i < count; i++)
-    {
-        float r0 = img->pixel(i + count * 0);
-        float g0 = img->pixel(i + count * 1);
-        float b0 = img->pixel(i + count * 2);
-        //float a0 = img->pixel(i + count * 3);
-        float r1 = ref->pixel(i + count * 0);
-        float g1 = ref->pixel(i + count * 1);
-        float b1 = ref->pixel(i + count * 2);
-        float a1 = ref->pixel(i + count * 3);
-
-        float r = fabs(r0 - r1);
-        float g = fabs(g0 - g1);
-        float b = fabs(b0 - b1);
-
-        float a = 1;
-        if (alphaWeight) a = a1;
-
-        mae += r * a;
-        mae += g * a;
-        mae += b * a;
-    }
-
-    return float(mae / count);
-}
-
-float nv::averageAlphaError(const FloatImage * img, const FloatImage * ref)
-{
-    if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
-
-    double mae = 0;
-
-    const uint count = img->width() * img->height();
-    for (uint i = 0; i < count; i++)
-    {
-        float a0 = img->pixel(i + count * 3);
-        float a1 = ref->pixel(i + count * 3);
-
-        float a = a0 - a1;
-
-        mae += fabs(a);
-    }
-
-    return float(mae / count);
-}
-
-
-// Color space conversions based on:
-// http://www.brucelindbloom.com/
-
-// Assumes input is in *linear* sRGB color space.
-static Vector3 rgbToXyz(Vector3::Arg c)
-{
-    Vector3 xyz;
-    xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z;
-    xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z;
-    xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z;
-    return xyz;
-}
-
-static Vector3 xyzToRgb(Vector3::Arg c)
-{
-    Vector3 rgb;
-    rgb.x =  3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z;
-    rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z;
-    rgb.z =  0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z;
-    return rgb;
-}
-
-static float toLinear(float f)
-{
-    return powf(f, 2.2f);
-}
-
-static float toGamma(float f)
-{
-    // @@ Use sRGB space?
-    return powf(f, 1.0f/2.2f);
-}
-
-static Vector3 toLinear(Vector3::Arg c)
-{
-    return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z));
-}
-
-static Vector3 toGamma(Vector3::Arg c)
-{
-    return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z));
-}
-
-static float f(float t)
-{
-    const float epsilon = powf(6.0f/29.0f, 3);
-
-    if (t > epsilon) {
-        return powf(t, 1.0f/3.0f);
-    }
-    else {
-        return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f;
-    }
-}
-
-static float finv(float t)
-{
-    if (t > 6.0f / 29.0f) {
-        return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f);
-    }
-    else {
-        return powf(t, 3.0f);
-    }
-}
-
-static Vector3 xyzToCieLab(Vector3::Arg c)
-{
-    // Normalized white point.
-    const float Xn = 0.950456f;
-    const float Yn = 1.0f;
-    const float Zn = 1.088754f;
-
-    float Xr = c.x / Xn;
-    float Yr = c.y / Yn;
-    float Zr = c.z / Zn;
-
-    float fx = f(Xr);
-    float fy = f(Yr);
-    float fz = f(Zr);
-
-    float L = 116 * fx - 16;
-    float a = 500 * (fx - fy);
-    float b = 200 * (fy - fz);
-
-    return Vector3(L, a, b);
-}
-
-static Vector3 rgbToCieLab(Vector3::Arg c)
-{
-    return xyzToCieLab(rgbToXyz(toLinear(c)));
-}
-
+
+#include "ErrorMetric.h"
+#include "FloatImage.h"
+#include "Filter.h"
+
+#include "nvmath/Matrix.h"
+#include "nvmath/Vector.inl"
+
+#include <float.h> // FLT_MAX
+
+using namespace nv;
+
+// Root-mean-square error of the first three channels between img and ref.
+// Returns FLT_MAX when sameLayout(img, ref) fails. When alphaWeight is set, each
+// squared difference is multiplied by the reference image's fourth channel.
+// The squared differences of all three channels are summed and divided by the
+// pixel count (not by 3 * count) before taking the square root.
+float nv::rmsColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
+
+    double mse = 0;
+
+    // Channel k of pixel i is read at index i + count * k.
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float r0 = img->pixel(i + count * 0);
+        float g0 = img->pixel(i + count * 1);
+        float b0 = img->pixel(i + count * 2);
+        //float a0 = img->pixel(i + count * 3);
+        float r1 = ref->pixel(i + count * 0);
+        float g1 = ref->pixel(i + count * 1);
+        float b1 = ref->pixel(i + count * 2);
+        float a1 = ref->pixel(i + count * 3);
+
+        float r = r0 - r1;
+        float g = g0 - g1;
+        float b = b0 - b1;
+
+        // Weight is 1 unless alpha weighting was requested.
+        float a = 1;
+        if (alphaWeight) a = a1;
+
+        mse += r * r * a;
+        mse += g * g * a;
+        mse += b * b * a;
+    }
+
+    return float(sqrt(mse / count));
+}
+
+// Root-mean-square error of the fourth channel (index 3) between img and ref.
+// Returns FLT_MAX when sameLayout(img, ref) fails.
+float nv::rmsAlphaError(const FloatImage * img, const FloatImage * ref)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
+
+    double mse = 0;
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        // The fourth channel starts at offset count * 3.
+        float a0 = img->pixel(i + count * 3);
+        float a1 = ref->pixel(i + count * 3);
+
+        float a = a0 - a1;
+
+        mse += a * a;
+    }
+
+    return float(sqrt(mse / count));
+}
+
+
+// Mean absolute error of the first three channels between img and ref.
+// Returns FLT_MAX when sameLayout(img, ref) fails. When alphaWeight is set, each
+// absolute difference is multiplied by the reference image's fourth channel.
+// The sum over all three channels is divided by the pixel count (not by 3 * count).
+float nv::averageColorError(const FloatImage * img, const FloatImage * ref, bool alphaWeight)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
+
+    double mae = 0;
+
+    // Channel k of pixel i is read at index i + count * k.
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float r0 = img->pixel(i + count * 0);
+        float g0 = img->pixel(i + count * 1);
+        float b0 = img->pixel(i + count * 2);
+        //float a0 = img->pixel(i + count * 3);
+        float r1 = ref->pixel(i + count * 0);
+        float g1 = ref->pixel(i + count * 1);
+        float b1 = ref->pixel(i + count * 2);
+        float a1 = ref->pixel(i + count * 3);
+
+        float r = fabs(r0 - r1);
+        float g = fabs(g0 - g1);
+        float b = fabs(b0 - b1);
+
+        // Weight is 1 unless alpha weighting was requested.
+        float a = 1;
+        if (alphaWeight) a = a1;
+
+        mae += r * a;
+        mae += g * a;
+        mae += b * a;
+    }
+
+    return float(mae / count);
+}
+
+// Mean absolute error of the fourth channel (index 3) between img and ref.
+// Returns FLT_MAX when either image is NULL or sameLayout(img, ref) fails.
+float nv::averageAlphaError(const FloatImage * img, const FloatImage * ref)
+{
+    if (img == NULL || ref == NULL || !sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
+
+    double mae = 0;
+
+    // Use pixelCount() as the channel stride, as the other metrics in this file do.
+    // width() * height() only matches that stride when the image has a single slice.
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float a0 = img->pixel(i + count * 3);
+        float a1 = ref->pixel(i + count * 3);
+
+        float a = a0 - a1;
+
+        mae += fabs(a);
+    }
+
+    return float(mae / count);
+}
+
+
+// Color space conversions based on:
+// http://www.brucelindbloom.com/
+
+// Assumes input is in *linear* sRGB color space.
+// Converts an RGB triple to XYZ by multiplying with a fixed 3x3 matrix.
+static Vector3 rgbToXyz(Vector3::Arg c)
+{
+    Vector3 xyz;
+    xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z;
+    xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z;
+    xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z;
+    return xyz;
+}
+
+// Converts an XYZ triple to RGB by multiplying with a fixed 3x3 matrix.
+// No gamma encoding is applied here.
+static Vector3 xyzToRgb(Vector3::Arg c)
+{
+    Vector3 rgb;
+    rgb.x =  3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z;
+    rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z;
+    rgb.z =  0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z;
+    return rgb;
+}
+
+// Decodes a gamma-encoded value with a pure 2.2 power curve.
+static float toLinear(float f)
+{
+    return powf(f, 2.2f);
+}
+
+// Encodes a linear value with a pure 1/2.2 power curve.
+static float toGamma(float f)
+{
+    // @@ Use sRGB space?
+    return powf(f, 1.0f/2.2f);
+}
+
+// Applies the scalar toLinear() to each of the three components.
+static Vector3 toLinear(Vector3::Arg c)
+{
+    return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z));
+}
+
+// Applies the scalar toGamma() to each of the three components.
+static Vector3 toGamma(Vector3::Arg c)
+{
+    return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z));
+}
+
+// Piecewise function used by xyzToCieLab(): the cube root of t above the
+// threshold (6/29)^3, and the linear segment (1/3)(29/6)^2 * t + 4/29 at or below it.
+static float f(float t)
+{
+    const float epsilon = powf(6.0f/29.0f, 3);
+
+    if (t > epsilon) {
+        return powf(t, 1.0f/3.0f);
+    }
+    else {
+        return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f;
+    }
+}
+
+// Inverse of f(): the cube of t above 6/29, and the linear segment
+// 3 * (6/29)^2 * (t - 4/29) at or below it. The two pieces meet at t = 6/29,
+// which is f() evaluated at its own threshold (6/29)^3.
+static float finv(float t)
+{
+    if (t > 6.0f / 29.0f) {
+        return t * t * t;
+    }
+    else {
+        return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f);
+    }
+}
+
+// Converts an XYZ triple to CIE-Lab, returned as Vector3(L, a, b).
+// Each component is first divided by the white point below and passed through f().
+static Vector3 xyzToCieLab(Vector3::Arg c)
+{
+    // Normalized white point.
+    const float Xn = 0.950456f;
+    const float Yn = 1.0f;
+    const float Zn = 1.088754f;
+
+    float Xr = c.x / Xn;
+    float Yr = c.y / Yn;
+    float Zr = c.z / Zn;
+
+    float fx = f(Xr);
+    float fy = f(Yr);
+    float fz = f(Zr);
+
+    // Lightness is defined on the Y (luminance) term only.
+    float L = 116 * fy - 16;
+    float a = 500 * (fx - fy);
+    float b = 200 * (fy - fz);
+
+    return Vector3(L, a, b);
+}
+
+// Converts an RGB triple to CIE-Lab by applying toLinear(), rgbToXyz() and
+// xyzToCieLab() in that order.
+static Vector3 rgbToCieLab(Vector3::Arg c)
+{
+    return xyzToCieLab(rgbToXyz(toLinear(c)));
+}
+
 // h is hue-angle in radians
 // Converts (L, a, b) to (L, C, h) with C = sqrt(a^2 + b^2) and h = atan2f(a, b) as written.
 // NOTE(review): hue is conventionally atan2(b, a); the argument order below looks swapped - confirm.
 static Vector3 cieLabToLCh(Vector3::Arg c)
 {
    return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.y, c.z));
 }
-
-static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
-{
-    nvDebugCheck(rgbImage != NULL && LabImage != NULL);
-    nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
-    nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
-
-    const uint w = rgbImage->width();
-    const uint h = LabImage->height();
-
-    const float * R = rgbImage->channel(0);
-    const float * G = rgbImage->channel(1);
-    const float * B = rgbImage->channel(2);
-
-    float * L = LabImage->channel(0);
-    float * a = LabImage->channel(1);
-    float * b = LabImage->channel(2);
-
-    const uint count = w*h;
-    for (uint i = 0; i < count; i++)
-    {
-        Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i]));
-        L[i] = Lab.x;
-        a[i] = Lab.y;
-        b[i] = Lab.z;
-    }
-}
-
-
-// Assumes input images are in linear sRGB space.
-float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
-{
-    if (!sameLayout(img0, img1)) return FLT_MAX;
-    nvDebugCheck(img0->componentCount() == 4 && img0->componentCount() == 4);
-
-    const float * r0 = img0->channel(0);
-    const float * g0 = img0->channel(1);
-    const float * b0 = img0->channel(2);
-
-    const float * r1 = img1->channel(0);
-    const float * g1 = img1->channel(1);
-    const float * b1 = img1->channel(2);
-
-    double error = 0.0f;
-
-    const uint count = img0->pixelCount();
-    for (uint i = 0; i < count; i++)
-    {
-        Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
-        Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
-
-        // @@ Measure Delta E.
-        Vector3 delta = lab0 - lab1;
-        
-        error += length(delta);
-    }
-
-    return float(error / count);
-}
-
+
+// Converts the first three channels of rgbImage to CIE-Lab, pixel by pixel, and
+// writes L, a, b into channels 0, 1, 2 of LabImage. Both images must have the
+// same width and height and at least three components (debug-checked only).
+static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
+{
+    nvDebugCheck(rgbImage != NULL && LabImage != NULL);
+    nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
+    nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
+
+    const uint w = rgbImage->width();
+    const uint h = LabImage->height();
+
+    const float * R = rgbImage->channel(0);
+    const float * G = rgbImage->channel(1);
+    const float * B = rgbImage->channel(2);
+
+    float * L = LabImage->channel(0);
+    float * a = LabImage->channel(1);
+    float * b = LabImage->channel(2);
+
+    // Only width * height pixels are converted; depth is not taken into account here.
+    const uint count = w*h;
+    for (uint i = 0; i < count; i++)
+    {
+        Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i]));
+        L[i] = Lab.x;
+        a[i] = Lab.y;
+        b[i] = Lab.z;
+    }
+}
+
+
+// Assumes input images are in linear sRGB space.
+// NOTE(review): rgbToCieLab() applies toLinear() to its input, which suggests
+// gamma-encoded input is expected - confirm which one callers pass.
+// Returns the mean Euclidean distance between the CIE-Lab colors of img0 and
+// img1 over all pixels, or FLT_MAX when sameLayout(img0, img1) fails.
+float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
+{
+    if (!sameLayout(img0, img1)) return FLT_MAX;
+    // Check both images; the second operand previously re-tested img0.
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    const float * r0 = img0->channel(0);
+    const float * g0 = img0->channel(1);
+    const float * b0 = img0->channel(2);
+
+    const float * r1 = img1->channel(0);
+    const float * g1 = img1->channel(1);
+    const float * b1 = img1->channel(2);
+
+    double error = 0.0f;
+
+    const uint count = img0->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
+        Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
+
+        // @@ Measure Delta E.
+        Vector3 delta = lab0 - lab1;
+
+        error += length(delta);
+    }
+
+    return float(error / count);
+}
+
 // Assumes input images are in linear sRGB space.
 float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1)
 {
@ -339,122 +339,122 @@ float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1)
    }

    return float(error / count);
-}
-
-float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
-{
-    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img0->componentCount() == 4 && img0->componentCount() == 4);
-
-    uint w = img0->width();
-    uint h = img0->height();
-    uint d = img0->depth();
-
-    FloatImage lab0, lab1; // Original images in CIE-Lab space.
-    lab0.allocate(3, w, h, d);
-    lab1.allocate(3, w, h, d);
-
-    // Convert input images to CIE-Lab.
-    rgbToCieLab(img0, &lab0);
-    rgbToCieLab(img1, &lab1);
-
-    // @@ Convolve each channel by the corresponding filter.
-    /*
-    GaussianFilter LFilter(5);
-    GaussianFilter aFilter(5);
-    GaussianFilter bFilter(5);
-
-    lab0.convolve(0, LFilter);
-    lab0.convolve(1, aFilter);
-    lab0.convolve(2, bFilter);
-
-    lab1.convolve(0, LFilter);
-    lab1.convolve(1, aFilter);
-    lab1.convolve(2, bFilter);
-    */
-    // @@ Measure Delta E between lab0 and lab1.
-
-    return 0.0f;
-}
-
-
-// Assumes input images are normal maps.
-float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1)
-{
-    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img0->componentCount() == 4 && img0->componentCount() == 4);
-
-    uint w = img0->width();
-    uint h = img0->height();
-
-    const float * x0 = img0->channel(0);
-    const float * y0 = img0->channel(1);
-    const float * z0 = img0->channel(2);
-
-    const float * x1 = img1->channel(0);
-    const float * y1 = img1->channel(1);
-    const float * z1 = img1->channel(2);
-
-    double error = 0.0f;
-
-    const uint count = w*h;
-    for (uint i = 0; i < count; i++)
-    {
-        Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
-        Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
-
-        n0 = 2.0f * n0 - Vector3(1);
-        n1 = 2.0f * n1 - Vector3(1);
-
-        n0 = normalizeSafe(n0, Vector3(0), 0.0f);
-        n1 = normalizeSafe(n1, Vector3(0), 0.0f);
-
-        error += acos(clamp(dot(n0, n1), -1.0f, 1.0f));
-    }
-
-    return float(error / count);
-}
-
-float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
-{
-    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
-        return FLT_MAX;
-    }
-    nvDebugCheck(img0->componentCount() == 4 && img0->componentCount() == 4);
-
-    uint w = img0->width();
-    uint h = img0->height();
-
-    const float * x0 = img0->channel(0);
-    const float * y0 = img0->channel(1);
-    const float * z0 = img0->channel(2);
-
-    const float * x1 = img1->channel(0);
-    const float * y1 = img1->channel(1);
-    const float * z1 = img1->channel(2);
-
-    double error = 0.0f;
-
-    const uint count = w*h;
-    for (uint i = 0; i < count; i++)
-    {
-        Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
-        Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
-
-        n0 = 2.0f * n0 - Vector3(1);
-        n1 = 2.0f * n1 - Vector3(1);
-
-        n0 = normalizeSafe(n0, Vector3(0), 0.0f);
-        n1 = normalizeSafe(n1, Vector3(0), 0.0f);
-
-        float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
-        error += angle * angle;
-    }
-
-    return float(sqrt(error / count));
-}
-
+}
+
+// Placeholder for a spatially filtered CIE-Lab error metric.
+// Returns FLT_MAX when either image is NULL or the widths/heights differ.
+// Otherwise it converts both images to CIE-Lab and returns 0.0f: the filtering
+// and the Delta E measurement are not implemented yet (see the @@ notes below).
+float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
+{
+    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
+        return FLT_MAX;
+    }
+    // Check both images; the second operand previously re-tested img0.
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    uint w = img0->width();
+    uint h = img0->height();
+    uint d = img0->depth();
+
+    FloatImage lab0, lab1; // Original images in CIE-Lab space.
+    lab0.allocate(3, w, h, d);
+    lab1.allocate(3, w, h, d);
+
+    // Convert input images to CIE-Lab.
+    rgbToCieLab(img0, &lab0);
+    rgbToCieLab(img1, &lab1);
+
+    // @@ Convolve each channel by the corresponding filter.
+    /*
+    GaussianFilter LFilter(5);
+    GaussianFilter aFilter(5);
+    GaussianFilter bFilter(5);
+
+    lab0.convolve(0, LFilter);
+    lab0.convolve(1, aFilter);
+    lab0.convolve(2, bFilter);
+
+    lab1.convolve(0, LFilter);
+    lab1.convolve(1, aFilter);
+    lab1.convolve(2, bFilter);
+    */
+    // @@ Measure Delta E between lab0 and lab1.
+
+    return 0.0f;
+}
+
+
+// Assumes input images are normal maps.
+// Returns the mean angle, in radians, between corresponding normals of img0 and
+// img1, or FLT_MAX when either image is NULL or the widths/heights differ.
+// Channels 0..2 are expanded with 2 * v - 1 and normalized before comparison.
+float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1)
+{
+    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
+        return FLT_MAX;
+    }
+    // Check both images; the second operand previously re-tested img0.
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    uint w = img0->width();
+    uint h = img0->height();
+
+    const float * x0 = img0->channel(0);
+    const float * y0 = img0->channel(1);
+    const float * z0 = img0->channel(2);
+
+    const float * x1 = img1->channel(0);
+    const float * y1 = img1->channel(1);
+    const float * z1 = img1->channel(2);
+
+    double error = 0.0f;
+
+    const uint count = w*h;
+    for (uint i = 0; i < count; i++)
+    {
+        Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
+        Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
+
+        n0 = 2.0f * n0 - Vector3(1);
+        n1 = 2.0f * n1 - Vector3(1);
+
+        n0 = normalizeSafe(n0, Vector3(0), 0.0f);
+        n1 = normalizeSafe(n1, Vector3(0), 0.0f);
+
+        // Clamp the dot product so rounding error cannot push acos out of its domain.
+        error += acos(clamp(dot(n0, n1), -1.0f, 1.0f));
+    }
+
+    return float(error / count);
+}
+
+// Returns the root-mean-square angle, in radians, between corresponding normals
+// of img0 and img1, or FLT_MAX when either image is NULL or the widths/heights differ.
+// Channels 0..2 are expanded with 2 * v - 1 and normalized before comparison.
+float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
+{
+    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
+        return FLT_MAX;
+    }
+    // Check both images; the second operand previously re-tested img0.
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    uint w = img0->width();
+    uint h = img0->height();
+
+    const float * x0 = img0->channel(0);
+    const float * y0 = img0->channel(1);
+    const float * z0 = img0->channel(2);
+
+    const float * x1 = img1->channel(0);
+    const float * y1 = img1->channel(1);
+    const float * z1 = img1->channel(2);
+
+    double error = 0.0f;
+
+    const uint count = w*h;
+    for (uint i = 0; i < count; i++)
+    {
+        Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
+        Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
+
+        n0 = 2.0f * n0 - Vector3(1);
+        n1 = 2.0f * n1 - Vector3(1);
+
+        n0 = normalizeSafe(n0, Vector3(0), 0.0f);
+        n1 = normalizeSafe(n1, Vector3(0), 0.0f);
+
+        // Clamp the dot product so rounding error cannot push acosf out of its domain.
+        float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
+        error += angle * angle;
+    }
+
+    return float(sqrt(error / count));
+}
+