diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj
index d0de758..8a867c8 100644
--- a/project/vc9/nvtt/nvtt.vcproj
+++ b/project/vc9/nvtt/nvtt.vcproj
@@ -1052,14 +1052,6 @@
 			RelativePath="..\..\..\src\nvtt\CompressorRGB.h"
 			>
 		</File>
-		<File
-			RelativePath="..\..\..\src\nvtt\CompressorRGBE.cpp"
-			>
-		</File>
-		<File
-			RelativePath="..\..\..\src\nvtt\CompressorRGBE.h"
-			>
-		</File>
 		<File
 			RelativePath="..\..\..\src\nvtt\Context.cpp"
 			>
diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp
index 0265a2a..f0405b9 100644
--- a/src/nvimage/FloatImage.cpp
+++ b/src/nvimage/FloatImage.cpp
@@ -956,7 +956,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, W
 
 void FloatImage::convolve(const Kernel2 & k, uint c, WrapMode wm)
 {
-    AutoPtr<FloatImage> tmpImage = clone();
+    AutoPtr<FloatImage> tmpImage(clone());
 
     uint w = m_width;
     uint h = m_height;
diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h
index 8163eb3..aa28a84 100644
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@@ -152,11 +152,13 @@ namespace nv
         return value;
     }
 
+#if NV_CC_MSVC
     inline float log2f(float x)
     {
         nvCheck(x >= 0);
         return logf(x) / logf(2.0f);
     }
+#endif
 
     inline float lerp(float f0, float f1, float t)
     {
@@ -195,10 +197,9 @@ namespace nv
         return f - floor(f);
     }
 
-    inline float fround(float f)    // @@ rename floatRound
+    inline float floatRound(float f)
     {
-        // @@ Do something better.
-        return float(iround(f));
+        return floorf(f + 0.5f);
     }
 
     // Eliminates negative zeros from a float array.
@@ -234,6 +235,22 @@ namespace nv
         return 0;
     }
 
+    // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
+
+    // Quantize a [0, 1] full precision float to the nearest level k / (2^bits - 1), so that 0 and 1 are exact endpoints.
+    inline float quantizeFloat(float f, int bits) {
+        const float scale = float((1 << bits) - 1);    // Largest integer code; levels are k / scale.
+        const float offset = 0.5f;                     // Round to nearest; an offset of 0 truncates, so only f == 1 reaches the top level.
+        return floor(saturate(f) * scale + offset) / scale;
+    }
+
+    // Quantize a [0, 1] full precision float, using uniform bins.
+    /*inline float quantizeFloat(float f, int bits) {
+        float scale = (1 << bits);
+        float offset = 0.5f;
+        return floor(saturate(f) * scale + offset) / scale;
+    }*/
+
 } // nv
 
 #endif // NV_MATH_H
diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt
index e60f35b..3a0da41 100644
--- a/src/nvtt/CMakeLists.txt
+++ b/src/nvtt/CMakeLists.txt
@@ -14,7 +14,6 @@ SET(NVTT_SRCS
     CompressorDX10.h CompressorDX10.cpp
 #    CompressorDX11.h CompressorDX11.cpp
     CompressorRGB.h CompressorRGB.cpp
-    CompressorRGBE.h CompressorRGBE.cpp
     Context.h Context.cpp
     QuickCompressDXT.h QuickCompressDXT.cpp
     OptimalCompressDXT.h OptimalCompressDXT.cpp
diff --git a/src/nvtt/CompressorRGBE.cpp b/src/nvtt/CompressorRGBE.cpp
deleted file mode 100644
index 3dccbac..0000000
--- a/src/nvtt/CompressorRGBE.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
-// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#include "CompressorRGBE.h"
-#include "CompressionOptions.h"
-#include "OutputOptions.h"
-
-#include "nvimage/Image.h"
-#include "nvimage/FloatImage.h"
-
-#include "nvmath/Color.h"
-
-#include "nvcore/Debug.h"
-
-using namespace nv;
-using namespace nvtt;
-
-static Color32 toRgbe8(float r, float g, float b)
-{
-    Color32 c;
-    float v = max(max(r, g), b);
-    if (v < 1e-32) {
-        c.r = c.g = c.b = c.a = 0;
-    }
-    else {
-        int e;
-        v = frexp(v, &e) * 256.0f / v;
-        c.r = uint8(clamp(r * v, 0.0f, 255.0f));
-        c.g = uint8(clamp(g * v, 0.0f, 255.0f));
-        c.b = uint8(clamp(b * v, 0.0f, 255.0f));
-        c.a = e + 128;
-    }
-
-    return c;
-}
-
-
-void CompressorRGBE::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
-{
-    nvDebugCheck (compressionOptions.format == nvtt::Format_RGBE);
-
-    uint srcPitch = w;
-    uint srcPlane = w * h * d;
-
-    // Allocate output scanline.
-    Color32 * dst = new Color32[w];
-
-    for (uint y = 0; y < h*d; y++)
-    {
-        const float * src = (const float *)data + y * srcPitch;
-
-        for (uint x = 0; x < w; x++)
-        {
-            float r = src[x + 0 * srcPlane];
-            float g = src[x + 1 * srcPlane];
-            float b = src[x + 2 * srcPlane];
-
-            dst[x] = toRgbe8(r, g, b);
-        }
-
-        if (outputOptions.outputHandler != NULL)
-        {
-            outputOptions.outputHandler->writeData(dst, w * 4);
-        }
-    }
-
-    delete [] dst;
-}
diff --git a/src/nvtt/CompressorRGBE.h b/src/nvtt/CompressorRGBE.h
deleted file mode 100644
index f2905bc..0000000
--- a/src/nvtt/CompressorRGBE.h
+++ /dev/null
@@ -1,39 +0,0 @@
-// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
-// 
-// Permission is hereby granted, free of charge, to any person
-// obtaining a copy of this software and associated documentation
-// files (the "Software"), to deal in the Software without
-// restriction, including without limitation the rights to use,
-// copy, modify, merge, publish, distribute, sublicense, and/or sell
-// copies of the Software, and to permit persons to whom the
-// Software is furnished to do so, subject to the following
-// conditions:
-// 
-// The above copyright notice and this permission notice shall be
-// included in all copies or substantial portions of the Software.
-// 
-// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
-// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
-// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
-// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
-// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
-// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
-// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
-// OTHER DEALINGS IN THE SOFTWARE.
-
-#ifndef NVTT_COMPRESSORRGBE_H
-#define NVTT_COMPRESSORRGBE_H
-
-#include "Compressor.h"
-
-namespace nv
-{
-    struct CompressorRGBE : public CompressorInterface
-    {
-        virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
-    };
-
-} // nv namespace
-
-
-#endif // NVTT_COMPRESSORRGBE_H
diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp
index 8baec6f..ce1cd5c 100644
--- a/src/nvtt/Context.cpp
+++ b/src/nvtt/Context.cpp
@@ -35,7 +35,6 @@
 #include "CompressorDX10.h"
 #include "CompressorDX11.h"
 #include "CompressorRGB.h"
-#include "CompressorRGBE.h"
 #include "cuda/CudaUtils.h"
 #include "cuda/CudaCompressorDXT.h"
 
@@ -763,10 +762,6 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
     {
         // Not supported.
     }
-    else if (compressionOptions.format == Format_RGBE)
-    {
-        return new CompressorRGBE;
-    }
 
     return NULL;
 }
diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp
index 1586ce0..660d27d 100644
--- a/src/nvtt/Surface.cpp
+++ b/src/nvtt/Surface.cpp
@@ -1310,6 +1310,232 @@ void Surface::fromRGBM(float range/*= 1*/)
 }
 
 
+static Color32 toRgbe8(float r, float g, float b)
+{
+    // Ward-style packing: 8-bit mantissas go to r/g/b, the exponent offset by 128 goes to a.
+    Color32 packed;
+    const float largest = max(max(r, g), b);
+    if (largest < 1e-32) {
+        // Largest component is below the threshold: emit an all-zero texel.
+        packed.r = packed.g = packed.b = packed.a = 0;
+        return packed;
+    }
+    int exponent;
+    const float scale = frexp(largest, &exponent) * 256.0f / largest;
+    packed.r = uint8(clamp(r * scale, 0.0f, 255.0f));
+    packed.g = uint8(clamp(g * scale, 0.0f, 255.0f));
+    packed.b = uint8(clamp(b * scale, 0.0f, 255.0f));
+    packed.a = exponent + 128;
+    return packed;
+}
+
+
+/*
+  Alen Ladavac @ GDAlgorithms-list on Feb 7, 2007:
+    One trick that we use to alleviate such problems is to use RGBE5.3 -
+    i.e. have a fixed point exponent. Note that it is not enough to just
+    shift the exponent up for 3 bits, but you actually have to convert
+    each pixel in the RGBE8 texture by unpacking it to floats and then
+    repacking it with a non-integer exponent, which gives different
+    mantissas as well. Now your jumps in exponent are much smaller, thus
+    the bands are not that noticeable. It is still not as good as FP16,
+    but it is much better than RGBE8. I hope this explanation is
+    understandable, if not I can fill in more details.
+
+    Though there still are some bands, you can get an even better
+    precision if you upload that same texture as RGBA16, because you'll
+    get even more interpolation then, and it works good as a scalable
+    option for people with more GPU RAM). Alternatively, when some of the
+    future cards (hopefully, because I'm trying to lobby for that
+    everywhere :) ), start returning more than 8 bits, your scenes will
+    automatically look better even without using RGBA16.
+
+  Jon Watte:
+    The interpolation of 5.3 is the same as that of 8 bits, because it's a
+    fixed point format.
+
+    The reason using 5.3 helps, is that each bit of quantization in the
+    interpolation only means 1/8th of a fully significant bit. The
+    quantization still happens, it's just less visible. The trade-off is
+    that you get less dynamic range.
+
+  Alen Ladavac:
+    True, but it is just a small part of the improvement. The greater part
+    is that RGB values have to be calculated according to the fractional
+    exponent. With integer exponent, the RGB values jump by a factor of 2
+    when each bit changes in exponent, and 5.3 with correct adjustment of
+    RGB lowers this jump to be about 1.09, which is much better. I may not
+    be entirely correct on the numbers, which I'm pulling out from my
+    memory now, but it's a rough estimate.
+*/
+/* Ward's version:
+static Color32 toRgbe8(float r, float g, float b)
+{
+    Color32 c;
+    float v = max(max(r, g), b);
+    if (v < 1e-32) {
+        c.r = c.g = c.b = c.a = 0;
+    }
+    else {
+        int e;
+        v = frexp(v, &e) * 256.0f / v;
+        c.r = uint8(clamp(r * v, 0.0f, 255.0f));
+        c.g = uint8(clamp(g * v, 0.0f, 255.0f));
+        c.b = uint8(clamp(b * v, 0.0f, 255.0f));
+        c.a = e + 128;
+    }
+
+    return c;
+}
+*/
+// Encodes r, g, b as shared-exponent mantissas and a as the biased exponent, all rescaled to [0, 1]. For R9G9B9E5, use toRGBE(9, 5), for Ward's RGBE, use toRGBE(8, 8)
+void Surface::toRGBE(int mantissaBits, int exponentBits)
+{
+    // According to the OpenGL extension:
+    // http://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
+    //
+    // Components red, green, and blue are first clamped (in the process,
+    // mapping NaN to zero) so:
+    //
+    //     red_c   = max(0, min(sharedexp_max, red))
+    //     green_c = max(0, min(sharedexp_max, green))
+    //     blue_c  = max(0, min(sharedexp_max, blue))
+    //
+    // where sharedexp_max is (2^N-1)/2^N * 2^(Emax-B), N is the number
+    // of mantissa bits per component, Emax is the maximum allowed biased
+    // exponent value (careful: not necessarily 2^E-1 when E is the number of
+    // exponent bits), bits, and B is the exponent bias.  For the RGB9_E5_EXT
+    // format, N=9, Emax=31, and B=15.
+    //
+    // The largest clamped component, max_c, is determined:
+    //
+    //     max_c = max(red_c, green_c, blue_c)
+    //
+    // A preliminary shared exponent is computed:
+    //
+    //     exp_shared_p = max(-B-1, floor(log2(max_c))) + 1 + B
+    //
+    // A refined shared exponent is then computed as:
+    //
+    //     max_s   = floor(max_c   / 2^(exp_shared_p - B - N) + 0.5)
+    //
+    //                  { exp_shared_p,    0 <= max_s <  2^N
+    //     exp_shared = {
+    //                  { exp_shared_p+1,       max_s == 2^N
+    //
+    // These integers values in the range 0 to 2^N-1 are then computed:
+    //
+    //     red_s   = floor(red_c   / 2^(exp_shared - B - N) + 0.5)
+    //     green_s = floor(green_c / 2^(exp_shared - B - N) + 0.5)
+    //     blue_s  = floor(blue_c  / 2^(exp_shared - B - N) + 0.5)
+
+    if (isNull()) return;
+
+    detach();
+
+    // mantissaBits = N
+    // exponentBits = E
+    // exponentMax = Emax
+    // exponentBias = B
+    // maxValue = sharedexp_max
+
+    // max mantissa: 9 -> 511, 8 -> 255; max exponent: 5 -> 31, 8 -> 255
+    const int mantissaMax = (1 << mantissaBits) - 1;
+    const int exponentMax = (1 << exponentBits) - 1;
+
+    // exponent bias: 5 -> 15, 8 -> 127
+    const int exponentBias = (1 << (exponentBits - 1)) - 1;
+
+    // sharedexp_max = (2^N-1)/2^N * 2^(Emax-B): (9, 5) -> 65408. Uses exp2 because an int '1 << (Emax - B)' overflows for 8 exponent bits.
+    const float maxValue = float(mantissaMax) * exp2(float(exponentMax - exponentBias - mantissaBits));
+
+    FloatImage * img = m->image;
+    float * r = img->channel(0);
+    float * g = img->channel(1);
+    float * b = img->channel(2);
+    float * a = img->channel(3);
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++) {
+        // Clamp components:
+        float R = ::clamp(r[i], 0.0f, maxValue);
+        float G = ::clamp(g[i], 0.0f, maxValue);
+        float B = ::clamp(b[i], 0.0f, maxValue);
+
+        // Compute max:
+        float M = max(R, G, B);
+
+        // Preliminary exponent (log2f(0) is -infinity, so a zero maximum takes the lower bound directly):
+        float E = ((M > 0.0f) ? max(- exponentBias - 1, ifloor(log2f(M))) : (- exponentBias - 1)) + 1 + exponentBias;
+
+        // Refine exponent:
+        int max_s = iround(M / exp2(E - exponentBias - mantissaBits));
+        if (max_s == (1 << mantissaBits)) E += 1.0f;
+
+        const float step = exp2(E - exponentBias - mantissaBits);   // Quantization step shared by the three components.
+        R = floatRound(R / step);
+        G = floatRound(G / step);
+        B = floatRound(B / step);
+        nvDebugCheck(R >= 0 && R <= mantissaMax);
+        nvDebugCheck(G >= 0 && G <= mantissaMax);
+        nvDebugCheck(B >= 0 && B <= mantissaMax);
+
+        // Store in [0, 1] range.
+        r[i] = R / mantissaMax;
+        g[i] = G / mantissaMax;
+        b[i] = B / mantissaMax;
+        a[i] = E / exponentMax;
+    }
+}
+
+void Surface::fromRGBE(int mantissaBits, int exponentBits)
+{
+    // According to the OpenGL extension:
+    // http://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
+    //
+    // The 1st, 2nd, 3rd, and 4th components are called
+    // p_red, p_green, p_blue, and p_exp respectively and are treated as
+    // unsigned integers.  These are then used to compute floating-point
+    // RGB components (ignoring the "Conversion to floating-point" section
+    // below in this case) as follows:
+    //
+    // red   = p_red   * 2^(p_exp - B - N)
+    // green = p_green * 2^(p_exp - B - N)
+    // blue  = p_blue  * 2^(p_exp - B - N)
+    //
+    // where B is 15 (the exponent bias) and N is 9 (the number of mantissa
+    // bits).
+
+    if (isNull()) return;
+    detach();
+
+    // exponent bias: 5 -> 15, 8 -> 127
+    const float bias = (1 << (exponentBits - 1)) - 1;
+    const float mantissaScale = float((1 << mantissaBits) - 1);   // Largest mantissa code.
+    const float exponentScale = float((1 << exponentBits) - 1);   // Largest exponent code.
+
+    FloatImage * image = m->image;
+    float * red = image->channel(0);
+    float * green = image->channel(1);
+    float * blue = image->channel(2);
+    float * alpha = image->channel(3);
+
+    const uint pixelCount = image->pixelCount();
+    for (uint p = 0; p < pixelCount; p++) {
+        // RGBE are assumed to be in the [0, 1] range; expand them back to integer codes.
+        const float mantissaR = red[p] * mantissaScale;
+        const float mantissaG = green[p] * mantissaScale;
+        const float mantissaB = blue[p] * mantissaScale;
+        const float exponent = alpha[p] * exponentScale;
+        const float multiplier = pow(2, exponent - bias - mantissaBits);
+
+        red[p] = mantissaR * multiplier;
+        green[p] = mantissaG * multiplier;
+        blue[p] = mantissaB * multiplier;
+        alpha[p] = 1;
+    }
+}
+
 // Y is in the [0, 1] range, while CoCg are in the [-1, 1] range.
 void Surface::toYCoCg()
 {
diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h
index 22eacdb..354c5e4 100644
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@@ -70,6 +70,11 @@ namespace nvtt
     struct Surface;
     struct CubeSurface;
 
+
+    // @@ I wish I had distinguished between "formats" and compressors.
+    // That is, 'DXT1' is a format, while 'DXT1a' and 'DXT1n' are DXT1 compressors.
+    // Likewise, 'DXT3' is a format and 'DXT3n' is a DXT3 compressor.
+    // Having multiple enums for the same ids only creates confusion. Clean this up.
     /// Supported compression formats.
     enum Format
     {
@@ -98,8 +103,6 @@ namespace nvtt
 
         Format_BC6,     // Not supported yet.
         Format_BC7,     // Not supported yet.
-
-        Format_RGBE,
     };
 
     /// Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input.
@@ -472,6 +475,8 @@ namespace nvtt
         //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax);
         NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f);
         NVTT_API void fromRGBM(float range = 1.0f);
+        NVTT_API void toRGBE(int mantissaBits, int exponentBits);
+        NVTT_API void fromRGBE(int mantissaBits, int exponentBits);
         NVTT_API void toYCoCg();
         NVTT_API void blockScaleCoCg(int bits = 5, float threshold = 0.0f);
         NVTT_API void fromYCoCg();