From dc13d9e9d22d000b14c4eb4911901d9a58d846d8 Mon Sep 17 00:00:00 2001
From: castano <castano@95f4ed2b-212e-0410-8b90-d31948207fce>
Date: Thu, 29 Sep 2011 23:14:18 +0000
Subject: [PATCH] Add support for float10 and float11 formats. Not tested.

---
 src/nvtt/CompressorRGB.cpp | 160 ++++++++++++++++++++++++++++++++-----
 src/nvtt/nvtt.h            |  77 ++++++++++--------
 2 files changed, 184 insertions(+), 53 deletions(-)

diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp
index 237b583..130f5ce 100644
--- a/src/nvtt/CompressorRGB.cpp
+++ b/src/nvtt/CompressorRGB.cpp
@@ -40,23 +40,124 @@ using namespace nvtt;
 
 namespace 
 {
-    inline void convert_to_a8r8g8b8(const void * src, void * dst, uint w)
-    {
-        memcpy(dst, src, 4 * w);
+    /* 11 and 10 bit floating point numbers according to the OpenGL packed float extension:
+       http://www.opengl.org/registry/specs/EXT/packed_float.txt
+
+       2.1.A  Unsigned 11-Bit Floating-Point Numbers
+
+        An unsigned 11-bit floating-point number has no sign bit, a 5-bit
+        exponent (E), and a 6-bit mantissa (M).  The value of an unsigned
+        11-bit floating-point number (represented as an 11-bit unsigned
+        integer N) is determined by the following:
+
+            0.0,                      if E == 0 and M == 0,
+            2^-14 * (M / 64),         if E == 0 and M != 0,
+            2^(E-15) * (1 + M/64),    if 0 < E < 31,
+            INF,                      if E == 31 and M == 0, or
+            NaN,                      if E == 31 and M != 0,
+
+        where
+
+            E = floor(N / 64), and
+            M = N mod 64.
+
+        Implementations are also allowed to use any of the following
+        alternative encodings:
+
+            0.0,                      if E == 0 and M != 0
+            2^(E-15) * (1 + M/64)     if E == 31 and M == 0
+            2^(E-15) * (1 + M/64)     if E == 31 and M != 0
+
+        When a floating-point value is converted to an unsigned 11-bit
+        floating-point representation, finite values are rounded to the closest
+        representable finite value.  While less accurate, implementations
+        are allowed to always round in the direction of zero.  This means
+        negative values are converted to zero.  Likewise, finite positive
+        values greater than 65024 (the maximum finite representable unsigned
+        11-bit floating-point value) are converted to 65024.  Additionally:
+        negative infinity is converted to zero; positive infinity is converted
+        to positive infinity; and both positive and negative NaN are converted
+        to positive NaN.
+
+        Any representable unsigned 11-bit floating-point value is legal
+        as input to a GL command that accepts 11-bit floating-point data.
+        The result of providing a value that is not a floating-point number
+        (such as infinity or NaN) to such a command is unspecified, but must
+        not lead to GL interruption or termination.  Providing a denormalized
+        number or negative zero to GL must yield predictable results.
+
+        2.1.B  Unsigned 10-Bit Floating-Point Numbers
+
+        An unsigned 10-bit floating-point number has no sign bit, a 5-bit
+        exponent (E), and a 5-bit mantissa (M).  The value of an unsigned
+        10-bit floating-point number (represented as a 10-bit unsigned
+        integer N) is determined by the following:
+
+            0.0,                      if E == 0 and M == 0,
+            2^-14 * (M / 32),         if E == 0 and M != 0,
+            2^(E-15) * (1 + M/32),    if 0 < E < 31,
+            INF,                      if E == 31 and M == 0, or
+            NaN,                      if E == 31 and M != 0,
+
+        where
+
+            E = floor(N / 32), and
+            M = N mod 32.
+
+        When a floating-point value is converted to an unsigned 10-bit
+        floating-point representation, finite values are rounded to the closest
+        representable finite value.  While less accurate, implementations
+        are allowed to always round in the direction of zero.  This means
+        negative values are converted to zero.  Likewise, finite positive
+        values greater than 64512 (the maximum finite representable unsigned
+        10-bit floating-point value) are converted to 64512.  Additionally:
+        negative infinity is converted to zero; positive infinity is converted
+        to positive infinity; and both positive and negative NaN are converted
+        to positive NaN.
+
+        Any representable unsigned 10-bit floating-point value is legal
+        as input to a GL command that accepts 10-bit floating-point data.
+        The result of providing a value that is not a floating-point number
+        (such as infinity or NaN) to such a command is unspecified, but must
+        not lead to GL interruption or termination.  Providing a denormalized
+        number or negative zero to GL must yield predictable results.
+    */
+
+    // Packs a float into an unsigned 11 bit float (5 bit exponent, 6 bit mantissa), rounding toward zero.
+    // Negative values, zero and NaN become 0; values above 65024 (including +INF) are clamped to 65024.
+    static uint toFloat11(float f) {
+        if (!(f > 0)) return 0;     // Also catches NaN, for which every comparison is false.
+        if (f > 65024) f = 65024;   // Largest finite float11: 2^15 * (1 + 63/64).
+
+        Float754 F;
+        F.value = f;
+
+        // Rebias in signed arithmetic; E <= 0 means f is below 2^-14 and needs a denormal encoding.
+        int E = int(F.field.biasedexponent) - 127 + 15;
+        if (E <= 0) return (E > -6) ? ((F.field.mantissa | 0x800000u) >> (18 - E)) : 0;
+        nvDebugCheck(E < 32);
+        uint M = F.field.mantissa >> (23 - 6);
+        return (uint(E) << 6) | M;
     }
 
-    inline void convert_to_x8r8g8b8(const void * src, void * dst, uint w)
-    {
-        memcpy(dst, src, 4 * w);
-    }
+    // Packs a float into an unsigned 10 bit float (5 bit exponent, 5 bit mantissa), rounding toward zero.
+    // Negative values, zero and NaN become 0; values above 64512 (including +INF) are clamped to 64512.
+    static uint toFloat10(float f) {
+        if (!(f > 0)) return 0;     // Also catches NaN, for which every comparison is false.
+        if (f > 64512) f = 64512;   // Largest finite float10: 2^15 * (1 + 31/32).
 
-    static uint16 to_half(float f)
-    {
-        union { float f; uint32 u; } c;
-        c.f = f;
-        return half_from_float(c.u);
+        Float754 F;
+        F.value = f;
+
+        // Rebias in signed arithmetic; E <= 0 means f is below 2^-14 and needs a denormal encoding.
+        int E = int(F.field.biasedexponent) - 127 + 15;
+        if (E <= 0) return (E > -5) ? ((F.field.mantissa | 0x800000u) >> (19 - E)) : 0;
+        nvDebugCheck(E < 32);
+        uint M = F.field.mantissa >> (23 - 5);
+        return (uint(E) << 5) | M;
     }
 
+
     struct BitStream
     {
         BitStream(uint8 * ptr) : ptr(ptr), buffer(0), bits(0) {
@@ -84,18 +185,28 @@ namespace
 
         void putFloat(float f)
         {
-            nvDebugCheck(bits == 0);
+            nvDebugCheck(bits == 0); // @@ Do not require alignment.
             *((float *)ptr) = f;
             ptr += 4;
         }
 
         void putHalf(float f)
         {
-            nvDebugCheck(bits == 0);
+            nvDebugCheck(bits == 0); // @@ Do not require alignment.
             *((uint16 *)ptr) = to_half(f);
             ptr += 2;
         }
 
+        void putFloat11(float f)
+        {
+            putBits(toFloat11(f), 11);
+        }
+
+        void putFloat10(float f)
+        {
+            putBits(toFloat10(f), 10);
+        }
+
         void flush()
         {
             nvDebugCheck(bits < 8);
@@ -142,10 +253,11 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
         bsize = compressionOptions.bsize;
         asize = compressionOptions.asize;
 
-        nvCheck(rsize == 0 || rsize == 16 || rsize == 32);
-        nvCheck(gsize == 0 || gsize == 16 || gsize == 32);
-        nvCheck(bsize == 0 || bsize == 16 || bsize == 32);
-        nvCheck(asize == 0 || asize == 16 || asize == 32);
+        // Other float sizes are not supported and will be zero-padded.
+        nvDebugCheck(rsize == 0 || rsize == 10 || rsize == 11 || rsize == 16 || rsize == 32);
+        nvDebugCheck(gsize == 0 || gsize == 10 || gsize == 11 || gsize == 16 || gsize == 32);
+        nvDebugCheck(bsize == 0 || bsize == 10 || bsize == 11 || bsize == 16 || bsize == 32);
+        nvDebugCheck(asize == 0 || asize == 10 || asize == 11 || asize == 16 || asize == 32);
 
         bitCount = rsize + gsize + bsize + asize;
     }
@@ -213,15 +325,27 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
                 {
                     if (rsize == 32) stream.putFloat(r);
                     else if (rsize == 16) stream.putHalf(r);
+                    else if (rsize == 11) stream.putFloat11(r);
+                    else if (rsize == 10) stream.putFloat10(r);
+                    else stream.putBits(0, rsize);
 
                     if (gsize == 32) stream.putFloat(g);
                     else if (gsize == 16) stream.putHalf(g);
+                    else if (gsize == 11) stream.putFloat11(g);
+                    else if (gsize == 10) stream.putFloat10(g);
+                    else stream.putBits(0, gsize);
 
                     if (bsize == 32) stream.putFloat(b);
                     else if (bsize == 16) stream.putHalf(b);
+                    else if (bsize == 11) stream.putFloat11(b);
+                    else if (bsize == 10) stream.putFloat10(b);
+                    else stream.putBits(0, bsize);
 
                     if (asize == 32) stream.putFloat(a);
                     else if (asize == 16) stream.putHalf(a);
+                    else if (asize == 11) stream.putFloat11(a);
+                    else if (asize == 10) stream.putFloat10(a);
+                    else stream.putBits(0, asize);
                 }
                 else
                 {
diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h
index ec1d17f..52e38fa 100644
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@@ -71,11 +71,11 @@ namespace nvtt
     struct CubeSurface;
 
 
+    // Supported compression formats.
     // @@ I wish I had distinguished between "formats" and compressors.
     // That is, 'DXT1' is a format 'DXT1a' and 'DXT1n' are DXT1 compressors.
     // That is, 'DXT3' is a format 'DXT3n' is a DXT3 compressor.
     // Having multiple enums for the same ids only creates confusion. Clean this up.
-    /// Supported compression formats.
     enum Format
     {
         // No compression.
@@ -105,7 +105,7 @@ namespace nvtt
         Format_BC7,     // Not supported yet.
     };
 
-    /// Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input.
+    // Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. They are only relevant in RGBA mode.
     enum PixelType
     {
         PixelType_UnsignedNorm = 0,
@@ -116,7 +116,7 @@ namespace nvtt
         PixelType_UnsignedFloat = 5,
     };
 
-    /// Quality modes.
+    // Quality modes.
     enum Quality
     {
         Quality_Fastest,
@@ -125,16 +125,17 @@ namespace nvtt
         Quality_Highest,
     };
 
-    /// DXT decoder.
+    // DXT decoder.
     enum Decoder
     {
         Decoder_D3D10,
         Decoder_D3D9,
         Decoder_NV5x,
+        //Decoder_RSX, // To take advantage of DXT5 bug.
     };
 
 
-    /// Compression options. This class describes the desired compression format and other compression settings.
+    // Compression options. This class describes the desired compression format and other compression settings.
     struct CompressionOptions
     {
         NVTT_FORBID_COPY(CompressionOptions);
@@ -159,6 +160,7 @@ namespace nvtt
 
         NVTT_API void setPitchAlignment(int pitchAlignment);
 
+        // @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion.
         NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
 
         NVTT_API void setTargetDecoder(Decoder decoder);
@@ -181,7 +183,7 @@ namespace nvtt
     */
 
 
-    /// Wrap modes.
+    // Wrap modes.
     enum WrapMode
     {
         WrapMode_Clamp,
@@ -189,7 +191,7 @@ namespace nvtt
         WrapMode_Mirror,
     };
 
-    /// Texture types.
+    // Texture types.
     enum TextureType
     {
         TextureType_2D,
@@ -197,23 +199,23 @@ namespace nvtt
         TextureType_3D,
     };
 
-    /// Input formats.
+    // Input formats.
     enum InputFormat
     {
-        InputFormat_BGRA_8UB, // Normalized [0, 1] 8 bit fixed point.
-        InputFormat_RGBA_16F, // 16 bit floating point.
-        InputFormat_RGBA_32F, // 32 bit floating point.
+        InputFormat_BGRA_8UB,   // Normalized [0, 1] 8 bit fixed point.
+        InputFormat_RGBA_16F,   // 16 bit floating point.
+        InputFormat_RGBA_32F,   // 32 bit floating point.
     };
 
-    /// Mipmap downsampling filters.
+    // Mipmap downsampling filters.
     enum MipmapFilter
     {
-        MipmapFilter_Box,       ///< Box filter is quite good and very fast.
-        MipmapFilter_Triangle,  ///< Triangle filter blurs the results too much, but that might be what you want.
-        MipmapFilter_Kaiser,    ///< Kaiser-windowed Sinc filter is the best downsampling filter.
+        MipmapFilter_Box,       // Box filter is quite good and very fast.
+        MipmapFilter_Triangle,  // Triangle filter blurs the results too much, but that might be what you want.
+        MipmapFilter_Kaiser,    // Kaiser-windowed Sinc filter is the best downsampling filter.
     };
 
-    /// Texture resize filters.
+    // Texture resize filters.
     enum ResizeFilter
     {
         ResizeFilter_Box,
@@ -222,7 +224,7 @@ namespace nvtt
         ResizeFilter_Mitchell,
     };
 
-    /// Extents rounding mode.
+    // Extents rounding mode.
     enum RoundMode
     {
         RoundMode_None,
@@ -231,7 +233,7 @@ namespace nvtt
         RoundMode_ToPreviousPowerOfTwo,
     };
 
-    /// Alpha mode.
+    // Alpha mode.
     enum AlphaMode
     {
         AlphaMode_None,
@@ -239,7 +241,7 @@ namespace nvtt
         AlphaMode_Premultiplied,
     };
 
-    /// Input options. Specify format and layout of the input texture.
+    // Input options. Specify format and layout of the input texture.
     struct InputOptions
     {
         NVTT_FORBID_COPY(InputOptions);
@@ -288,22 +290,22 @@ namespace nvtt
     };
 
 
-    /// Output handler.
+    // Output handler.
     struct OutputHandler
     {
         virtual ~OutputHandler() {}
 
-        /// Indicate the start of a new compressed image that's part of the final texture.
+        // Indicate the start of a new compressed image that's part of the final texture.
         virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) = 0;
 
-        /// Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
+        // Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
         virtual bool writeData(const void * data, int size) = 0;
 
-        /// Indicate the end of a the compressed image.
+        // Indicate the end of the compressed image.
         virtual void endImage() = 0;
     };
 
-    /// Error codes.
+    // Error codes.
     enum Error
     {
         Error_Unknown,
@@ -315,7 +317,7 @@ namespace nvtt
         Error_UnsupportedOutputFormat,
     };
 
-    /// Error handler.
+    // Error handler.
     struct ErrorHandler
     {
         virtual ~ErrorHandler() {}
@@ -324,16 +326,18 @@ namespace nvtt
         virtual void error(Error e) = 0;
     };
 
-    /// Container.
+    // Container.
     enum Container
     {
         Container_DDS,
         Container_DDS10,
+        // Container_KTX,   // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
+        // Container_VTF,   // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format
     };
 
 
-    /// Output Options. This class holds pointers to the interfaces that are used to report the output of
-    /// the compressor to the user.
+    // Output Options. This class holds pointers to the interfaces that are used to report the output of
+    // the compressor to the user.
     struct OutputOptions
     {
         NVTT_FORBID_COPY(OutputOptions);
@@ -363,7 +367,7 @@ namespace nvtt
         virtual void dispatch(Task * task, void * context, int count) = 0;
     };
 
-    /// Context.
+    // Context.
     struct Compressor
     {
         NVTT_FORBID_COPY(Compressor);
@@ -404,12 +408,13 @@ namespace nvtt
         NormalTransform_Orthographic,
         NormalTransform_Stereographic,
         NormalTransform_Paraboloid,
-        NormalTransform_Quartic,
+        NormalTransform_Quartic
         //NormalTransform_DualParaboloid,
     };
 
 
-    /// A surface is a texture mipmap. Can be 2D or 3D.
+    // A surface is one level of a 2D or 3D texture.
+    // @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
     struct Surface
     {
         NVTT_API Surface();
@@ -519,15 +524,16 @@ namespace nvtt
     };
 
 
+    // Cube layout formats.
     enum CubeLayout {
         CubeLayout_VerticalCross,
         CubeLayout_HorizontalCross,
         CubeLayout_Column,
         CubeLayout_Row,
-        CubeLayout_LatitudeLongitude,
+        CubeLayout_LatitudeLongitude
     };
 
-    /// A cubemap mipmap. CubeSurface?
+    // A CubeSurface is one level of a cube map texture.
     struct CubeSurface
     {
         NVTT_API CubeSurface();
@@ -548,7 +554,7 @@ namespace nvtt
         Surface & face(int face);
         const Surface & face(int face) const;
 
-        // Layout conversion.
+        // Layout conversion. @@ Not implemented.
         void fold(const Surface & img, CubeLayout layout);
         Surface unfold(CubeLayout layout) const;
 
@@ -556,11 +562,12 @@ namespace nvtt
 
         // @@ Add resizing methods.
 
+        // @@ Add edge fixup methods.
+
         // Filtering.
         CubeSurface irradianceFilter(int size) const;
         CubeSurface cosinePowerFilter(int size, float cosinePower) const;
 
-
         /*
         NVTT_API void resize(int w, int h, ResizeFilter filter);
         NVTT_API void resize(int w, int h, ResizeFilter filter, float filterWidth, const float * params = 0);