Add BC7 support. It's incredibly slow - ~60 seconds to compress a 512x512 image, on a Core i7 - but it works.

- Added AVPCL compressor to projects and got it building with VC9 and VC10. - Removed unused command line interface & file read/write code from AVPCL. - Converted AVPCL to use the NV vector math lib, asserts, etc. - Converted AVPCL to use double instead of float. - Added a 4x4 symmetric eigensolver for AVPCL; it's based on the existing 3x3 one, but I had to rewrite the Householder reduction stage. As with ZOH, using the eigensolver (instead of SVD) gives a ~25% speedup without significantly affecting RMSE. - Encapsulated the ZOH and AVPCL code into their own namespaces to keep everything separate. - Added some missing vector operators to the nvmath lib.
2013-12-07 02:17:08 +00:00
parent f2fa0517b5
commit ab316deeaa
86 changed files with 2944 additions and 11081 deletions
--- a/src/nvimage/BlockDXT.cpp
+++ b/src/nvimage/BlockDXT.cpp
@ -27,9 +27,10 @@
 #include "nvcore/Stream.h"
 #include "nvcore/Utils.h" // swap
 #include "nvmath/Half.h"
+#include "nvmath/Vector.inl"

 #include "nvtt/bc6h/zoh.h"
-#include "nvtt/bc6h/utils.h"
+#include "nvtt/bc7/avpcl.h"


 using namespace nv;
@ -617,7 +618,7 @@ void BlockCTX1::setIndices(int * idx)
 /// Decode BC6 block.
 void BlockBC6::decodeBlock(ColorSet * set) const
 {
-	Tile tile(4, 4);
+	ZOH::Tile tile(4, 4);
 	ZOH::decompress((const char *)data, tile);

 	// Convert ZOH's tile struct back to NVTT's, and convert half to float.
@ -626,9 +627,9 @@ void BlockBC6::decodeBlock(ColorSet * set) const
 	{
 		for (uint x = 0; x < 4; ++x)
 		{
-			uint16 rHalf = Tile::float2half(tile.data[y][x].x);
-			uint16 gHalf = Tile::float2half(tile.data[y][x].y);
-			uint16 bHalf = Tile::float2half(tile.data[y][x].z);
+			uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
+			uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
+			uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
 			set->colors[y * 4 + x].x = to_float(rHalf);
 			set->colors[y * 4 + x].y = to_float(gHalf);
 			set->colors[y * 4 + x].z = to_float(bHalf);
@ -641,6 +642,26 @@ void BlockBC6::decodeBlock(ColorSet * set) const
 }


+/// Decode this BC7 block into the 4x4 texels of 'block' using the AVPCL decompressor.
+void BlockBC7::decodeBlock(ColorBlock * block) const
+{
+	AVPCL::Tile tile(4, 4); // Scratch 4x4 tile that receives the decoded texels.
+	AVPCL::decompress((const char *)data, tile);
+
+	// Copy each texel from AVPCL's tile (indexed [y][x]) into NVTT's ColorBlock (addressed (x, y)).
+	for (uint y = 0; y < 4; ++y)
+	{
+		for (uint x = 0; x < 4; ++x)
+		{
+			Vector4 rgba = tile.data[y][x];
+			// Note: decoded rgba values are expected to be integers in the [0, 255] range,
+			// because BC7 never uses more than 8 bits per channel, so the uint8 casts below need no rounding.
+			block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w));
+		}
+	}
+}
+
+
 /// Flip CTX1 block vertically.
 inline void BlockCTX1::flip4()
 {
@ -707,3 +728,9 @@ Stream & nv::operator<<(Stream & stream, BlockBC6 & block)
    stream.serialize(&block, sizeof(block));
    return stream;
 }
+
+Stream & nv::operator<<(Stream & stream, BlockBC7 & block)
+{
+    stream.serialize(&block, sizeof(block)); // Hand the whole block to the stream as raw bytes, same as the BlockBC6 overload.
+    return stream; // Return the stream so calls can be chained.
+}
--- a/src/nvimage/BlockDXT.h
+++ b/src/nvimage/BlockDXT.h
@ -220,7 +220,13 @@ namespace nv
 		void decodeBlock(ColorSet * set) const;
 	};

-	/// !!!UNDONE: BC7 block
+	/// BC7 block: 16 bytes of compressed data, kept opaque and decoded on demand.
+	struct BlockBC7
+	{
+		uint8 data[16];		// Raw block bytes. Not even going to try to write a union for this thing.
+		void decodeBlock(ColorBlock * block) const; // Decodes 'data' into the given ColorBlock.
+	};
+


    // Serialization functions.
@ -233,6 +239,7 @@ namespace nv
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block);
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block);
    NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block);
+    NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC7 & block);

 } // nv namespace

--- a/src/nvimage/DirectDrawSurface.cpp
+++ b/src/nvimage/DirectDrawSurface.cpp
@ -1410,6 +1410,12 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
 			}
 		}
 	}
+    else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM)
+    {
+        BlockBC7 block;
+        *stream << block;
+        block.decodeBlock(rgba);
+    }
 	else
 	{
 		nvDebugCheck(false);