Add BC7 support. It's incredibly slow - ~60 seconds to compress a 512x512 image, on a Core i7 - but it works.

- Added AVPCL compressor to projects and got it building with VC9 and VC10.
- Removed unused command line interface & file read/write code from AVPCL.
- Converted AVPCL to use the NV vector math lib, asserts, etc.
- Converted AVPCL to use double instead of float.
- Added a 4x4 symmetric eigensolver for AVPCL; it's based on the existing 3x3 one, but I had to rewrite the Householder reduction stage. As with ZOH, using the eigensolver (instead of SVD) gives a ~25% speedup without significantly affecting RMSE.
- Encapsulated the ZOH and AVPCL code in their own namespaces to keep everything separate.
- Added some missing vector operators to the nvmath lib.
This commit is contained in:
nathaniel.reed@gmail.com
2013-12-07 02:17:08 +00:00
parent f2fa0517b5
commit ab316deeaa
86 changed files with 2944 additions and 11081 deletions

View File

@ -27,9 +27,10 @@
#include "nvcore/Stream.h"
#include "nvcore/Utils.h" // swap
#include "nvmath/Half.h"
#include "nvmath/Vector.inl"
#include "nvtt/bc6h/zoh.h"
#include "nvtt/bc6h/utils.h"
#include "nvtt/bc7/avpcl.h"
using namespace nv;
@ -617,7 +618,7 @@ void BlockCTX1::setIndices(int * idx)
/// Decode BC6 block.
void BlockBC6::decodeBlock(ColorSet * set) const
{
Tile tile(4, 4);
ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile);
// Convert ZOH's tile struct back to NVTT's, and convert half to float.
@ -626,9 +627,9 @@ void BlockBC6::decodeBlock(ColorSet * set) const
{
for (uint x = 0; x < 4; ++x)
{
uint16 rHalf = Tile::float2half(tile.data[y][x].x);
uint16 gHalf = Tile::float2half(tile.data[y][x].y);
uint16 bHalf = Tile::float2half(tile.data[y][x].z);
uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
set->colors[y * 4 + x].x = to_float(rHalf);
set->colors[y * 4 + x].y = to_float(gHalf);
set->colors[y * 4 + x].z = to_float(bHalf);
@ -641,6 +642,26 @@ void BlockBC6::decodeBlock(ColorSet * set) const
}
/// Decode this BC7 block into a 4x4 block of 8-bit RGBA colors.
/// @param block  Destination color block; each of its 16 texels is written.
void BlockBC7::decodeBlock(ColorBlock * block) const
{
    // Let AVPCL unpack the raw block bytes into its own 4x4 tile structure.
    AVPCL::Tile decoded(4, 4);
    AVPCL::decompress(reinterpret_cast<const char *>(data), decoded);

    // Copy AVPCL's tile into NVTT's color block, walking the texels row by row.
    for (uint i = 0; i < 16; ++i)
    {
        const uint x = i % 4;
        const uint y = i / 4;

        const Vector4 texel = decoded.data[y][x];

        // Note: decoded channel values are in the [0, 255] range and should already be
        // integers, because BC7 never uses more than 8 bits per channel. So the plain
        // (truncating) conversion is used, with no rounding.
        const uint8 r = uint8(texel.x);
        const uint8 g = uint8(texel.y);
        const uint8 b = uint8(texel.z);
        const uint8 a = uint8(texel.w);

        block->color(x, y).setRGBA(r, g, b, a);
    }
}
/// Flip CTX1 block vertically.
inline void BlockCTX1::flip4()
{
@ -707,3 +728,9 @@ Stream & nv::operator<<(Stream & stream, BlockBC6 & block)
stream.serialize(&block, sizeof(block));
return stream;
}
/// Serialize a BC7 block through the given stream.
/// @param stream  Stream whose serialize() is handed the block's bytes.
/// @param block   Block to serialize; the whole struct is passed as one byte range.
/// @return The same stream, so calls can be chained.
Stream & nv::operator<<(Stream & stream, BlockBC7 & block)
{
    BlockBC7 * const raw = &block;
    stream.serialize(raw, sizeof(BlockBC7));
    return stream;
}

View File

@ -220,7 +220,13 @@ namespace nv
void decodeBlock(ColorSet * set) const;
};
/// !!!UNDONE: BC7 block
/// BC7 block.
/// Holds one compressed block as 16 opaque bytes; the bit fields inside are not
/// modelled here, so decoding works on the raw byte array.
struct BlockBC7
{
uint8 data[16]; // Raw block bytes. Not even going to try to write a union for this thing.
/// Decode this block into the given color block.
void decodeBlock(ColorBlock * block) const;
};
// Serialization functions.
@ -233,6 +239,7 @@ namespace nv
NVIMAGE_API Stream & operator<<(Stream & stream, BlockATI2 & block);
NVIMAGE_API Stream & operator<<(Stream & stream, BlockCTX1 & block);
NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC6 & block);
NVIMAGE_API Stream & operator<<(Stream & stream, BlockBC7 & block);
} // nv namespace

View File

@ -1410,6 +1410,12 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
}
}
}
else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM)
{
BlockBC7 block;
*stream << block;
block.decodeBlock(rgba);
}
else
{
nvDebugCheck(false);