569 lines
20 KiB
C++
569 lines
20 KiB
C++
// Copyright (c) 2009-2011 Ignacio Castano <castano@gmail.com>
|
|
// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano <icastano@nvidia.com>
|
|
//
|
|
// Permission is hereby granted, free of charge, to any person
|
|
// obtaining a copy of this software and associated documentation
|
|
// files (the "Software"), to deal in the Software without
|
|
// restriction, including without limitation the rights to use,
|
|
// copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
// copies of the Software, and to permit persons to whom the
|
|
// Software is furnished to do so, subject to the following
|
|
// conditions:
|
|
//
|
|
// The above copyright notice and this permission notice shall be
|
|
// included in all copies or substantial portions of the Software.
|
|
//
|
|
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
|
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
|
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
|
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
|
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
|
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
|
// OTHER DEALINGS IN THE SOFTWARE.
|
|
|
|
#include "CompressorRGB.h"
|
|
#include "CompressionOptions.h"
|
|
#include "OutputOptions.h"
|
|
|
|
#include "nvimage/Image.h"
|
|
#include "nvimage/FloatImage.h"
|
|
#include "nvimage/PixelFormat.h"
|
|
|
|
#include "nvmath/Color.h"
|
|
#include "nvmath/Half.h"
|
|
#include "nvmath/ftoi.h"
|
|
#include "nvmath/Vector.inl"
|
|
|
|
#include "nvcore/Debug.h"
|
|
|
|
using namespace nv;
|
|
using namespace nvtt;
|
|
|
|
namespace
|
|
{
|
|
/* 11 and 10 bit floating point numbers according to the OpenGL packed float extension:
|
|
http://www.opengl.org/registry/specs/EXT/packed_float.txt
|
|
|
|
2.1.A Unsigned 11-Bit Floating-Point Numbers
|
|
|
|
An unsigned 11-bit floating-point number has no sign bit, a 5-bit
|
|
exponent (E), and a 6-bit mantissa (M). The value of an unsigned
|
|
11-bit floating-point number (represented as an 11-bit unsigned
|
|
integer N) is determined by the following:
|
|
|
|
0.0, if E == 0 and M == 0,
|
|
2^-14 * (M / 64), if E == 0 and M != 0,
|
|
2^(E-15) * (1 + M/64), if 0 < E < 31,
|
|
INF, if E == 31 and M == 0, or
|
|
NaN, if E == 31 and M != 0,
|
|
|
|
where
|
|
|
|
E = floor(N / 64), and
|
|
M = N mod 64.
|
|
|
|
Implementations are also allowed to use any of the following
|
|
alternative encodings:
|
|
|
|
0.0, if E == 0 and M != 0
|
|
2^(E-15) * (1 + M/64) if E == 31 and M == 0
|
|
2^(E-15) * (1 + M/64) if E == 31 and M != 0
|
|
|
|
When a floating-point value is converted to an unsigned 11-bit
|
|
floating-point representation, finite values are rounded to the closest
|
|
representable finite value. While less accurate, implementations
|
|
are allowed to always round in the direction of zero. This means
|
|
negative values are converted to zero. Likewise, finite positive
|
|
values greater than 65024 (the maximum finite representable unsigned
|
|
11-bit floating-point value) are converted to 65024. Additionally:
|
|
negative infinity is converted to zero; positive infinity is converted
|
|
to positive infinity; and both positive and negative NaN are converted
|
|
to positive NaN.
|
|
|
|
Any representable unsigned 11-bit floating-point value is legal
|
|
as input to a GL command that accepts 11-bit floating-point data.
|
|
The result of providing a value that is not a floating-point number
|
|
(such as infinity or NaN) to such a command is unspecified, but must
|
|
not lead to GL interruption or termination. Providing a denormalized
|
|
number or negative zero to GL must yield predictable results.
|
|
|
|
2.1.B Unsigned 10-Bit Floating-Point Numbers
|
|
|
|
An unsigned 10-bit floating-point number has no sign bit, a 5-bit
|
|
exponent (E), and a 5-bit mantissa (M). The value of an unsigned
|
|
10-bit floating-point number (represented as a 10-bit unsigned
|
|
integer N) is determined by the following:
|
|
|
|
0.0, if E == 0 and M == 0,
|
|
2^-14 * (M / 32), if E == 0 and M != 0,
|
|
2^(E-15) * (1 + M/32), if 0 < E < 31,
|
|
INF, if E == 31 and M == 0, or
|
|
NaN, if E == 31 and M != 0,
|
|
|
|
where
|
|
|
|
E = floor(N / 32), and
|
|
M = N mod 32.
|
|
|
|
When a floating-point value is converted to an unsigned 10-bit
|
|
floating-point representation, finite values are rounded to the closest
|
|
representable finite value. While less accurate, implementations
|
|
are allowed to always round in the direction of zero. This means
|
|
negative values are converted to zero. Likewise, finite positive
|
|
values greater than 64512 (the maximum finite representable unsigned
|
|
10-bit floating-point value) are converted to 64512. Additionally:
|
|
negative infinity is converted to zero; positive infinity is converted
|
|
to positive infinity; and both positive and negative NaN are converted
|
|
to positive NaN.
|
|
|
|
Any representable unsigned 10-bit floating-point value is legal
|
|
as input to a GL command that accepts 10-bit floating-point data.
|
|
The result of providing a value that is not a floating-point number
|
|
(such as infinity or NaN) to such a command is unspecified, but must
|
|
not lead to GL interruption or termination. Providing a denormalized
|
|
number or negative zero to GL must yield predictable results.
|
|
*/
|
|
|
|
// @@ Is this correct? Not tested!
|
|
// 6 bits of mantissa, 5 bits of exponent.
|
|
static uint toFloat11(float f) {
|
|
if (f < 0) f = 0; // Flush to 0 or to epsilon?
|
|
if (f > 65024) f = 65024; // Flush to infinity or max?
|
|
|
|
Float754 F;
|
|
F.value = f;
|
|
|
|
uint E = F.field.biasedexponent - 127 + 15;
|
|
nvDebugCheck(E < 32);
|
|
|
|
uint M = F.field.mantissa >> (23 - 6);
|
|
|
|
return (E << 6) | M;
|
|
}
|
|
|
|
// @@ Is this correct? Not tested!
|
|
// 5 bits of mantissa, 5 bits of exponent.
|
|
static uint toFloat10(float f) {
|
|
if (f < 0) f = 0; // Flush to 0 or to epsilon?
|
|
if (f > 64512) f = 64512; // Flush to infinity or max?
|
|
|
|
Float754 F;
|
|
F.value = f;
|
|
|
|
uint E = F.field.biasedexponent - 127 + 15;
|
|
nvDebugCheck(E < 32);
|
|
|
|
uint M = F.field.mantissa >> (23 - 5);
|
|
|
|
return (E << 5) | M;
|
|
}
|
|
|
|
|
|
// IC: Inf/NaN and denormal handling based on DirectXMath.
|
|
static float fromFloat11(uint u) {
|
|
// 5 bit exponent
|
|
// 6 bit mantissa
|
|
|
|
uint E = (u >> 6) & 0x1F;
|
|
uint M = u & 0x3F;
|
|
|
|
Float754 F;
|
|
F.field.negative = 0;
|
|
|
|
if (E == 0x1f) { // INF or NAN.
|
|
E = 0xFF;
|
|
}
|
|
else {
|
|
if (E != 0) {
|
|
F.field.biasedexponent = E + 127 - 15;
|
|
F.field.mantissa = M << (23 - 6);
|
|
}
|
|
else if (M != 0) {
|
|
E = 1;
|
|
do {
|
|
E--;
|
|
M <<= 1;
|
|
} while((M & 0x40) == 0);
|
|
|
|
M &= 0x3F;
|
|
}
|
|
}
|
|
|
|
F.field.biasedexponent = 0xFF;
|
|
F.field.mantissa = M << (23 - 6);
|
|
|
|
return F.value;
|
|
#if 0
|
|
// X Channel (6-bit mantissa)
|
|
Mantissa = pSource->xm;
|
|
|
|
if ( pSource->xe == 0x1f ) // INF or NAN
|
|
{
|
|
Result[0] = 0x7f800000 | (pSource->xm << 17);
|
|
}
|
|
else
|
|
{
|
|
if ( pSource->xe != 0 ) // The value is normalized
|
|
{
|
|
Exponent = pSource->xe;
|
|
}
|
|
else if (Mantissa != 0) // The value is denormalized
|
|
{
|
|
// Normalize the value in the resulting float
|
|
Exponent = 1;
|
|
|
|
do
|
|
{
|
|
Exponent--;
|
|
Mantissa <<= 1;
|
|
} while ((Mantissa & 0x40) == 0);
|
|
|
|
Mantissa &= 0x3F;
|
|
}
|
|
else // The value is zero
|
|
{
|
|
Exponent = (uint32_t)-112;
|
|
}
|
|
|
|
Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
// https://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
|
|
Float3SE toFloat3SE(float r, float g, float b)
|
|
{
|
|
const int N = 9; // Mantissa bits.
|
|
const int E = 5; // Exponent bits.
|
|
const int Emax = (1 << E) - 1; // 31
|
|
const int B = (1 << (E-1)) - 1; // 15
|
|
const float sharedexp_max = float((1 << N) - 1) / (1 << N) * (1 << (Emax-B)); // 65408
|
|
|
|
// Clamp color components.
|
|
r = max(0.0f, min(sharedexp_max, r));
|
|
g = max(0.0f, min(sharedexp_max, g));
|
|
b = max(0.0f, min(sharedexp_max, b));
|
|
|
|
// Get max component.
|
|
float max_c = max3(r, g, b);
|
|
|
|
// Compute shared exponent.
|
|
int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B;
|
|
|
|
int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N)));
|
|
|
|
int exp_shared = exp_shared_p;
|
|
if (max_s == (1 << N)) exp_shared++;
|
|
|
|
Float3SE v;
|
|
v.e = exp_shared;
|
|
|
|
// Compute mantissas.
|
|
v.xm = ftoi_round(r / (1 << (exp_shared - B - N)));
|
|
v.ym = ftoi_round(g / (1 << (exp_shared - B - N)));
|
|
v.zm = ftoi_round(b / (1 << (exp_shared - B - N)));
|
|
|
|
return v;
|
|
}
|
|
|
|
// Expands a shared exponent value into three floats: every mantissa is multiplied by the
// same power of two, which is built directly from the exponent field.
Vector3 fromFloat3SE(Float3SE v) {
    // 0x33800000 is the bit pattern of 2^-24; adding the shared exponent to the float
    // exponent field yields 2^(e - 24).
    Float754 power;
    power.raw = 0x33800000 + (v.e << 23);

    const Vector3 mantissas(float(v.xm), float(v.ym), float(v.zm));
    return power.value * mantissas;
}
|
|
|
|
// These are based on: http://www.graphics.cornell.edu/~bjw/rgbe/rgbe.c
|
|
uint toRGBE(float r, float g, float b)
|
|
{
|
|
float v = max3(r, g, b);
|
|
|
|
uint rgbe;
|
|
|
|
if (v < 1e-32) {
|
|
rgbe = 0;
|
|
}
|
|
else {
|
|
int e;
|
|
float scale = frexpf(v, &e) * 256.0f / v;
|
|
//Float754 f;
|
|
//f.value = v;
|
|
//float scale = f.field.biasedexponent * 256.0f / v;
|
|
//e = f.field.biasedexponent - 127
|
|
|
|
rgbe |= U8(ftoi_round(r * scale)) << 0;
|
|
rgbe |= U8(ftoi_round(g * scale)) << 8;
|
|
rgbe |= U8(ftoi_round(b * scale)) << 16;
|
|
rgbe |= U8(e + 128) << 24;
|
|
}
|
|
|
|
return rgbe;
|
|
}
|
|
|
|
// Unpacks a 32-bit RGBE value (red in the lowest byte, exponent in the highest byte) into
// three floats. An exponent byte of zero decodes to black.
Vector3 fromRGBE(uint rgbe) {
    const uint exponent = rgbe >> 24;
    if (exponent == 0) {
        return Vector3(0);
    }

    const float red   = float(rgbe & 0xFF);
    const float green = float((rgbe >> 8) & 0xFF);
    const float blue  = float((rgbe >> 16) & 0xFF);

    // +8 to divide by 256. @@ Shouldn't we divide by 255 instead?
    const float scale = ldexpf(1.0f, int(exponent) - (128 + 8));

    return scale * Vector3(red, green, blue);
}
|
|
|
|
|
|
struct BitStream
|
|
{
|
|
BitStream(uint8 * ptr) : ptr(ptr), buffer(0), bits(0) {
|
|
}
|
|
|
|
void putBits(uint p, int bitCount)
|
|
{
|
|
nvDebugCheck(bits < 8);
|
|
nvDebugCheck(bitCount <= 32);
|
|
|
|
uint64 buffer = (this->buffer << bitCount) | p;
|
|
uint bits = this->bits + bitCount;
|
|
|
|
while (bits >= 8)
|
|
{
|
|
*ptr++ = (buffer & 0xFF);
|
|
|
|
buffer >>= 8;
|
|
bits -= 8;
|
|
}
|
|
|
|
this->buffer = (uint8)buffer;
|
|
this->bits = bits;
|
|
}
|
|
|
|
void putFloat(float f)
|
|
{
|
|
nvDebugCheck(bits == 0); // @@ Do not require alignment.
|
|
*((float *)ptr) = f;
|
|
ptr += 4;
|
|
}
|
|
|
|
void putHalf(float f)
|
|
{
|
|
nvDebugCheck(bits == 0); // @@ Do not require alignment.
|
|
*((uint16 *)ptr) = to_half(f);
|
|
ptr += 2;
|
|
}
|
|
|
|
void putFloat11(float f)
|
|
{
|
|
putBits(toFloat11(f), 11);
|
|
}
|
|
|
|
void putFloat10(float f)
|
|
{
|
|
putBits(toFloat10(f), 10);
|
|
}
|
|
|
|
void flush()
|
|
{
|
|
nvDebugCheck(bits < 8);
|
|
if (bits) {
|
|
*ptr++ = buffer;
|
|
buffer = 0;
|
|
bits = 0;
|
|
}
|
|
}
|
|
|
|
void align(int alignment)
|
|
{
|
|
nvDebugCheck(alignment >= 1);
|
|
flush();
|
|
int remainder = (int)((uintptr_t)ptr % alignment);
|
|
if (remainder != 0) {
|
|
putBits(0, (alignment - remainder) * 8);
|
|
}
|
|
}
|
|
|
|
uint8 * ptr;
|
|
uint8 buffer;
|
|
uint8 bits;
|
|
};
|
|
|
|
} // namespace
|
|
|
|
|
|
|
|
void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
|
{
|
|
nvDebugCheck (compressionOptions.format == nvtt::Format_RGBA);
|
|
|
|
uint bitCount;
|
|
uint rmask, rshift, rsize;
|
|
uint gmask, gshift, gsize;
|
|
uint bmask, bshift, bsize;
|
|
uint amask, ashift, asize;
|
|
|
|
if (compressionOptions.pixelType == nvtt::PixelType_Float)
|
|
{
|
|
rsize = compressionOptions.rsize;
|
|
gsize = compressionOptions.gsize;
|
|
bsize = compressionOptions.bsize;
|
|
asize = compressionOptions.asize;
|
|
|
|
// Other float sizes are not supported and will be zero-padded.
|
|
nvDebugCheck(rsize == 0 || rsize == 10 || rsize == 11 || rsize == 16 || rsize == 32);
|
|
nvDebugCheck(gsize == 0 || gsize == 10 || gsize == 11 || gsize == 16 || gsize == 32);
|
|
nvDebugCheck(bsize == 0 || bsize == 10 || bsize == 11 || bsize == 16 || bsize == 32);
|
|
nvDebugCheck(asize == 0 || asize == 10 || asize == 11 || asize == 16 || asize == 32);
|
|
|
|
bitCount = rsize + gsize + bsize + asize;
|
|
}
|
|
else
|
|
{
|
|
if (compressionOptions.bitcount != 0)
|
|
{
|
|
bitCount = compressionOptions.bitcount;
|
|
nvCheck(bitCount <= 32);
|
|
|
|
rmask = compressionOptions.rmask;
|
|
gmask = compressionOptions.gmask;
|
|
bmask = compressionOptions.bmask;
|
|
amask = compressionOptions.amask;
|
|
|
|
PixelFormat::maskShiftAndSize(rmask, &rshift, &rsize);
|
|
PixelFormat::maskShiftAndSize(gmask, &gshift, &gsize);
|
|
PixelFormat::maskShiftAndSize(bmask, &bshift, &bsize);
|
|
PixelFormat::maskShiftAndSize(amask, &ashift, &asize);
|
|
}
|
|
else
|
|
{
|
|
rsize = compressionOptions.rsize;
|
|
gsize = compressionOptions.gsize;
|
|
bsize = compressionOptions.bsize;
|
|
asize = compressionOptions.asize;
|
|
|
|
bitCount = rsize + gsize + bsize + asize;
|
|
nvCheck(bitCount <= 32);
|
|
|
|
ashift = 0;
|
|
bshift = ashift + asize;
|
|
gshift = bshift + bsize;
|
|
rshift = gshift + gsize;
|
|
|
|
rmask = ((1 << rsize) - 1) << rshift;
|
|
gmask = ((1 << gsize) - 1) << gshift;
|
|
bmask = ((1 << bsize) - 1) << bshift;
|
|
amask = ((1 << asize) - 1) << ashift;
|
|
}
|
|
}
|
|
|
|
const uint pitch = computeBytePitch(w, bitCount, compressionOptions.pitchAlignment);
|
|
const uint whd = w * h * d;
|
|
|
|
// Allocate output scanline.
|
|
uint8 * const dst = malloc<uint8>(pitch);
|
|
|
|
for (uint z = 0; z < d; z++)
|
|
{
|
|
for (uint y = 0; y < h; y++)
|
|
{
|
|
const float * src = (const float *)data + (z * h + y) * w;
|
|
|
|
BitStream stream(dst);
|
|
|
|
for (uint x = 0; x < w; x++)
|
|
{
|
|
float r = src[x + 0 * whd];
|
|
float g = src[x + 1 * whd];
|
|
float b = src[x + 2 * whd];
|
|
float a = src[x + 3 * whd];
|
|
|
|
if (compressionOptions.pixelType == nvtt::PixelType_Float)
|
|
{
|
|
if (rsize == 32) stream.putFloat(r);
|
|
else if (rsize == 16) stream.putHalf(r);
|
|
else if (rsize == 11) stream.putFloat11(r);
|
|
else if (rsize == 10) stream.putFloat10(r);
|
|
else stream.putBits(0, rsize);
|
|
|
|
if (gsize == 32) stream.putFloat(g);
|
|
else if (gsize == 16) stream.putHalf(g);
|
|
else if (gsize == 11) stream.putFloat11(g);
|
|
else if (gsize == 10) stream.putFloat10(g);
|
|
else stream.putBits(0, gsize);
|
|
|
|
if (bsize == 32) stream.putFloat(b);
|
|
else if (bsize == 16) stream.putHalf(b);
|
|
else if (bsize == 11) stream.putFloat11(b);
|
|
else if (bsize == 10) stream.putFloat10(b);
|
|
else stream.putBits(0, bsize);
|
|
|
|
if (asize == 32) stream.putFloat(a);
|
|
else if (asize == 16) stream.putHalf(a);
|
|
else if (asize == 11) stream.putFloat11(a);
|
|
else if (asize == 10) stream.putFloat10(a);
|
|
else stream.putBits(0, asize);
|
|
}
|
|
else if (compressionOptions.pixelType == nvtt::PixelType_SharedExp)
|
|
{
|
|
if (rsize == 9 && gsize == 9 && bsize == 9 && asize == 5) {
|
|
Float3SE v = toFloat3SE(r, g, b);
|
|
stream.putBits(v.v, 32);
|
|
}
|
|
else if (rsize == 8 && gsize == 8 && bsize == 8 && asize == 8) {
|
|
// @@
|
|
}
|
|
else {
|
|
// @@ Not supported. Filling with zeros.
|
|
stream.putBits(0, bitCount);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
// We first convert to 16 bits, then to the target size. @@ If greater than 16 bits, this will truncate and bitexpand.
|
|
|
|
// @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt
|
|
|
|
int ir, ig, ib, ia;
|
|
if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) {
|
|
ir = iround(clamp(r * 65535.0f, 0.0f, 65535.0f));
|
|
ig = iround(clamp(g * 65535.0f, 0.0f, 65535.0f));
|
|
ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f));
|
|
ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f));
|
|
}
|
|
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
|
|
// @@
|
|
}
|
|
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
|
|
ir = iround(clamp(r, 0.0f, 65535.0f));
|
|
ig = iround(clamp(g, 0.0f, 65535.0f));
|
|
ib = iround(clamp(b, 0.0f, 65535.0f));
|
|
ia = iround(clamp(a, 0.0f, 65535.0f));
|
|
}
|
|
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
|
|
// @@
|
|
}
|
|
|
|
uint p = 0;
|
|
p |= PixelFormat::convert(ir, 16, rsize) << rshift;
|
|
p |= PixelFormat::convert(ig, 16, gsize) << gshift;
|
|
p |= PixelFormat::convert(ib, 16, bsize) << bshift;
|
|
p |= PixelFormat::convert(ia, 16, asize) << ashift;
|
|
|
|
stream.putBits(p, bitCount);
|
|
}
|
|
}
|
|
|
|
// Zero padding.
|
|
stream.align(compressionOptions.pitchAlignment);
|
|
nvDebugCheck(stream.ptr == dst + pitch);
|
|
|
|
// Scanlines are always byte-aligned.
|
|
outputOptions.writeData(dst, pitch);
|
|
}
|
|
}
|
|
|
|
free(dst);
|
|
}
|