You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
nvidia-texture-tools/src/nvtt/CompressorDXT5_RGBM.cpp

428 lines
13 KiB
C++

#include "CompressorDXT5_RGBM.h"
#include "CompressorDXT1.h"
#include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvmath/Color.inl"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
#include "nvmath/ftoi.h"
#include "nvthread/Atomic.h"
#include <stdio.h>
using namespace nv;
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
float weight_sum = 0;
for (uint i = 0; i < 16; i++) {
const Vector4 & c = input_colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = (M - min_m) / (1 - min_m);
input_colors_rgbm[i] = Vector4(r, g, b, a);
rgb_weights[i] = input_weights[i] * M;
weight_sum += input_weights[i];
}
if (weight_sum == 0) {
for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
}
// Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);
// Decompress RGB/M block.
nv::ColorBlock RGB;
output->color.decodeBlock(&RGB);
// Compute M values to compensate for RGB's error.
AlphaBlock4x4 M;
for (int i = 0; i < 16; i++) {
const Vector4 & c = input_colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float rm = RGB.color(i).r / 255.0f;
float gm = RGB.color(i).g / 255.0f;
float bm = RGB.color(i).b / 255.0f;
// compute m such that m * (r/M, g/M, b/M) == RGB
// Three equations, one unknown:
// m * r/M == R
// m * g/M == G
// m * b/M == B
// Solve in the least squares sense!
// m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
// m == dot(rgb, RGB) / dot(rgb, rgb)
float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));
m = (m - min_m) / (1 - min_m);
#if 0
// IC: This does indeed happen. What does that mean? The best choice of m is above the available range. If this happened too often it would make sense to scale m in
// the pixel shader to allow for more accurate reconstruction. However, that scaling would reduce the precision over the [0-1] range. I haven't measured how much
// error is introduced by the clamping vs. how much the error would change with the increased range.
if (m > 1.0f) {
uint counter = atomicIncrement(&atomic_counter);
printf("It happens %u times!", counter);
}
#endif
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = input_weights[i];
}
// Compress M.
//if (compressionOptions.quality == Quality_Fastest) {
// QuickCompress::compressDXT5A(M, &output->alpha);
/*}
else {*/
OptimalCompress::compressDXT5A(M, &output->alpha);
//}
#if 0 // Multiple iterations do not seem to help.
// Decompress M.
output->alpha.decodeBlock(&M);
// Feed it back to the input RGB block.
for (uint i = 0; i < 16; i++) {
const Vector4 & c = input_colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
float r = R / m;
float g = G / m;
float b = B / m;
float a = float(M.alpha[i]) / 255.0f;
input_colors_rgbm[i] = Vector4(r, g, b, a);
rgb_weights[i] = input_weights[i] * m;
}
#endif
return 0; // @@
}
#if 0
BlockDXT5 * block = new(output)BlockDXT5;
// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.
// Compress the resulting M values optimally.
// Repeat this several times until compression error does not improve?
//Vector3 rgb_block[16];
//float m_block[16];
// Init RGB/M block.
#if 0
nvsquish::WeightedClusterFit fit;
ColorBlock rgba;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgba.color(i) = toColor32(Vector4(r, g, b, a));
}
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}
#endif
#if 1
ColorSet rgb;
rgb.allocate(4, 4);
for (uint i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
// Decompress RGB/M block.
nv::ColorBlock RGB;
block->color.decodeBlock(&RGB);
#if 1
AlphaBlock4x4 M;
for (int i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#else
OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif
#if 0
// Decompress M.
block->alpha.decodeBlock(&M);
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
//float m = max(max(R, G), max(B, min_m));
float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
float r = R / m;
float g = G / m;
float b = B / m;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
#if 0
block->color.decodeBlock(&RGB);
//AlphaBlock4x4 M;
//M.initWeights(src);
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#endif
#if 0
src.fromRGBM(M, min_m);
src.createMinimalSet(/*ignoreTransparent=*/true);
if (src.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(src.color(0), &block->color);
}
else {
// @@ Use our improved compressor.
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&src);
Vector3 start, end;
fit.compress4(&start, &end);
if (fit.compress3(&start, &end)) {
QuickCompress::outputBlock3(src, start, end, block->color);
}
else {
QuickCompress::outputBlock4(src, start, end, block->color);
}
}
#endif // 0
// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?
// RGBM encoding.
// Maximize precision.
// - Number of possible grey levels:
// - Naive: 2^3 = 8
// - Better: 2^3 + 2^2 = 12
// - How to choose min_m?
// - Ideal = Adaptive per block, don't know where to store.
// - Adaptive per lightmap. How to compute optimal?
// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.
// - Optimal compressor: Interpolation artifacts.
// - Color transform.
// - Measure error in post-tone-mapping color space.
// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
// - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space.
// - Enhanced DXT compressor.
// - Typical RGBM encoding as follows:
// rgb -> M = max(rgb), RGB=rgb/M -> RGBM
// - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
// - We could ensure that M' >= M during compression.
// - We could clamp RGB anyway.
// - We could add a fixed scale value to take into account compression errors and avoid clamping.
// Compress color.
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}*/
#endif // 0