nvidia-texture-tools/src/nvtt/CompressorDXT5_RGBM.cpp

#include "CompressorDXT5_RGBM.h"
#include "CompressorDXT1.h"

#include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h"

#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"

#include "nvmath/Color.inl"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
#include "nvmath/ftoi.h"

#include "nvthread/Atomic.h"
#include <stdio.h>

using namespace nv;

//static uint atomic_counter = 0;


float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {

    // Convert to RGBM.
    Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
    float rgb_weights[16];

    float weight_sum = 0;

    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = input_colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, min_m));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = (M - min_m) / (1 - min_m);

        input_colors_rgbm[i] = Vector4(r, g, b, a);
        rgb_weights[i] = input_weights[i] * M;
        weight_sum += input_weights[i];
    }

    if (weight_sum == 0) {
        for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
    }

    // Compress RGB.
    compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);

    // Decompress RGB/M block.
    nv::ColorBlock RGB;
    output->color.decodeBlock(&RGB);

    // Compute M values to compensate for RGB's error.
    AlphaBlock4x4 M;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = input_colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float rm = RGB.color(i).r / 255.0f;
        float gm = RGB.color(i).g / 255.0f;
        float bm = RGB.color(i).b / 255.0f;

        // compute m such that m * (r/M, g/M, b/M) == RGB
    
        // Three equations, one unknown:
        //  m * r/M == R
        //  m * g/M == G
        //  m * b/M == B
        
        // Solve in the least squares sense!

        // m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T

        // m == dot(rgb, RGB) / dot(rgb, rgb)

        float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));

        m = (m - min_m) / (1 - min_m);

#if 0
        // IC: This does indeed happen. What does that mean? The best choice of m is above the available range. If this happened too often it would make sense to scale m in
        // the pixel shader to allow for more accurate reconstruction. However, that scaling would reduce the precision over the [0-1] range. I haven't measured how much
        // error is introduced by the clamping vs. how much the error would change with the increased range.
        if (m > 1.0f) {
            uint counter = atomicIncrement(&atomic_counter);
            printf("It happens %u times!", counter);
        }
#endif

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = input_weights[i];
    }

    // Compress M.
    //if (compressionOptions.quality == Quality_Fastest) {
    //    QuickCompress::compressDXT5A(M, &output->alpha);
    /*}
    else {*/
        OptimalCompress::compressDXT5A(M, &output->alpha);
    //}


#if 0   // Multiple iterations do not seem to help.
    // Decompress M.
    output->alpha.decodeBlock(&M);

    // Feed it back to the input RGB block.
    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = input_colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;

        float r = R / m;
        float g = G / m;
        float b = B / m;
        float a = float(M.alpha[i]) / 255.0f;

        input_colors_rgbm[i] = Vector4(r, g, b, a);
        rgb_weights[i] = input_weights[i] * m;
    }
#endif

    return 0; // @@ 
}


#if 0

    BlockDXT5 * block = new(output)BlockDXT5;

    // Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.

    // Compress the resulting M values optimally.

    // Repeat this several times until compression error does not improve?

    //Vector3 rgb_block[16];
    //float m_block[16];


    // Init RGB/M block.
#if 0
    nvsquish::WeightedClusterFit fit;

    ColorBlock rgba;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, min_m));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = c.w;

        rgba.color(i) = toColor32(Vector4(r, g, b, a));
    }

    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &block->color);
    }
    else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        int flags = 0;
        if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
        fit.SetColourSet(&colours, 0);
        fit.Compress(&block->color);
    }
#endif
#if 1
    ColorSet rgb;
    rgb.allocate(4, 4);

    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, min_m));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = c.w;

        rgb.colors[i] = Vector4(r, g, b, a);
        rgb.indices[i] = i;
        rgb.weights[i] = max(weights[i], 0.001f);// weights[i];   // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
    }

    rgb.createMinimalSet(/*ignoreTransparent=*/true);

    if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
    }
    else {
        ClusterFit fit;
        fit.setColorWeights(compressionOptions.colorWeight);
        fit.setColorSet(&rgb);

        Vector3 start, end;
        fit.compress4(&start, &end);

        QuickCompress::outputBlock4(rgb, start, end, &block->color);
    }
#endif

    // Decompress RGB/M block.
    nv::ColorBlock RGB;
    block->color.decodeBlock(&RGB);
    
#if 1
    AlphaBlock4x4 M;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = colors[i];
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float r = RGB.color(i).r / 255.0f;
        float g = RGB.color(i).g / 255.0f;
        float b = RGB.color(i).b / 255.0f;

        float m = (R / r + G / g + B / b) / 3.0f;
        //float m = max((R / r + G / g + B / b) / 3.0f, min_m);
        //float m = max(max(R / r, G / g), max(B / b, min_m));
        //float m = max(max(R, G), max(B, min_m));

        m = (m - min_m) / (1 - min_m);

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = weights[i];
    }

    // Compress M.
    if (compressionOptions.quality == Quality_Fastest) {
        QuickCompress::compressDXT5A(M, &block->alpha);
    }
    else {
        OptimalCompress::compressDXT5A(M, &block->alpha);
    }
#else
    OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif

#if 0
    // Decompress M.
    block->alpha.decodeBlock(&M);

    rgb.allocate(src.w, src.h);     // @@ Handle smaller blocks.

    for (uint i = 0; i < src.colorCount; i++) {
        const Vector4 & c = src.color(i);

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        //float m = max(max(R, G), max(B, min_m));
        float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
        float r = R / m;
        float g = G / m;
        float b = B / m;
        float a = c.w;

        rgb.colors[i] = Vector4(r, g, b, a);
        rgb.indices[i] = i;
        rgb.weights[i] = max(c.w, 0.001f);// src.weights[i];   // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
    }

    rgb.createMinimalSet(/*ignoreTransparent=*/true);

    if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
    }
    else {
        ClusterFit fit;
        fit.setMetric(compressionOptions.colorWeight);
        fit.setColourSet(&rgb);

        Vector3 start, end;
        fit.compress4(&start, &end);

        QuickCompress::outputBlock4(rgb, start, end, &block->color);
    }
#endif

#if 0
    block->color.decodeBlock(&RGB);

    //AlphaBlock4x4 M;
    //M.initWeights(src);
    
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float r = RGB.color(i).r / 255.0f;
        float g = RGB.color(i).g / 255.0f;
        float b = RGB.color(i).b / 255.0f;

        float m = (R / r + G / g + B / b) / 3.0f;
        //float m = max((R / r + G / g + B / b) / 3.0f, min_m);
        //float m = max(max(R / r, G / g), max(B / b, min_m));
        //float m = max(max(R, G), max(B, min_m));

        m = (m - min_m) / (1 - min_m);

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = src.weights[i];
    }

    // Compress M.
    if (compressionOptions.quality == Quality_Fastest) {
        QuickCompress::compressDXT5A(M, &block->alpha);
    }
    else {
        OptimalCompress::compressDXT5A(M, &block->alpha);
    }
#endif


#if 0
    src.fromRGBM(M, min_m);

    src.createMinimalSet(/*ignoreTransparent=*/true);

    if (src.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(src.color(0), &block->color);
    }
    else {
        // @@ Use our improved compressor.
        ClusterFit fit;
        fit.setMetric(compressionOptions.colorWeight);
        fit.setColourSet(&src);

        Vector3 start, end;
        fit.compress4(&start, &end);

        if (fit.compress3(&start, &end)) {
            QuickCompress::outputBlock3(src, start, end, block->color);
        }
        else {
            QuickCompress::outputBlock4(src, start, end, block->color);
        }
    }
#endif // 0

    // @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?


    // RGBM encoding.
    // Maximize precision.
    // - Number of possible grey levels:
    //   - Naive:  2^3 = 8
    //   - Better: 2^3 + 2^2 = 12
    //   - How to choose min_m? 
    //     - Ideal = Adaptive per block, don't know where to store.
    //     - Adaptive per lightmap. How to compute optimal?
    //     - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.

    // - Optimal compressor: Interpolation artifacts.

    // - Color transform. 
    //    - Measure error in post-tone-mapping color space. 
    //    - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
    //    - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space.

    // - Enhanced DXT compressor.
    //    - Typical RGBM encoding as follows:
    //      rgb -> M = max(rgb), RGB=rgb/M -> RGBM
    //    - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
    //      - We could ensure that M' >= M during compression.
    //      - We could clamp RGB anyway.
    //      - We could add a fixed scale value to take into account compression errors and avoid clamping.


    // Compress color.
    /*if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &block->color);
    }
    else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        int flags = 0;
        if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
        fit.SetColourSet(&colours, 0);
        fit.Compress(&block->color);
    }*/

#endif // 0
Merge changes from The Witness. 2015-03-24 19:14:49 +00:00			`#include "CompressorDXT5_RGBM.h"`
			`#include "CompressorDXT1.h"`

			`#include "OptimalCompressDXT.h"`
			`#include "QuickCompressDXT.h"`

			`#include "nvimage/ColorBlock.h"`
			`#include "nvimage/BlockDXT.h"`

			`#include "nvmath/Color.inl"`
			`#include "nvmath/Vector.inl"`
			`#include "nvmath/Fitting.h"`
			`#include "nvmath/ftoi.h"`

			`#include "nvthread/Atomic.h"`
			`#include <stdio.h>`

			`using namespace nv;`

Merge changes from The Witness. 2015-10-29 06:53:08 +00:00			`//static uint atomic_counter = 0;`
Merge changes from The Witness. 2015-03-24 19:14:49 +00:00

			`float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {`

			`// Convert to RGBM.`
			`Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?`
			`float rgb_weights[16];`

			`float weight_sum = 0;`

			`for (uint i = 0; i < 16; i++) {`
			`const Vector4 & c = input_colors[i];`

			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float M = max(max(R, G), max(B, min_m));`
			`float r = R / M;`
			`float g = G / M;`
			`float b = B / M;`
			`float a = (M - min_m) / (1 - min_m);`

			`input_colors_rgbm[i] = Vector4(r, g, b, a);`
			`rgb_weights[i] = input_weights[i] * M;`
			`weight_sum += input_weights[i];`
			`}`

			`if (weight_sum == 0) {`
			`for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;`
			`}`

			`// Compress RGB.`
			`compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /three_color_mode=/false, &output->color);`

			`// Decompress RGB/M block.`
			`nv::ColorBlock RGB;`
			`output->color.decodeBlock(&RGB);`

			`// Compute M values to compensate for RGB's error.`
			`AlphaBlock4x4 M;`
			`for (int i = 0; i < 16; i++) {`
			`const Vector4 & c = input_colors[i];`

			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float rm = RGB.color(i).r / 255.0f;`
			`float gm = RGB.color(i).g / 255.0f;`
			`float bm = RGB.color(i).b / 255.0f;`

			`// compute m such that m * (r/M, g/M, b/M) == RGB`

			`// Three equations, one unknown:`
			`// m * r/M == R`
			`// m * g/M == G`
			`// m * b/M == B`

			`// Solve in the least squares sense!`

			`// m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T`

			`// m == dot(rgb, RGB) / dot(rgb, rgb)`

			`float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));`

			`m = (m - min_m) / (1 - min_m);`

Merge changes from The Witness. 2015-10-29 06:53:08 +00:00			`#if 0`
			`// IC: This does indeed happen. What does that mean? The best choice of m is above the available range. If this happened too often it would make sense to scale m in`
			`// the pixel shader to allow for more accurate reconstruction. However, that scaling would reduce the precision over the [0-1] range. I haven't measured how much`
			`// error is introduced by the clamping vs. how much the error would change with the increased range.`
Merge changes from The Witness. 2015-03-24 19:14:49 +00:00			`if (m > 1.0f) {`
			`uint counter = atomicIncrement(&atomic_counter);`
			`printf("It happens %u times!", counter);`
			`}`
Merge changes from The Witness. 2015-10-29 06:53:08 +00:00			`#endif`
Merge changes from The Witness. 2015-03-24 19:14:49 +00:00
			`M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));`
			`M.weights[i] = input_weights[i];`
			`}`

			`// Compress M.`
			`//if (compressionOptions.quality == Quality_Fastest) {`
			`// QuickCompress::compressDXT5A(M, &output->alpha);`
			`/*}`
			`else {*/`
			`OptimalCompress::compressDXT5A(M, &output->alpha);`
			`//}`


			`#if 0 // Multiple iterations do not seem to help.`
			`// Decompress M.`
			`output->alpha.decodeBlock(&M);`

			`// Feed it back to the input RGB block.`
			`for (uint i = 0; i < 16; i++) {`
			`const Vector4 & c = input_colors[i];`

			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;`

			`float r = R / m;`
			`float g = G / m;`
			`float b = B / m;`
			`float a = float(M.alpha[i]) / 255.0f;`

			`input_colors_rgbm[i] = Vector4(r, g, b, a);`
			`rgb_weights[i] = input_weights[i] * m;`
			`}`
			`#endif`

			`return 0; // @@`
			`}`




			`#if 0`

			`BlockDXT5 * block = new(output)BlockDXT5;`

			`// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.`

			`// Compress the resulting M values optimally.`

			`// Repeat this several times until compression error does not improve?`

			`//Vector3 rgb_block[16];`
			`//float m_block[16];`


			`// Init RGB/M block.`
			`#if 0`
			`nvsquish::WeightedClusterFit fit;`

			`ColorBlock rgba;`
			`for (int i = 0; i < 16; i++) {`
			`const Vector4 & c = src.color(i);`
			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float M = max(max(R, G), max(B, min_m));`
			`float r = R / M;`
			`float g = G / M;`
			`float b = B / M;`
			`float a = c.w;`

			`rgba.color(i) = toColor32(Vector4(r, g, b, a));`
			`}`

			`if (rgba.isSingleColor())`
			`{`
			`OptimalCompress::compressDXT1(rgba.color(0), &block->color);`
			`}`
			`else`
			`{`
			`nvsquish::WeightedClusterFit fit;`
			`fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);`

			`int flags = 0;`
			`if (alphaMode == nvtt::AlphaMode_Transparency) flags \|= nvsquish::kWeightColourByAlpha;`

			`nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);`
			`fit.SetColourSet(&colours, 0);`
			`fit.Compress(&block->color);`
			`}`
			`#endif`
			`#if 1`
			`ColorSet rgb;`
			`rgb.allocate(4, 4);`

			`for (uint i = 0; i < 16; i++) {`
			`const Vector4 & c = colors[i];`

			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float M = max(max(R, G), max(B, min_m));`
			`float r = R / M;`
			`float g = G / M;`
			`float b = B / M;`
			`float a = c.w;`

			`rgb.colors[i] = Vector4(r, g, b, a);`
			`rgb.indices[i] = i;`
			`rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.`
			`}`

			`rgb.createMinimalSet(/ignoreTransparent=/true);`

			`if (rgb.isSingleColor(/ignoreAlpha=/true)) {`
			`OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);`
			`}`
			`else {`
			`ClusterFit fit;`
			`fit.setColorWeights(compressionOptions.colorWeight);`
			`fit.setColorSet(&rgb);`

			`Vector3 start, end;`
			`fit.compress4(&start, &end);`

			`QuickCompress::outputBlock4(rgb, start, end, &block->color);`
			`}`
			`#endif`

			`// Decompress RGB/M block.`
			`nv::ColorBlock RGB;`
			`block->color.decodeBlock(&RGB);`

			`#if 1`
			`AlphaBlock4x4 M;`
			`for (int i = 0; i < 16; i++) {`
			`const Vector4 & c = colors[i];`
			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float r = RGB.color(i).r / 255.0f;`
			`float g = RGB.color(i).g / 255.0f;`
			`float b = RGB.color(i).b / 255.0f;`

			`float m = (R / r + G / g + B / b) / 3.0f;`
			`//float m = max((R / r + G / g + B / b) / 3.0f, min_m);`
			`//float m = max(max(R / r, G / g), max(B / b, min_m));`
			`//float m = max(max(R, G), max(B, min_m));`

			`m = (m - min_m) / (1 - min_m);`

			`M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));`
			`M.weights[i] = weights[i];`
			`}`

			`// Compress M.`
			`if (compressionOptions.quality == Quality_Fastest) {`
			`QuickCompress::compressDXT5A(M, &block->alpha);`
			`}`
			`else {`
			`OptimalCompress::compressDXT5A(M, &block->alpha);`
			`}`
			`#else`
			`OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);`
			`#endif`

			`#if 0`
			`// Decompress M.`
			`block->alpha.decodeBlock(&M);`

			`rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.`

			`for (uint i = 0; i < src.colorCount; i++) {`
			`const Vector4 & c = src.color(i);`

			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`//float m = max(max(R, G), max(B, min_m));`
			`float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;`
			`float r = R / m;`
			`float g = G / m;`
			`float b = B / m;`
			`float a = c.w;`

			`rgb.colors[i] = Vector4(r, g, b, a);`
			`rgb.indices[i] = i;`
			`rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.`
			`}`

			`rgb.createMinimalSet(/ignoreTransparent=/true);`

			`if (rgb.isSingleColor(/ignoreAlpha=/true)) {`
			`OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);`
			`}`
			`else {`
			`ClusterFit fit;`
			`fit.setMetric(compressionOptions.colorWeight);`
			`fit.setColourSet(&rgb);`

			`Vector3 start, end;`
			`fit.compress4(&start, &end);`

			`QuickCompress::outputBlock4(rgb, start, end, &block->color);`
			`}`
			`#endif`

			`#if 0`
			`block->color.decodeBlock(&RGB);`

			`//AlphaBlock4x4 M;`
			`//M.initWeights(src);`

			`for (int i = 0; i < 16; i++) {`
			`const Vector4 & c = src.color(i);`
			`float R = saturate(c.x);`
			`float G = saturate(c.y);`
			`float B = saturate(c.z);`

			`float r = RGB.color(i).r / 255.0f;`
			`float g = RGB.color(i).g / 255.0f;`
			`float b = RGB.color(i).b / 255.0f;`

			`float m = (R / r + G / g + B / b) / 3.0f;`
			`//float m = max((R / r + G / g + B / b) / 3.0f, min_m);`
			`//float m = max(max(R / r, G / g), max(B / b, min_m));`
			`//float m = max(max(R, G), max(B, min_m));`

			`m = (m - min_m) / (1 - min_m);`

			`M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));`
			`M.weights[i] = src.weights[i];`
			`}`

			`// Compress M.`
			`if (compressionOptions.quality == Quality_Fastest) {`
			`QuickCompress::compressDXT5A(M, &block->alpha);`
			`}`
			`else {`
			`OptimalCompress::compressDXT5A(M, &block->alpha);`
			`}`
			`#endif`



			`#if 0`
			`src.fromRGBM(M, min_m);`

			`src.createMinimalSet(/ignoreTransparent=/true);`

			`if (src.isSingleColor(/ignoreAlpha=/true)) {`
			`OptimalCompress::compressDXT1(src.color(0), &block->color);`
			`}`
			`else {`
			`// @@ Use our improved compressor.`
			`ClusterFit fit;`
			`fit.setMetric(compressionOptions.colorWeight);`
			`fit.setColourSet(&src);`

			`Vector3 start, end;`
			`fit.compress4(&start, &end);`

			`if (fit.compress3(&start, &end)) {`
			`QuickCompress::outputBlock3(src, start, end, block->color);`
			`}`
			`else {`
			`QuickCompress::outputBlock4(src, start, end, block->color);`
			`}`
			`}`
			`#endif // 0`

			`// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?`



			`// RGBM encoding.`
			`// Maximize precision.`
			`// - Number of possible grey levels:`
			`// - Naive: 2^3 = 8`
			`// - Better: 2^3 + 2^2 = 12`
			`// - How to choose min_m?`
			`// - Ideal = Adaptive per block, don't know where to store.`
			`// - Adaptive per lightmap. How to compute optimal?`
			`// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.`

			`// - Optimal compressor: Interpolation artifacts.`

			`// - Color transform.`
			`// - Measure error in post-tone-mapping color space.`
			`// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.`
			`// - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space.`

			`// - Enhanced DXT compressor.`
			`// - Typical RGBM encoding as follows:`
			`// rgb -> M = max(rgb), RGB=rgb/M -> RGBM`
			`// - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.`
			`// - We could ensure that M' >= M during compression.`
			`// - We could clamp RGB anyway.`
			`// - We could add a fixed scale value to take into account compression errors and avoid clamping.`





			`// Compress color.`
			`/*if (rgba.isSingleColor())`
			`{`
			`OptimalCompress::compressDXT1(rgba.color(0), &block->color);`
			`}`
			`else`
			`{`
			`nvsquish::WeightedClusterFit fit;`
			`fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);`

			`int flags = 0;`
			`if (alphaMode == nvtt::AlphaMode_Transparency) flags \|= nvsquish::kWeightColourByAlpha;`

			`nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);`
			`fit.SetColourSet(&colours, 0);`
			`fit.Compress(&block->color);`
			`}*/`

			`#endif // 0`