#include "CompressorDXT5_RGBM.h"
#include "CompressorDXT1.h"

#include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h"

#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"

#include "nvmath/Color.inl"
#include "nvmath/Vector.inl"
#include "nvmath/Fitting.h"
#include "nvmath/ftoi.h"

#include "nvthread/Atomic.h"
#include <stdio.h> // NOTE(review): header name was missing in this file; <stdio.h> assumed because of the printf in the disabled debug block - confirm.

using namespace nv;

//static uint atomic_counter = 0;

/// Compress a 4x4 block of colors into a DXT5 block using an RGBM encoding.
///
/// The color part of the block stores rgb / M and the alpha part stores the multiplier M, remapped from
/// [min_m, 1] to [0, 1]. The steps are:
///  1. Convert every input color to RGBM, with M = max(R, G, B, min_m).
///  2. Compress the normalized rgb with compress_dxt1 (four color mode).
///  3. Decode the compressed colors and, for every texel, solve in the least squares sense for the M that
///     best reproduces the input given the *decoded* color. This compensates part of the DXT1 error.
///  4. Compress the resulting M values with OptimalCompress::compressDXT5A.
///
/// @param input_colors   16 input colors; x, y, z are clamped to [0, 1], w is not read.
/// @param input_weights  16 per texel weights. If they add up to zero, uniform weights are used for the color fit.
/// @param min_m          Lower bound of the multiplier. The code divides by (1 - min_m), so it must be below 1.
/// @param output         Destination block; both its color and alpha parts are written.
/// @return               Always 0 for now (@@ the compression error is not computed yet).
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output)
{
    // Convert to RGBM.
    Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
    float rgb_weights[16];

    float weight_sum = 0;
    float rgb_weight_sum = 0;

    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = input_colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, min_m));

        // M can only be zero when min_m is zero and the texel is black. Avoid 0/0 in that case; black
        // normalizes to black.
        float r = 0.0f;
        float g = 0.0f;
        float b = 0.0f;
        if (M > 0.0f) {
            r = R / M;
            g = G / M;
            b = B / M;
        }
        float a = (M - min_m) / (1 - min_m);

        input_colors_rgbm[i] = Vector4(r, g, b, a);

        // Bright texels matter more, because their rgb error is scaled by M on reconstruction.
        rgb_weights[i] = input_weights[i] * M;
        weight_sum += input_weights[i];
        rgb_weight_sum += rgb_weights[i];
    }

    // Do not hand an all-zero weight set to the color compressor. The second test only triggers when M is
    // zero for every weighted texel, which requires min_m == 0.
    if (weight_sum == 0 || rgb_weight_sum == 0) {
        for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
    }

    // Compress RGB.
    compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);

    // Decompress RGB/M block.
    nv::ColorBlock RGB;
    output->color.decodeBlock(&RGB);

    // Compute M values to compensate for RGB's error.
    AlphaBlock4x4 M;
    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = input_colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float rm = RGB.color(i).r / 255.0f;
        float gm = RGB.color(i).g / 255.0f;
        float bm = RGB.color(i).b / 255.0f;

        // compute m such that m * (r/M, g/M, b/M) == RGB

        // Three equations, one unknown:
        //  m * r/M == R
        //  m * g/M == G
        //  m * b/M == B

        // Solve in the least squares sense!

        // m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
        // m == dot(rgb, RGB) / dot(rgb, rgb)

        const Vector3 decoded(rm, gm, bm);
        const float decoded_length_squared = dot(decoded, decoded);

        // A black decoded texel reproduces black for any m and would give 0/0 here. Use the smallest
        // multiplier, which encodes to 0.
        float m = min_m;
        if (decoded_length_squared > 0.0f) {
            m = dot(decoded, Vector3(R, G, B)) / decoded_length_squared;
        }

        m = (m - min_m) / (1 - min_m);

#if 0
        // IC: This does indeed happen. What does that mean? The best choice of m is above the available range. If this happened too often it would make sense to scale m in
        // the pixel shader to allow for more accurate reconstruction. However, that scaling would reduce the precision over the [0-1] range. I haven't measured how much
        // error is introduced by the clamping vs. how much the error would change with the increased range.
        if (m > 1.0f) {
            uint counter = atomicIncrement(&atomic_counter);
            printf("It happens %u times!", counter);
        }
#endif

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = input_weights[i];
    }

    // Compress M.
    //if (compressionOptions.quality == Quality_Fastest) {
    //    QuickCompress::compressDXT5A(M, &output->alpha);
    /*}
    else {*/
        OptimalCompress::compressDXT5A(M, &output->alpha);
    //}


#if 0   // Multiple iterations do not seem to help.
    // Decompress M.
    output->alpha.decodeBlock(&M);

    // Feed it back to the input RGB block.
    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = input_colors[i];

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;

        float r = R / m;
        float g = G / m;
        float b = B / m;
        float a = float(M.alpha[i]) / 255.0f;

        input_colors_rgbm[i] = Vector4(r, g, b, a);
        rgb_weights[i] = input_weights[i] * m;
    }
#endif

    return 0; // @@
}



// Everything below is disabled. It holds earlier experiments and design notes and is kept for reference only.
#if 0

    BlockDXT5 * block = new(output)BlockDXT5;

    // Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.

    // Compress the resulting M values optimally.

    // Repeat this several times until compression error does not improve?

    //Vector3 rgb_block[16];
    //float m_block[16];


    // Init RGB/M block.
#if 0
    nvsquish::WeightedClusterFit fit;

    ColorBlock rgba;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, min_m));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = c.w;

        rgba.color(i) = toColor32(Vector4(r, g, b, a));
    }

    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &block->color);
    }
    else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        int flags = 0;
        if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
        fit.SetColourSet(&colours, 0);
        fit.Compress(&block->color);
    }
#endif

#if 1
    ColorSet rgb;
    rgb.allocate(4, 4);

    for (uint i = 0; i < 16; i++) {
        const Vector4 & c = colors[i];
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, min_m));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = c.w;

        rgb.colors[i] = Vector4(r, g, b, a);
        rgb.indices[i] = i;
        rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
    }

    rgb.createMinimalSet(/*ignoreTransparent=*/true);

    if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
    }
    else {
        ClusterFit fit;
        fit.setColorWeights(compressionOptions.colorWeight);
        fit.setColorSet(&rgb);

        Vector3 start, end;
        fit.compress4(&start, &end);

        QuickCompress::outputBlock4(rgb, start, end, &block->color);
    }
#endif

    // Decompress RGB/M block.
    nv::ColorBlock RGB;
    block->color.decodeBlock(&RGB);

#if 1
    AlphaBlock4x4 M;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = colors[i];
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float r = RGB.color(i).r / 255.0f;
        float g = RGB.color(i).g / 255.0f;
        float b = RGB.color(i).b / 255.0f;

        float m = (R / r + G / g + B / b) / 3.0f;
        //float m = max((R / r + G / g + B / b) / 3.0f, min_m);
        //float m = max(max(R / r, G / g), max(B / b, min_m));
        //float m = max(max(R, G), max(B, min_m));

        m = (m - min_m) / (1 - min_m);

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = weights[i];
    }

    // Compress M.
    if (compressionOptions.quality == Quality_Fastest) {
        QuickCompress::compressDXT5A(M, &block->alpha);
    }
    else {
        OptimalCompress::compressDXT5A(M, &block->alpha);
    }
#else
    OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif

#if 0
    // Decompress M.
    block->alpha.decodeBlock(&M);

    rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.

    for (uint i = 0; i < src.colorCount; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        //float m = max(max(R, G), max(B, min_m));
        float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
        float r = R / m;
        float g = G / m;
        float b = B / m;
        float a = c.w;

        rgb.colors[i] = Vector4(r, g, b, a);
        rgb.indices[i] = i;
        rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
    }

    rgb.createMinimalSet(/*ignoreTransparent=*/true);

    if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
    }
    else {
        ClusterFit fit;
        fit.setMetric(compressionOptions.colorWeight);
        fit.setColourSet(&rgb);

        Vector3 start, end;
        fit.compress4(&start, &end);

        QuickCompress::outputBlock4(rgb, start, end, &block->color);
    }
#endif

#if 0
    block->color.decodeBlock(&RGB);

    //AlphaBlock4x4 M;
    //M.initWeights(src);

    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float r = RGB.color(i).r / 255.0f;
        float g = RGB.color(i).g / 255.0f;
        float b = RGB.color(i).b / 255.0f;

        float m = (R / r + G / g + B / b) / 3.0f;
        //float m = max((R / r + G / g + B / b) / 3.0f, min_m);
        //float m = max(max(R / r, G / g), max(B / b, min_m));
        //float m = max(max(R, G), max(B, min_m));

        m = (m - min_m) / (1 - min_m);

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = src.weights[i];
    }

    // Compress M.
    if (compressionOptions.quality == Quality_Fastest) {
        QuickCompress::compressDXT5A(M, &block->alpha);
    }
    else {
        OptimalCompress::compressDXT5A(M, &block->alpha);
    }
#endif

#if 0
    src.fromRGBM(M, min_m);

    src.createMinimalSet(/*ignoreTransparent=*/true);

    if (src.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(src.color(0), &block->color);
    }
    else {
        // @@ Use our improved compressor.
        ClusterFit fit;
        fit.setMetric(compressionOptions.colorWeight);
        fit.setColourSet(&src);

        Vector3 start, end;
        fit.compress4(&start, &end);

        if (fit.compress3(&start, &end)) {
            QuickCompress::outputBlock3(src, start, end, block->color);
        }
        else {
            QuickCompress::outputBlock4(src, start, end, block->color);
        }
    }
#endif // 0

    // @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?



    // RGBM encoding.
    // Maximize precision.
    // - Number of possible grey levels:
    //   - Naive:  2^3 = 8
    //   - Better: 2^3 + 2^2 = 12
    //   - How to choose min_m?
    //     - Ideal = Adaptive per block, don't know where to store.
    //     - Adaptive per lightmap. How to compute optimal?
    //     - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.

    // - Optimal compressor: Interpolation artifacts.

    // - Color transform.
    //   - Measure error in post-tone-mapping color space.
    //   - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
    //   - Guess based on average lightmap color? Use fixed exposure, in scaled lightmap space.

    // - Enhanced DXT compressor.
    //   - Typical RGBM encoding as follows:
    //     rgb -> M = max(rgb), RGB=rgb/M -> RGBM
    //   - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
    //     - We could ensure that M' >= M during compression.
    //     - We could clamp RGB anyway.
    //     - We could add a fixed scale value to take into account compression errors and avoid clamping.



    // Compress color.
    /*if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &block->color);
    }
    else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        int flags = 0;
        if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
        fit.SetColourSet(&colours, 0);
        fit.Compress(&block->color);
    }*/

#endif // 0