Merge changes from the witness.
parent
4cb60cc5ba
commit
d019cd7080
@ -0,0 +1,256 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_MATH_FTOI_H
|
||||
#define NV_MATH_FTOI_H
|
||||
|
||||
#include "nvmath/nvmath.h"
|
||||
|
||||
#include <math.h>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
// Optimized float to int conversions. See:
|
||||
// http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
|
||||
// http://www.stereopsis.com/sree/fpu2006.html
|
||||
// http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
|
||||
// http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
|
||||
|
||||
|
||||
union DoubleAnd64 {
|
||||
uint64 i;
|
||||
double d;
|
||||
};
|
||||
|
||||
static const double floatutil_xs_doublemagic = (6755399441055744.0); // 2^52 * 1.5
|
||||
static const double floatutil_xs_doublemagicdelta = (1.5e-8); // almost .5f = .5f + 1e^(number of exp bit)
|
||||
static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta); // almost .5f = .5f - 1e^(number of exp bit)
|
||||
|
||||
NV_FORCEINLINE int ftoi_round_xs(double val, double magic) {
|
||||
#if 1
|
||||
DoubleAnd64 dunion;
|
||||
dunion.d = val + magic;
|
||||
return (int32) dunion.i; // just cast to grab the bottom bits
|
||||
#else
|
||||
val += magic;
|
||||
return ((int*)&val)[0]; // @@ Assumes little endian.
|
||||
#endif
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_round_xs(float val) {
|
||||
return ftoi_round_xs(val, floatutil_xs_doublemagic);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_floor_xs(float val) {
|
||||
return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_ceil_xs(float val) {
|
||||
return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc_xs(float val) {
|
||||
return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val);
|
||||
}
|
||||
|
||||
#if NV_CPU_X86 || NV_CPU_X86_64
|
||||
|
||||
NV_FORCEINLINE int ftoi_round_sse(float f) {
|
||||
return _mm_cvt_ss2si(_mm_set_ss(f));
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc_sse(float f) {
|
||||
return _mm_cvtt_ss2si(_mm_set_ss(f));
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
#if NV_USE_SSE
|
||||
|
||||
NV_FORCEINLINE int ftoi_round(float val) {
|
||||
return ftoi_round_sse(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc(float f) {
|
||||
return ftoi_trunc_sse(f);
|
||||
}
|
||||
|
||||
// We can probably do better than this. See for example:
|
||||
// http://dss.stephanierct.com/DevBlog/?p=8
|
||||
NV_FORCEINLINE int ftoi_floor(float val) {
|
||||
return ftoi_round(floorf(val));
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_ceil(float val) {
|
||||
return ftoi_round(ceilf(val));
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
// In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code
|
||||
// when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode.
|
||||
|
||||
NV_FORCEINLINE int ftoi_round(float val) {
|
||||
return ftoi_round_xs(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_floor(float val) {
|
||||
return ftoi_floor_xs(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_ceil(float val) {
|
||||
return ftoi_ceil_xs(val);
|
||||
}
|
||||
|
||||
NV_FORCEINLINE int ftoi_trunc(float f) {
|
||||
return ftoi_trunc_xs(f);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
inline void test_ftoi() {
|
||||
|
||||
// Round to nearest integer.
|
||||
nvCheck(ftoi_round(0.1f) == 0);
|
||||
nvCheck(ftoi_round(0.6f) == 1);
|
||||
nvCheck(ftoi_round(-0.2f) == 0);
|
||||
nvCheck(ftoi_round(-0.7f) == -1);
|
||||
nvCheck(ftoi_round(10.1f) == 10);
|
||||
nvCheck(ftoi_round(10.6f) == 11);
|
||||
nvCheck(ftoi_round(-90.1f) == -90);
|
||||
nvCheck(ftoi_round(-90.6f) == -91);
|
||||
|
||||
nvCheck(ftoi_round(0) == 0);
|
||||
nvCheck(ftoi_round(1) == 1);
|
||||
nvCheck(ftoi_round(-1) == -1);
|
||||
|
||||
nvCheck(ftoi_round(0.5f) == 0); // How are midpoints rounded? Bankers rounding.
|
||||
nvCheck(ftoi_round(1.5f) == 2);
|
||||
nvCheck(ftoi_round(2.5f) == 2);
|
||||
nvCheck(ftoi_round(3.5f) == 4);
|
||||
nvCheck(ftoi_round(4.5f) == 4);
|
||||
nvCheck(ftoi_round(-0.5f) == 0);
|
||||
nvCheck(ftoi_round(-1.5f) == -2);
|
||||
|
||||
|
||||
// Truncation (round down if > 0, round up if < 0).
|
||||
nvCheck(ftoi_trunc(0.1f) == 0);
|
||||
nvCheck(ftoi_trunc(0.6f) == 0);
|
||||
nvCheck(ftoi_trunc(-0.2f) == 0);
|
||||
nvCheck(ftoi_trunc(-0.7f) == 0); // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition.
|
||||
nvCheck(ftoi_trunc(1.99f) == 1);
|
||||
nvCheck(ftoi_trunc(-1.2f) == -1);
|
||||
|
||||
// Floor (round down).
|
||||
nvCheck(ftoi_floor(0.1f) == 0);
|
||||
nvCheck(ftoi_floor(0.6f) == 0);
|
||||
nvCheck(ftoi_floor(-0.2f) == -1);
|
||||
nvCheck(ftoi_floor(-0.7f) == -1);
|
||||
nvCheck(ftoi_floor(1.99f) == 1);
|
||||
nvCheck(ftoi_floor(-1.2f) == -2);
|
||||
|
||||
nvCheck(ftoi_floor(0) == 0);
|
||||
nvCheck(ftoi_floor(1) == 1);
|
||||
nvCheck(ftoi_floor(-1) == -1);
|
||||
nvCheck(ftoi_floor(2) == 2);
|
||||
nvCheck(ftoi_floor(-2) == -2);
|
||||
|
||||
// Ceil (round up).
|
||||
nvCheck(ftoi_ceil(0.1f) == 1);
|
||||
nvCheck(ftoi_ceil(0.6f) == 1);
|
||||
nvCheck(ftoi_ceil(-0.2f) == 0);
|
||||
nvCheck(ftoi_ceil(-0.7f) == 0);
|
||||
nvCheck(ftoi_ceil(1.99f) == 2);
|
||||
nvCheck(ftoi_ceil(-1.2f) == -1);
|
||||
|
||||
nvCheck(ftoi_ceil(0) == 0);
|
||||
nvCheck(ftoi_ceil(1) == 1);
|
||||
nvCheck(ftoi_ceil(-1) == -1);
|
||||
nvCheck(ftoi_ceil(2) == 2);
|
||||
nvCheck(ftoi_ceil(-2) == -2);
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Safe versions using standard casts.
|
||||
|
||||
inline int iround(float f)
|
||||
{
|
||||
return int(floorf(f + 0.5f));
|
||||
}
|
||||
|
||||
inline int iround(double f)
|
||||
{
|
||||
return int(::floor(f + 0.5));
|
||||
}
|
||||
|
||||
inline int ifloor(float f)
|
||||
{
|
||||
return int(floorf(f));
|
||||
}
|
||||
|
||||
inline int iceil(float f)
|
||||
{
|
||||
return int(ceilf(f));
|
||||
}
|
||||
|
||||
|
||||
|
||||
// I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
|
||||
// Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html
|
||||
|
||||
// Quantize a float in the [0,1] range, using exact end points or uniform bins.
|
||||
inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
|
||||
nvDebugCheck(bits <= 16);
|
||||
|
||||
float range = float(1 << bits);
|
||||
if (exactEndPoints) {
|
||||
return floorf(x * (range-1) + 0.5f) / (range-1);
|
||||
}
|
||||
else {
|
||||
return (floorf(x * range) + 0.5f) / range;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// This is the most common rounding mode:
|
||||
//
|
||||
// 0 1 2 3
|
||||
// |___|_______|_______|___|
|
||||
// 0 1
|
||||
//
|
||||
// You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`.
|
||||
// You reconstruct the original float dividing by 'N-1': `f = i / (N-1)`
|
||||
|
||||
|
||||
// 0 1 2 3
|
||||
// |_____|_____|_____|_____|
|
||||
// 0 1
|
||||
|
||||
/*enum BinningMode {
|
||||
RoundMode_ExactEndPoints,
|
||||
RoundMode_UniformBins,
|
||||
};*/
|
||||
|
||||
template <int N>
|
||||
inline uint unitFloatToFixed(float f) {
|
||||
return ftoi_round(f * ((1<<N)-1));
|
||||
}
|
||||
|
||||
inline uint8 unitFloatToFixed8(float f) {
|
||||
return (uint8)unitFloatToFixed<8>(f);
|
||||
}
|
||||
|
||||
inline uint16 unitFloatToFixed16(float f) {
|
||||
return (uint16)unitFloatToFixed<16>(f);
|
||||
}
|
||||
|
||||
|
||||
} // nv
|
||||
|
||||
#endif // NV_MATH_FTOI_H
|
@ -0,0 +1,461 @@
|
||||
|
||||
#include "CompressorDXT1.h"
|
||||
#include "SingleColorLookup.h"
|
||||
#include "ClusterFit.h"
|
||||
#include "QuickCompressDXT.h" // Deprecate.
|
||||
|
||||
#include "nvimage/ColorBlock.h"
|
||||
#include "nvimage/BlockDXT.h"
|
||||
|
||||
#include "nvmath/Color.inl"
|
||||
#include "nvmath/Vector.inl"
|
||||
#include "nvmath/Fitting.h"
|
||||
#include "nvmath/ftoi.h"
|
||||
|
||||
#include "nvcore/Utils.h" // swap
|
||||
|
||||
#include <string.h> // memset
|
||||
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
inline static void color_block_to_vector_block(const ColorBlock & rgba, Vector3 block[16])
|
||||
{
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
const Color32 c = rgba.color(i);
|
||||
block[i] = Vector3(c.r, c.g, c.b);
|
||||
}
|
||||
}
|
||||
|
||||
inline Vector3 r5g6b5_to_vector3(int r, int g, int b)
|
||||
{
|
||||
Vector3 c;
|
||||
c.x = float((r << 3) | (r >> 2));
|
||||
c.y = float((g << 2) | (g >> 4));
|
||||
c.z = float((b << 3) | (b >> 2));
|
||||
return c;
|
||||
}
|
||||
|
||||
inline Vector3 color_to_vector3(Color32 c)
|
||||
{
|
||||
const float scale = 1.0f / 255.0f;
|
||||
return Vector3(c.r * scale, c.g * scale, c.b * scale);
|
||||
}
|
||||
|
||||
inline Color32 vector3_to_color(Vector3 v)
|
||||
{
|
||||
Color32 color;
|
||||
color.r = U8(ftoi_round(saturate(v.x) * 255));
|
||||
color.g = U8(ftoi_round(saturate(v.y) * 255));
|
||||
color.b = U8(ftoi_round(saturate(v.z) * 255));
|
||||
color.a = 255;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Find first valid color.
|
||||
static bool find_valid_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 * valid_color)
|
||||
{
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (weights[i] > 0.0f) {
|
||||
*valid_color = colors[i];
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// No valid colors.
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool is_single_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 color)
|
||||
{
|
||||
for (int i = 0; i < count; i++) {
|
||||
if (weights[i] > 0.0f) {
|
||||
if (colors[i] != color) return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Find similar colors and combine them together.
|
||||
static int reduce_colors(const Vector3 * input_colors, const float * input_weights, Vector3 * colors, float * weights)
|
||||
{
|
||||
int n = 0;
|
||||
for (int i = 0; i < 16; i++)
|
||||
{
|
||||
Vector3 ci = input_colors[i];
|
||||
float wi = input_weights[i];
|
||||
|
||||
if (wi > 0) {
|
||||
// Find matching color.
|
||||
int j;
|
||||
for (j = 0; j < n; j++) {
|
||||
if (equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z)) {
|
||||
weights[j] += wi;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// No match found. Add new color.
|
||||
if (j == n) {
|
||||
colors[n] = ci;
|
||||
weights[n] = wi;
|
||||
n++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nvDebugCheck(n <= 16);
|
||||
|
||||
return n;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// Different ways of estimating the error.
|
||||
static float evaluate_mse(const Vector3 & p, const Vector3 & c) {
|
||||
return square(p.x-c.x) + square(p.y-c.y) + square(p.z-c.z);
|
||||
}
|
||||
|
||||
/*static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) {
|
||||
return ww.x * square(p.x-c.x) + ww.y * square(p.y-c.y) + ww.z * square(p.z-c.z);
|
||||
}*/
|
||||
|
||||
static int evaluate_mse_rgb(const Color32 & p, const Color32 & c) {
|
||||
return square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b);
|
||||
}
|
||||
|
||||
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c) {
|
||||
float e0 = evaluate_mse(palette[0], c);
|
||||
float e1 = evaluate_mse(palette[1], c);
|
||||
float e2 = evaluate_mse(palette[2], c);
|
||||
float e3 = evaluate_mse(palette[3], c);
|
||||
return min(min(e0, e1), min(e2, e3));
|
||||
}
|
||||
|
||||
static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
|
||||
int e0 = evaluate_mse_rgb(palette[0], c);
|
||||
int e1 = evaluate_mse_rgb(palette[1], c);
|
||||
int e2 = evaluate_mse_rgb(palette[2], c);
|
||||
int e3 = evaluate_mse_rgb(palette[3], c);
|
||||
return min(min(e0, e1), min(e2, e3));
|
||||
}
|
||||
|
||||
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, int index) {
|
||||
return evaluate_mse(palette[index], c);
|
||||
}
|
||||
|
||||
static int evaluate_mse(const Color32 palette[4], const Color32 & c, int index) {
|
||||
return evaluate_mse_rgb(palette[index], c);
|
||||
}
|
||||
|
||||
|
||||
static float evaluate_mse(const BlockDXT1 * output, Vector3 colors[16]) {
|
||||
Color32 palette[4];
|
||||
output->evaluatePalette(palette, /*d3d9=*/false);
|
||||
|
||||
// convert palette to float.
|
||||
Vector3 vector_palette[4];
|
||||
for (int i = 0; i < 4; i++) {
|
||||
vector_palette[i] = color_to_vector3(palette[i]);
|
||||
}
|
||||
|
||||
// evaluate error for each index.
|
||||
float error = 0.0f;
|
||||
for (int i = 0; i < 16; i++) {
|
||||
int index = (output->indices >> (2*i)) & 3; // @@ Is this the right order?
|
||||
error += evaluate_mse(vector_palette, colors[i], index);
|
||||
}
|
||||
|
||||
return error;
|
||||
}
|
||||
|
||||
static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) {
|
||||
Color32 palette[4];
|
||||
output->evaluatePalette(palette, /*d3d9=*/false);
|
||||
|
||||
return evaluate_mse(palette, color, index);
|
||||
}
|
||||
|
||||
|
||||
/*void output_block3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
|
||||
{
|
||||
Vector3 minColor = start * 255.0f;
|
||||
Vector3 maxColor = end * 255.0f;
|
||||
uint16 color0 = roundAndExpand(&minColor);
|
||||
uint16 color1 = roundAndExpand(&maxColor);
|
||||
|
||||
if (color0 > color1) {
|
||||
swap(maxColor, minColor);
|
||||
swap(color0, color1);
|
||||
}
|
||||
|
||||
block->col0 = Color16(color0);
|
||||
block->col1 = Color16(color1);
|
||||
block->indices = compute_indices3(colors, weights, count, maxColor / 255.0f, minColor / 255.0f);
|
||||
|
||||
//optimizeEndPoints3(set, block);
|
||||
}*/
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
// Single color compressor, based on:
|
||||
// https://mollyrocket.com/forums/viewtopic.php?t=392
|
||||
float nv::compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output)
|
||||
{
|
||||
output->col0.r = OMatch5[c.r][0];
|
||||
output->col0.g = OMatch6[c.g][0];
|
||||
output->col0.b = OMatch5[c.b][0];
|
||||
output->col1.r = OMatch5[c.r][1];
|
||||
output->col1.g = OMatch6[c.g][1];
|
||||
output->col1.b = OMatch5[c.b][1];
|
||||
output->indices = 0xaaaaaaaa;
|
||||
|
||||
if (output->col0.u < output->col1.u)
|
||||
{
|
||||
swap(output->col0.u, output->col1.u);
|
||||
output->indices ^= 0x55555555;
|
||||
}
|
||||
|
||||
return (float) evaluate_mse(output, c, output->indices & 3);
|
||||
}
|
||||
|
||||
|
||||
float nv::compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output)
|
||||
{
|
||||
return compress_dxt1_single_color_optimal(vector3_to_color(color), output);
|
||||
}
|
||||
|
||||
|
||||
// Low quality baseline compressor.
|
||||
float nv::compress_dxt1_least_squares_fit(const Vector3 * input_colors, const Vector3 * colors, const float * weights, int count, BlockDXT1 * output)
|
||||
{
|
||||
// @@ Iterative best end point fit.
|
||||
|
||||
return FLT_MAX;
|
||||
}
|
||||
|
||||
|
||||
static Color32 bitexpand_color16_to_color32(Color16 c16) {
|
||||
Color32 c32;
|
||||
c32.b = (c16.b << 3) | (c16.b >> 2);
|
||||
c32.g = (c16.g << 2) | (c16.g >> 4);
|
||||
c32.r = (c16.r << 3) | (c16.r >> 2);
|
||||
c32.a = 0xFF;
|
||||
|
||||
//c32.u = ((c16.u << 3) & 0xf8) | ((c16.u << 5) & 0xfc00) | ((c16.u << 8) & 0xf80000);
|
||||
//c32.u |= (c32.u >> 5) & 0x070007;
|
||||
//c32.u |= (c32.u >> 6) & 0x000300;
|
||||
|
||||
return c32;
|
||||
}
|
||||
|
||||
static Color32 bitexpand_color16_to_color32(int r, int g, int b) {
|
||||
Color32 c32;
|
||||
c32.b = (b << 3) | (b >> 2);
|
||||
c32.g = (g << 2) | (g >> 4);
|
||||
c32.r = (r << 3) | (r >> 2);
|
||||
c32.a = 0xFF;
|
||||
return c32;
|
||||
}
|
||||
|
||||
static Color16 truncate_color32_to_color16(Color32 c32) {
|
||||
Color16 c16;
|
||||
c16.b = (c32.b >> 3);
|
||||
c16.g = (c32.g >> 2);
|
||||
c16.r = (c32.r >> 3);
|
||||
return c16;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
static float evaluate_palette4(Color32 palette[4]) {
|
||||
palette[2].r = (2 * palette[0].r + palette[1].r) / 3;
|
||||
palette[2].g = (2 * palette[0].g + palette[1].g) / 3;
|
||||
palette[2].b = (2 * palette[0].b + palette[1].b) / 3;
|
||||
palette[3].r = (2 * palette[1].r + palette[0].r) / 3;
|
||||
palette[3].g = (2 * palette[1].g + palette[0].g) / 3;
|
||||
palette[3].b = (2 * palette[1].b + palette[0].b) / 3;
|
||||
}
|
||||
|
||||
static float evaluate_palette3(Color32 palette[4]) {
|
||||
palette[2].r = (palette[0].r + palette[1].r) / 2;
|
||||
palette[2].g = (palette[0].g + palette[1].g) / 2;
|
||||
palette[2].b = (palette[0].b + palette[1].b) / 2;
|
||||
palette[3].r = 0;
|
||||
palette[3].g = 0;
|
||||
palette[3].b = 0;
|
||||
}
|
||||
|
||||
static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) {
|
||||
|
||||
float total = 0.0f;
|
||||
for (int i = 0; i < count; i++) {
|
||||
total += (weights[i] * weights[i]) * evaluate_mse(palette, colors[i]);
|
||||
}
|
||||
|
||||
return total;
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, int max_volume, BlockDXT1 * output)
|
||||
{
|
||||
// Compute bounding box.
|
||||
Vector3 min_color(1.0f);
|
||||
Vector3 max_color(0.0f);
|
||||
|
||||
for (int i = 0; i < count; i++) {
|
||||
min_color = min(min_color, colors[i]);
|
||||
max_color = max(max_color, colors[i]);
|
||||
}
|
||||
|
||||
// Convert to 5:6:5
|
||||
int min_r = ftoi_floor(31 * min_color.x);
|
||||
int min_g = ftoi_floor(63 * min_color.y);
|
||||
int min_b = ftoi_floor(31 * min_color.z);
|
||||
int max_r = ftoi_ceil(31 * max_color.x);
|
||||
int max_g = ftoi_ceil(63 * max_color.y);
|
||||
int max_b = ftoi_ceil(31 * max_color.z);
|
||||
|
||||
// Expand the box.
|
||||
int range_r = max_r - min_r;
|
||||
int range_g = max_g - min_g;
|
||||
int range_b = max_b - min_b;
|
||||
|
||||
min_r = max(0, min_r - (range_r + 1) / 1 - 1);
|
||||
min_g = max(0, min_g - (range_g + 1) / 1 - 1);
|
||||
min_b = max(0, min_b - (range_b + 1) / 1 - 1);
|
||||
|
||||
max_r = min(31, max_r + (range_r + 1) / 2 + 1);
|
||||
max_g = min(63, max_g + (range_g + 1) / 2 + 1);
|
||||
max_b = min(31, max_b + (range_b + 1) / 2 + 1);
|
||||
|
||||
// Estimate size of search space.
|
||||
int volume = (max_r-min_r+1) * (max_g-min_g+1) * (max_b-min_b+1);
|
||||
|
||||
// if size under search_limit, then proceed. Note that search_limit is sqrt of number of evaluations.
|
||||
if (volume > max_volume) {
|
||||
return FLT_MAX;
|
||||
}
|
||||
|
||||
Color32 colors32[16];
|
||||
for (int i = 0; i < count; i++) {
|
||||
colors32[i] = toColor32(Vector4(colors[i], 1));
|
||||
}
|
||||
|
||||
float best_error = FLT_MAX;
|
||||
Color32 best0, best1;
|
||||
|
||||
for(int r0 = min_r; r0 <= max_r; r0++)
|
||||
for(int r1 = max_r; r1 >= r0; r1--)
|
||||
for(int g0 = min_g; g0 <= max_g; g0++)
|
||||
for(int g1 = max_g; g1 >= g0; g1--)
|
||||
for(int b0 = min_b; b0 <= max_b; b0++)
|
||||
for(int b1 = max_b; b1 >= b0; b1--)
|
||||
{
|
||||
Color32 palette[4];
|
||||
palette[0] = bitexpand_color16_to_color32(r1, g1, b1);
|
||||
palette[1] = bitexpand_color16_to_color32(r0, g0, b0);
|
||||
|
||||
// Evaluate error in 4 color mode.
|
||||
evaluate_palette4(palette);
|
||||
|
||||
float error = evaluate_palette_error(palette, colors32, weights, count);
|
||||
|
||||
if (error < best_error) {
|
||||
best_error = error;
|
||||
best0 = palette[0];
|
||||
best1 = palette[1];
|
||||
}
|
||||
|
||||
#if 0
|
||||
// Evaluate error in 3 color mode.
|
||||
evaluate_palette3(palette);
|
||||
|
||||
float error = evaluate_palette_error(palette, colors, weights, count);
|
||||
|
||||
if (error < best_error) {
|
||||
best_error = error;
|
||||
best0 = palette[1];
|
||||
best1 = palette[0];
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
output->col0 = truncate_color32_to_color16(best0);
|
||||
output->col1 = truncate_color32_to_color16(best1);
|
||||
|
||||
if (output->col0.u <= output->col1.u) {
|
||||
//output->indices = computeIndices3(colors, best0, best1);
|
||||
}
|
||||
else {
|
||||
//output->indices = computeIndices4(colors, best0, best1);
|
||||
}
|
||||
|
||||
return FLT_MAX;
|
||||
}
|
||||
|
||||
|
||||
float nv::compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output)
|
||||
{
|
||||
ClusterFit fit;
|
||||
//fit.setColorWeights(compressionOptions.colorWeight);
|
||||
fit.setColorWeights(Vector4(1)); // @@ Set color weights.
|
||||
fit.setColorSet(colors, weights, count);
|
||||
|
||||
// start & end are in [0, 1] range.
|
||||
Vector3 start, end;
|
||||
fit.compress4(&start, &end);
|
||||
|
||||
if (fit.compress3(&start, &end)) {
|
||||
//output_block3(input_colors, start, end, block);
|
||||
// @@ Output block.
|
||||
}
|
||||
else {
|
||||
//output_block4(input_colors, start, end, block);
|
||||
// @@ Output block.
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
float nv::compress_dxt1(const Vector3 input_colors[16], const float input_weights[16], BlockDXT1 * output)
|
||||
{
|
||||
Vector3 colors[16];
|
||||
float weights[16];
|
||||
int count = reduce_colors(input_colors, input_weights, colors, weights);
|
||||
|
||||
if (count == 0) {
|
||||
// Output trivial block.
|
||||
output->col0.u = 0;
|
||||
output->col1.u = 0;
|
||||
output->indices = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (count == 1) {
|
||||
return compress_dxt1_single_color_optimal(colors[0], output);
|
||||
}
|
||||
|
||||
// If high quality:
|
||||
//error = compress_dxt1_bounding_box_exhaustive(colors, weigths, count, 3200, error, output);
|
||||
//if (error < FLT_MAX) return error;
|
||||
|
||||
// This is pretty fast and in some cases can produces better quality than cluster fit.
|
||||
// error = compress_dxt1_least_squares_fit(colors, weigths, error, output);
|
||||
|
||||
//
|
||||
float error = compress_dxt1_cluster_fit(input_colors, colors, weights, count, output);
|
||||
|
||||
return error;
|
||||
}
|
||||
|
@ -0,0 +1,38 @@
|
||||
|
||||
namespace nv {
|
||||
|
||||
class Color32;
|
||||
struct ColorBlock;
|
||||
struct BlockDXT1;
|
||||
class Vector3;
|
||||
|
||||
// All these functions return MSE.
|
||||
|
||||
// Optimal compressors:
|
||||
/*float compress_dxt1_single_color_optimal(const Color32 & rgb, BlockDXT1 * output);
|
||||
float compress_dxt1_single_color_optimal(const ColorBlock & input, BlockDXT1 * output);
|
||||
float compress_dxt1_optimal(const ColorBlock & input, BlockDXT1 * output);
|
||||
|
||||
|
||||
|
||||
// Brute force with restricted search space:
|
||||
float compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output);
|
||||
float compress_dxt1_best_fit_line_exhaustive(const ColorBlock & input, BlockDXT1 * output);
|
||||
|
||||
|
||||
// Fast least squres fitting compressors:
|
||||
float compress_dxt1_least_squares_fit(const ColorBlock & input, BlockDXT1 * output);
|
||||
float compress_dxt1_least_squares_fit_iterative(const ColorBlock & input, BlockDXT1 * output);
|
||||
*/
|
||||
|
||||
float compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output);
|
||||
float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);
|
||||
|
||||
float compress_dxt1_least_squares_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output);
|
||||
float compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, int search_limit, BlockDXT1 * output);
|
||||
float compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output);
|
||||
|
||||
|
||||
float compress_dxt1(const Vector3 colors[16], const float weights[16], BlockDXT1 * output);
|
||||
|
||||
}
|
Loading…
Reference in New Issue