2308 lines
70 KiB
C++
2308 lines
70 KiB
C++
|
|
#include "CompressorETC.h"
|
|
|
|
#include "nvmath/Vector.inl"
|
|
#include "nvmath/Matrix.inl"
|
|
#include "nvmath/Color.inl"
|
|
#include "nvcore/Utils.h" // clamp
|
|
|
|
#define HAVE_RGETC NV_OS_OSX
|
|
#define HAVE_ETCPACK 0 // Only enable in OSX for debugging.
|
|
|
|
#if HAVE_RGETC
|
|
#include "rg_etc1.h"
|
|
#endif
|
|
|
|
#if HAVE_ETCPACK
|
|
// From etcpack.cxx
|
|
extern void decompressBlockETC2(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty);
|
|
extern void decompressBlockAlpha(uint8* data, uint8* img, int width, int height, int ix, int iy);
|
|
extern void decompressBlockAlpha16bit(uint8* data, uint8* img, int width, int height, int ix, int iy);
|
|
extern int formatSigned;
|
|
#endif
|
|
|
|
#define assert nvCheck
|
|
|
|
using namespace nv;
|
|
|
|
// TODO:
|
|
// - Accurate rounding of signed 3-bit components.
|
|
// - Range based table selection.
|
|
// - Slower try all options table selection?
|
|
// - Trivial selector assignment.
|
|
// * Base point optimization.
|
|
// * Brute force base point optimization.
|
|
// - Enumerate and evaluate all clusters.
|
|
// - Brute force planar mode endpoint refinement. For each color try two rounding directions (8 tests).
|
|
// - T & H modes decompression.
|
|
|
|
union BlockETC {
|
|
// Definitions from EtcLib/EtcBlock4x4EncodingBits.h
|
|
struct Individual {
|
|
uint red2 : 4; // byte 0
|
|
uint red1 : 4;
|
|
uint green2 : 4; // byte 1
|
|
uint green1 : 4;
|
|
uint blue2 : 4; // byte 2
|
|
uint blue1 : 4;
|
|
uint flip : 1; // byte 3
|
|
uint diff : 1;
|
|
uint cw2 : 3;
|
|
uint cw1 : 3;
|
|
uint selectors; // bytes 4-7
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(BlockETC::Individual) == 64/8);
|
|
|
|
struct Differential {
|
|
uint dred2 : 3; // byte 0
|
|
uint red1 : 5;
|
|
uint dgreen2 : 3; // byte 1
|
|
uint green1 : 5;
|
|
uint dblue2 : 3; // byte 2
|
|
uint blue1 : 5;
|
|
uint flip : 1; // byte 3
|
|
uint diff : 1;
|
|
uint cw2 : 3;
|
|
uint cw1 : 3;
|
|
uint selectors; // bytes 4-7
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(Differential) == 64/8);
|
|
|
|
struct T {
|
|
uint red1b : 2; // byte 0
|
|
uint detect2 : 1;
|
|
uint red1a : 2;
|
|
uint detect1 : 3;
|
|
uint blue1 : 4; // byte 1
|
|
uint green1 : 4;
|
|
uint green2 : 4; // byte 2
|
|
uint red2 : 4;
|
|
uint db : 1; // byte 3
|
|
uint diff : 1;
|
|
uint da : 2;
|
|
uint blue2 : 4;
|
|
uint selectors; // bytes 4-7
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(T) == 64/8);
|
|
|
|
struct H {
|
|
uint green1a : 3; // byte 0
|
|
uint red1 : 4;
|
|
uint detect1 : 1;
|
|
uint blue1b : 2; // byte 1
|
|
uint detect3 : 1;
|
|
uint blue1a : 1;
|
|
uint green1b : 1;
|
|
uint detect2 : 3;
|
|
uint green2a : 3; // byte 2
|
|
uint red2 : 4;
|
|
uint blue1c : 1;
|
|
uint db : 1; // byte 3
|
|
uint diff : 1;
|
|
uint da : 1;
|
|
uint blue2 : 4;
|
|
uint green2b : 1;
|
|
uint selectors; // bytes 4-7
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(H) == 64/8);
|
|
|
|
struct Planar {
|
|
uint originGreen1 : 1; // byte 0
|
|
uint originRed : 6;
|
|
uint detect1 : 1;
|
|
uint originBlue1 : 1; // byte 1
|
|
uint originGreen2 : 6;
|
|
uint detect2 : 1;
|
|
uint originBlue3 : 2; // byte 2
|
|
uint detect4 : 1;
|
|
uint originBlue2 : 2;
|
|
uint detect3 : 3;
|
|
uint horizRed2 : 1; // byte 3
|
|
uint diff : 1;
|
|
uint horizRed1 : 5;
|
|
uint originBlue4 : 1;
|
|
uint horizBlue1: 1; // byte 4
|
|
uint horizGreen : 7;
|
|
uint vertRed1 : 3; // byte 5
|
|
uint horizBlue2 : 5;
|
|
uint vertGreen1 : 5; // byte 6
|
|
uint vertRed2 : 3;
|
|
uint vertBlue : 6; // byte 7
|
|
uint vertGreen2 : 2;
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(Planar) == 64/8);
|
|
|
|
|
|
uint64 data64;
|
|
uint32 data32[2];
|
|
uint8 data8[8];
|
|
Individual individual;
|
|
Differential differential;
|
|
T t;
|
|
H h;
|
|
Planar planar;
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(BlockETC) == 64/8);
|
|
|
|
|
|
|
|
static const int etc_intensity_modifiers[8][4] = {
|
|
{ -8, -2, 2, 8 },
|
|
{ -17, -5, 5, 17 },
|
|
{ -29, -9, 9, 29 },
|
|
{ -42, -13, 13, 42 },
|
|
{ -60, -18, 18, 60 },
|
|
{ -80, -24, 24, 80 },
|
|
{ -106, -33, 33, 106 },
|
|
{ -183, -47, 47, 183 }
|
|
};
|
|
|
|
static const int etc_intensity_range[8] = {
|
|
16, 34, 58, 84, 120, 160, 212, 366
|
|
};
|
|
|
|
static const int etc_th_distances[8] = { 3, 6, 11, 16, 23, 32, 41, 64 };
|
|
|
|
static const uint8 etc_selector_scramble[] = { 3, 2, 0, 1 };
|
|
static const uint8 etc_selector_unscramble[] = { 2, 3, 1, 0 };
|
|
|
|
|
|
static float midpoints4[16];
|
|
NV_AT_STARTUP(
|
|
for (int i = 0; i < 15; i++) {
|
|
float f0 = float(((i+0) << 4) | ((i+0) >> 4)) / 255.0f;
|
|
float f1 = float(((i+1) << 4) | ((i+1) >> 4)) / 255.0f;
|
|
midpoints4[i] = (f0 + f1) * 0.5f;
|
|
}
|
|
midpoints4[15] = 1.0f;
|
|
);
|
|
|
|
static const float midpoints5[32] = {
|
|
0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f,
|
|
0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f
|
|
};
|
|
|
|
//static const float midpoints6[64];
|
|
//static const float midpoints7[128];
|
|
|
|
|
|
|
|
// ETC2 Modes:
|
|
// - ETC1:
|
|
// - two partitions (flip modes): 2*(4x2, 2x4)
|
|
// - two base colors sotred as 444+444 or 555+333
|
|
// - two 3 bit intensity modifiers
|
|
// - T Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices.
|
|
// - H Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices.
|
|
// - Planar mode: 3 colors 676
|
|
|
|
struct ETC_Data {
|
|
enum Mode {
|
|
Mode_ETC1,
|
|
Mode_T,
|
|
Mode_H,
|
|
Mode_Planar,
|
|
} mode;
|
|
|
|
// @@ It may make more sense to store bit-expanded or even float colors here.
|
|
union {
|
|
struct {
|
|
uint16 color0; // 444 or 555
|
|
uint16 color1; // 444 or 333
|
|
uint8 table0; // 3 bits
|
|
uint8 table1; // 3 bits
|
|
bool flip; // partition mode
|
|
bool diff; // color encoding
|
|
} etc;
|
|
struct {
|
|
uint16 color0; // 444
|
|
uint16 color1; // 444
|
|
uint8 table; // 3 bits
|
|
} t, h;
|
|
struct {
|
|
uint8 ro, go, bo; // 676
|
|
uint8 rh, gh, bh; // 676
|
|
uint8 rv, gv, bv; // 676
|
|
} planar;
|
|
};
|
|
|
|
uint8 selector[16]; // 2 bit indices (32 bits)
|
|
};
|
|
|
|
struct ETC_Solution {
|
|
float error = NV_FLOAT_MAX;
|
|
ETC_Data data;
|
|
};
|
|
|
|
|
|
struct ETC_Options {
|
|
//bool fast_flip_mode_selection = false;
|
|
bool use_rg_etc = true;
|
|
bool enable_etc2 = true;
|
|
bool use_planar = true;
|
|
bool use_t_mode = true;
|
|
bool use_h_mode = true;
|
|
bool onebit_alpha = false;
|
|
Vector3 color_weights = Vector3(1);
|
|
|
|
//int8 eac_search_radius = 1; // [0-3]
|
|
//bool eac_11bit_mode = false;
|
|
};
|
|
|
|
/*static*/ float compress_etc(Vector4 input_colors[16], float input_weights[16], const ETC_Options & options, BlockETC * output);
|
|
|
|
|
|
struct BlockEAC {
|
|
uint base : 8;
|
|
uint table : 4;
|
|
uint multiplier : 4;
|
|
uint selectors0 : 8;
|
|
uint selectors1 : 8;
|
|
|
|
uint selectors2 : 8;
|
|
uint selectors3 : 8;
|
|
uint selectors4 : 8;
|
|
uint selectors5 : 8;
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(BlockEAC) == 64/8);
|
|
|
|
struct BlockETC_EAC {
|
|
BlockEAC eac;
|
|
BlockETC etc;
|
|
};
|
|
NV_COMPILER_CHECK(sizeof(BlockETC_EAC) == 128/8);
|
|
|
|
// EAC:
|
|
// 8 bit base code word
|
|
// 4 bit multiplier
|
|
// 4 bit table index
|
|
// 16 * 3 bit indices.
|
|
|
|
struct EAC_Data {
|
|
uint8 alpha; // 8 bits
|
|
uint8 multiplier; // 4 bits
|
|
uint8 table_index; // 4 bits
|
|
uint8 selector[16]; // 3 bit indices
|
|
};
|
|
|
|
struct EAC_Solution {
|
|
float error = NV_FLOAT_MAX;
|
|
EAC_Data data;
|
|
};
|
|
|
|
struct EAC_Options {
|
|
int search_radius = 1; // 0 = fast, 1 = medium, 2 = slow
|
|
bool use_11bit_mode = false;
|
|
};
|
|
|
|
|
|
static const int eac_intensity_modifiers[16][8] = {
|
|
{-3, -6, -9, -15, 2, 5, 8, 14}, // 0
|
|
{-3, -7,-10, -13, 2, 6, 9, 12}, // 1
|
|
{-2, -5, -8, -13, 1, 4, 7, 12}, // 2
|
|
{-2, -4, -6, -13, 1, 3, 5, 12}, // 3
|
|
{-3, -6, -8, -12, 2, 5, 7, 11}, // 4
|
|
{-3, -7, -9, -11, 2, 6, 8, 10}, // 5
|
|
{-4, -7, -8, -11, 3, 6, 7, 10}, // 6
|
|
{-3, -5, -8, -11, 2, 4, 7, 10}, // 7
|
|
{-2, -6, -8, -10, 1, 5, 7, 9 }, // 8
|
|
{-2, -5, -8, -10, 1, 4, 7, 9 }, // 9
|
|
{-2, -4, -8, -10, 1, 3, 7, 9 }, // 10
|
|
{-2, -5, -7, -10, 1, 4, 6, 9 }, // 11
|
|
{-3, -4, -7, -10, 2, 3, 6, 9 }, // 12
|
|
{-1, -2, -3, -10, 0, 1, 2, 9 }, // 13
|
|
{-4, -6, -8, -9, 3, 5, 7, 8 }, // 14
|
|
{-3, -5, -7, -9, 2, 4, 6, 8 } // 15
|
|
};
|
|
|
|
|
|
|
|
|
|
static void pack_etc2_block(const ETC_Data & data, BlockETC * output_block) {
|
|
BlockETC block;
|
|
|
|
bool swap_colors = false;
|
|
|
|
if (data.mode == ETC_Data::Mode_ETC1) {
|
|
// These are the same for individual and differential blocks.
|
|
block.individual.diff = data.etc.diff;
|
|
block.individual.flip = data.etc.flip;
|
|
block.individual.cw1 = data.etc.table0;
|
|
block.individual.cw2 = data.etc.table1;
|
|
|
|
if (data.etc.diff) {
|
|
block.differential.red1 = data.etc.color0 >> 10;
|
|
block.differential.dred2 = data.etc.color1 >> 6;
|
|
block.differential.green1 = (data.etc.color0 >> 5) & 0x1F;
|
|
block.differential.dgreen2 = (data.etc.color1 >> 3) & 0x7;
|
|
block.differential.blue1 = data.etc.color0 & 0x1F;
|
|
block.differential.dblue2 = data.etc.color1 & 0x7;
|
|
}
|
|
else {
|
|
block.individual.red1 = data.etc.color0 >> 8;
|
|
block.individual.red2 = data.etc.color1 >> 8;
|
|
block.individual.green1 = (data.etc.color0 >> 4) & 0xF;
|
|
block.individual.green2 = (data.etc.color1 >> 4) & 0xF;
|
|
block.individual.blue1 = data.etc.color0 & 0xF;
|
|
block.individual.blue2 = data.etc.color1 & 0xF;
|
|
}
|
|
}
|
|
else if (data.mode == ETC_Data::Mode_T) {
|
|
block.t.red1a = (data.t.color0 >> 8) >> 2;
|
|
block.t.red1b = (data.t.color0 >> 8);
|
|
block.t.green1 = (data.t.color0 >> 4);
|
|
block.t.blue1 = data.t.color0;
|
|
|
|
block.t.red2 = (data.t.color1 >> 8);
|
|
block.t.green2 = (data.t.color1 >> 4);
|
|
block.t.blue2 = data.t.color1;
|
|
|
|
block.t.da = data.t.table >> 1;
|
|
block.t.db = data.t.table;
|
|
|
|
block.t.diff = 1;
|
|
|
|
// create an invalid R differential to trigger T mode
|
|
int dr = block.differential.dred2;
|
|
if (dr >= 4) dr -= 8;
|
|
int r = block.differential.red1 + dr;
|
|
|
|
block.t.detect1 = 0;
|
|
block.t.detect2 = 1;
|
|
if (r >= 4) {
|
|
block.t.detect1 = 7;
|
|
block.t.detect2 = 0;
|
|
}
|
|
}
|
|
else if (data.mode == ETC_Data::Mode_H) {
|
|
bool table_lsb = data.h.table & 1;
|
|
swap_colors = (data.h.color0 < data.h.color1) ^ !table_lsb;
|
|
|
|
uint16 color0 = data.h.color0;
|
|
uint16 color1 = data.h.color1;
|
|
|
|
if (swap_colors) {
|
|
swap(color0, color1);
|
|
}
|
|
|
|
block.h.red1 = (data.h.color0 >> 8);
|
|
block.h.green1a = (data.h.color0 >> 4) >> 1;
|
|
block.h.green1b = (data.h.color0 >> 4);
|
|
block.h.blue1a = data.h.color0 >> 3;
|
|
block.h.blue1b = data.h.color0 >> 1;
|
|
block.h.blue1c = data.h.color0;
|
|
|
|
block.h.red2 = (data.h.color1 >> 8);
|
|
block.h.green2a = (data.h.color1 >> 4) >> 1;
|
|
block.h.green2b = (data.h.color1 >> 4);
|
|
block.h.blue2 = (data.h.color1 >> 8);
|
|
|
|
block.h.da = data.h.table >> 2;
|
|
block.h.db = data.h.table >> 1;
|
|
|
|
block.h.diff = 1;
|
|
|
|
// create an invalid R differential to trigger T mode
|
|
block.h.detect1 = 0;
|
|
block.h.detect2 = 0;
|
|
block.h.detect3 = 0;
|
|
|
|
int dr = block.differential.dred2;
|
|
int dg = block.differential.dgreen2;
|
|
if (dr >= 4) dr -= 8;
|
|
if (dg >= 4) dg -= 8;
|
|
int r = block.differential.red1 + dr;
|
|
int g = block.differential.green1 + dg;
|
|
|
|
if (r < 0 || r > 31) {
|
|
block.h.detect1 = 1;
|
|
}
|
|
if (g >= 4) {
|
|
block.h.detect2 = 7;
|
|
block.h.detect3 = 0;
|
|
}
|
|
else {
|
|
block.h.detect2 = 0;
|
|
block.h.detect3 = 1;
|
|
}
|
|
}
|
|
|
|
if (data.mode == ETC_Data::Mode_Planar) {
|
|
// From ETCLib:
|
|
block.planar.originRed = data.planar.ro;
|
|
block.planar.originGreen1 = data.planar.go >> 6;
|
|
block.planar.originGreen2 = data.planar.go;
|
|
block.planar.originBlue1 = data.planar.bo >> 5;
|
|
block.planar.originBlue2 = data.planar.bo >> 3;
|
|
block.planar.originBlue3 = data.planar.bo >> 1;
|
|
block.planar.originBlue4 = data.planar.bo;
|
|
|
|
block.planar.horizRed1 = data.planar.rh >> 1;
|
|
block.planar.horizRed2 = data.planar.rh;
|
|
block.planar.horizGreen = data.planar.gh;
|
|
block.planar.horizBlue1 = data.planar.bh >> 5;
|
|
block.planar.horizBlue2 = data.planar.bh;
|
|
|
|
block.planar.vertRed1 = data.planar.rv >> 3;
|
|
block.planar.vertRed2 = data.planar.rv;
|
|
block.planar.vertGreen1 = data.planar.gv >> 2;
|
|
block.planar.vertGreen2 = data.planar.gv;
|
|
block.planar.vertBlue = data.planar.bv;
|
|
|
|
block.planar.diff = 1;
|
|
|
|
// create valid RG differentials and an invalid B differential to trigger planar mode
|
|
block.planar.detect1 = 0;
|
|
block.planar.detect2 = 0;
|
|
block.planar.detect3 = 0;
|
|
block.planar.detect4 = 0;
|
|
|
|
// @@ Clean this up.
|
|
int dr = block.differential.dred2;
|
|
int dg = block.differential.dgreen2;
|
|
int db = block.differential.dblue2;
|
|
if (dr >= 4) dr -= 8;
|
|
if (dg >= 4) dg -= 8;
|
|
if (db >= 4) db -= 8;
|
|
int r = block.differential.red1 + dr;
|
|
int g = block.differential.green1 + dg;
|
|
int b = block.differential.blue1 + db;
|
|
|
|
if (r < 0 || r > 31) {
|
|
block.planar.detect1 = 1;
|
|
}
|
|
if (g < 0 || g > 31) {
|
|
block.planar.detect2 = 1;
|
|
}
|
|
if (b >= 4) {
|
|
block.planar.detect3 = 7;
|
|
block.planar.detect4 = 0;
|
|
}
|
|
else {
|
|
block.planar.detect3 = 0;
|
|
block.planar.detect4 = 1;
|
|
}
|
|
}
|
|
else {
|
|
block.individual.selectors = 0;
|
|
for (int i = 0; i < 16; i++) {
|
|
uint selector = data.selector[i];
|
|
selector = etc_selector_scramble[selector];
|
|
block.individual.selectors |= (selector >> 1) << (i ^ 8);
|
|
block.individual.selectors |= (selector & 1) << ((16 + i) ^ 8);
|
|
}
|
|
|
|
if (swap_colors) {
|
|
block.individual.selectors ^= 0x0000FFFF;
|
|
}
|
|
}
|
|
|
|
// @@ output_block is big endian, byte swap:
|
|
*output_block = block;
|
|
}
|
|
|
|
static void unpack_etc2_block(const BlockETC * input_block, ETC_Data * data) {
|
|
|
|
// @@ input_block is big endian, byte swap first:
|
|
BlockETC block = *input_block;
|
|
|
|
// Assume ETC1 for now.
|
|
data->mode = ETC_Data::Mode_ETC1;
|
|
|
|
// These are the same for individual and differential blocks.
|
|
data->etc.diff = block.individual.diff != 0;
|
|
data->etc.flip = block.individual.flip != 0;
|
|
data->etc.table0 = block.individual.cw1;
|
|
data->etc.table1 = block.individual.cw2;
|
|
|
|
// Decode colors.
|
|
if (data->etc.diff) {
|
|
data->etc.color0 = U16((block.differential.red1 << 10) | (block.differential.green1 << 5) | block.differential.blue1);
|
|
data->etc.color1 = U16((block.differential.dred2 << 6) | (block.differential.dgreen2 << 3) | block.differential.dblue2);
|
|
|
|
// @@ Clean this up.
|
|
int dr = block.differential.dred2;
|
|
int dg = block.differential.dgreen2;
|
|
int db = block.differential.dblue2;
|
|
if (dr >= 4) dr -= 8;
|
|
if (dg >= 4) dg -= 8;
|
|
if (db >= 4) db -= 8;
|
|
int r = block.differential.red1 + dr;
|
|
int g = block.differential.green1 + dg;
|
|
int b = block.differential.blue1 + db;
|
|
|
|
// Detect ETC2 modes (invalid combinations).
|
|
if (r < 0 || r > 31) {
|
|
data->mode = ETC_Data::Mode_T;
|
|
}
|
|
else if (g < 0 || g > 31) {
|
|
data->mode = ETC_Data::Mode_H;
|
|
}
|
|
else if (b < 0 || b > 31) {
|
|
data->mode = ETC_Data::Mode_Planar;
|
|
}
|
|
}
|
|
else {
|
|
data->etc.color0 = U16((block.individual.red1 << 8) | (block.individual.green1 << 4) | block.individual.blue1);
|
|
data->etc.color1 = U16((block.individual.red2 << 8) | (block.individual.green2 << 4) | block.individual.blue2);
|
|
}
|
|
|
|
if (data->mode == ETC_Data::Mode_T) {
|
|
uint16 r0 = U16((block.t.red1a << 2) | block.t.red1b);
|
|
uint16 g0 = U16(block.t.green1);
|
|
uint16 b0 = U16(block.t.blue1);
|
|
data->t.color0 = U16(r0 << 8) | U16(g0 << 4) | b0;
|
|
|
|
uint16 r1 = U16(block.t.red2);
|
|
uint16 g1 = U16(block.t.green2);
|
|
uint16 b1 = U16(block.t.blue2);
|
|
data->t.color1 = U16(r1 << 8) | U16(g1 << 4) | b1;
|
|
|
|
data->t.table = U8((block.t.da << 1) | block.t.db);
|
|
}
|
|
else if (data->mode == ETC_Data::Mode_H) {
|
|
uint16 r0 = U16(block.h.red1);
|
|
uint16 g0 = U16((block.h.green1a << 1) | block.h.green1b);
|
|
uint16 b0 = U16((block.h.blue1a << 3) | (block.h.blue1b << 1) | block.h.blue1c);
|
|
data->h.color0 = U16(r0 << 8) | U16(g0 << 4) | b0;
|
|
|
|
uint16 r1 = U16(block.h.red2);
|
|
uint16 g1 = U16((block.h.green2a << 1) | block.h.green2b);
|
|
uint16 b1 = U16(block.h.blue2);
|
|
data->h.color1 = U16(r1 << 8) | U16(g1 << 4) | b1;
|
|
|
|
data->h.table = U8((block.h.da << 2) | (block.h.db << 1));
|
|
|
|
if (data->h.color0 >= data->h.color1) {
|
|
data->h.table++;
|
|
}
|
|
}
|
|
|
|
if (data->mode == ETC_Data::Mode_Planar) {
|
|
data->planar.ro = U8(block.planar.originRed);
|
|
data->planar.go = U8((block.planar.originGreen1 << 6) + block.planar.originGreen2);
|
|
data->planar.bo = U8((block.planar.originBlue1 << 5) + (block.planar.originBlue2 << 3) + (block.planar.originBlue3 << 1) + block.planar.originBlue4);
|
|
|
|
data->planar.rh = U8((block.planar.horizRed1 << 1) + block.planar.horizRed2);
|
|
data->planar.gh = U8(block.planar.horizGreen);
|
|
data->planar.bh = U8((block.planar.horizBlue1 << 5) + block.planar.horizBlue2);
|
|
|
|
data->planar.rv = U8((block.planar.vertRed1 << 3) + block.planar.vertRed2);
|
|
data->planar.gv = U8((block.planar.vertGreen1 << 2) + block.planar.vertGreen2);
|
|
data->planar.bv = U8(block.planar.vertBlue);
|
|
}
|
|
else {
|
|
// Note, selectors are arranged in columns, keep that order.
|
|
unsigned char * selectors = (uint8 *)&block.individual.selectors;
|
|
for (int i = 0; i < 16; i++) {
|
|
int byte_msb = (1 - (i / 8));
|
|
int byte_lsb = (3 - (i / 8));
|
|
int shift = (i & 7);
|
|
|
|
uint msb = (selectors[byte_msb] >> shift) & 1;
|
|
uint lsb = (selectors[byte_lsb] >> shift) & 1;
|
|
|
|
uint index = (msb << 1) | lsb;
|
|
|
|
if (data->mode == ETC_Data::Mode_ETC1) {
|
|
data->selector[i] = etc_selector_unscramble[index];
|
|
}
|
|
else {
|
|
// No scrambling in T & H modes.
|
|
data->selector[i] = index;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
static void pack_eac_block(const EAC_Data & data, BlockEAC * output_block) {
|
|
|
|
output_block->base = data.alpha;
|
|
output_block->table = data.table_index;
|
|
output_block->multiplier = data.multiplier;
|
|
|
|
uint64 selector_bits = 0;
|
|
for (uint i = 0; i < 16; i++) {
|
|
uint shift = 45 - (3 * i);
|
|
selector_bits |= uint64(data.selector[i]) << shift;
|
|
}
|
|
|
|
output_block->selectors0 = selector_bits >> 40;
|
|
output_block->selectors1 = selector_bits >> 32;
|
|
output_block->selectors2 = selector_bits >> 24;
|
|
output_block->selectors3 = selector_bits >> 16;
|
|
output_block->selectors4 = selector_bits >> 8;
|
|
output_block->selectors5 = selector_bits >> 0;
|
|
}
|
|
|
|
static void unpack_eac_block(const BlockEAC * input_block, EAC_Data * data) {
|
|
|
|
data->alpha = input_block->base;
|
|
data->table_index = input_block->table;
|
|
data->multiplier = input_block->multiplier;
|
|
|
|
uint64 selector_bits = 0;
|
|
selector_bits |= uint64(input_block->selectors0) << 40;
|
|
selector_bits |= uint64(input_block->selectors1) << 32;
|
|
selector_bits |= uint64(input_block->selectors2) << 24;
|
|
selector_bits |= uint64(input_block->selectors3) << 16;
|
|
selector_bits |= uint64(input_block->selectors4) << 8;
|
|
selector_bits |= uint64(input_block->selectors5) << 0;
|
|
for (uint i = 0; i < 16; i++) {
|
|
uint shift = 45 - (3 * i);
|
|
data->selector[i] = (selector_bits >> shift) & 0x7;
|
|
}
|
|
}
|
|
|
|
|
|
|
|
// This assumes nin > nout-nin
|
|
inline int bitexpand(uint32 bits, uint nin, uint nout) {
|
|
assert(nout > nin);
|
|
//assert(nout - nin > nin);
|
|
return (bits << uint(nout - nin)) | (bits >> uint(2U * nin - nout));
|
|
}
|
|
|
|
// Integer color unpacking for decompressor.
|
|
static void unpack_color_444(uint32 packed_color, int * r, int * g, int * b) {
|
|
int r4 = (packed_color >> 8) & 0xF;
|
|
int g4 = (packed_color >> 4) & 0xF;
|
|
int b4 = packed_color & 0xF;
|
|
*r = r4 << 4 | r4; // bitexpand(r4, 4, 8);
|
|
*g = g4 << 4 | g4; // bitexpand(g4, 4, 8);
|
|
*b = b4 << 4 | b4; // bitexpand(b4, 4, 8);
|
|
}
|
|
|
|
static Vector3 unpack_color_444(uint32 packed_color) {
|
|
int r, g, b;
|
|
unpack_color_444(packed_color, &r, &g, &b);
|
|
return Vector3(float(r), float(g), float(b)) * 1.0f / 255.0f;
|
|
}
|
|
|
|
static void unpack_color_555(uint32 packed_color, int * r, int * g, int * b) {
|
|
int r5 = (packed_color >> 10) & 0x1F;
|
|
int g5 = (packed_color >> 5) & 0x1F;
|
|
int b5 = packed_color & 0x1F;
|
|
*r = (r5 << 3) | (r5 >> 2); // bitexpand(r5, 5, 8);
|
|
*g = (g5 << 3) | (g5 >> 2); // bitexpand(g5, 5, 8);
|
|
*b = (b5 << 3) | (b5 >> 2); // bitexpand(b5, 5, 8);
|
|
}
|
|
|
|
static Vector3 unpack_color_555(uint32 packed_color) {
|
|
int r, g, b;
|
|
unpack_color_555(packed_color, &r, &g, &b);
|
|
return Vector3(float(r), float(g), float(b)) * 1.0f / 255.0f;
|
|
}
|
|
|
|
// Returns signed r,g,b without bit expansion.
|
|
static void unpack_delta_333(uint32 packed_delta, int * r, int * g, int * b) {
|
|
*r = (packed_delta >> 6) & 7;
|
|
*g = (packed_delta >> 3) & 7;
|
|
*b = packed_delta & 7;
|
|
if (*r >= 4) *r -= 8;
|
|
if (*g >= 4) *g -= 8;
|
|
if (*b >= 4) *b -= 8;
|
|
}
|
|
|
|
static bool unpack_color_555(uint32 packed_color, uint32 packed_delta, int * r, int * g, int * b) {
|
|
int dc_r, dc_g, dc_b;
|
|
unpack_delta_333(packed_delta, &dc_r, &dc_g, &dc_b);
|
|
|
|
int r5 = int((packed_color >> 10U) & 0x1F) + dc_r;
|
|
int g5 = int((packed_color >> 5U) & 0x1F) + dc_g;
|
|
int b5 = int(packed_color & 0x1F) + dc_b;
|
|
|
|
bool success = true;
|
|
if (static_cast<uint>(r5 | g5 | b5) > 31U)
|
|
{
|
|
success = false;
|
|
r5 = clamp(r5, 0, 31);
|
|
g5 = clamp(g5, 0, 31);
|
|
b5 = clamp(b5, 0, 31);
|
|
}
|
|
|
|
*r = (r5 << 3) | (r5 >> 2); // bitexpand(r5, 5, 8);
|
|
*g = (g5 << 3) | (g5 >> 2); // bitexpand(g5, 5, 8);
|
|
*b = (b5 << 3) | (b5 >> 2); // bitexpand(b5, 5, 8);
|
|
|
|
return success;
|
|
}
|
|
|
|
static Vector3 unpack_color_555(uint32 packed_color, uint32 packed_delta) {
|
|
int r, g, b;
|
|
bool success = unpack_color_555(packed_color, packed_delta, &r, &g, &b);
|
|
assert(success);
|
|
return Vector3(float(r), float(g), float(b)) * 1.0f / 255.0f;
|
|
}
|
|
|
|
|
|
static void unpack_color_676(uint32 packed_color, int * r, int * g, int * b) {
|
|
int r6 = (packed_color >> 13) & 0x3F;
|
|
int g7 = (packed_color >> 6) & 0x7F;
|
|
int b6 = packed_color & 0x3F;
|
|
|
|
*r = bitexpand(r6, 6, 8); // r << 2 | r >> 4
|
|
*g = bitexpand(g7, 7, 8); // g << 1 | g >> 6
|
|
*b = bitexpand(b6, 6, 8); // b << 2 | b >> 4
|
|
}
|
|
|
|
|
|
static uint32 pack_color_444(Vector3 color) {
|
|
|
|
// Truncate.
|
|
uint r = U32(ftoi_trunc(clamp(color.x * 15.0f, 0.0f, 15.0f)));
|
|
uint g = U32(ftoi_trunc(clamp(color.y * 15.0f, 0.0f, 15.0f)));
|
|
uint b = U32(ftoi_trunc(clamp(color.z * 15.0f, 0.0f, 15.0f)));
|
|
|
|
// Round exactly according to 444 bit-expansion.
|
|
r += (color.x > midpoints4[r]);
|
|
g += (color.y > midpoints4[g]);
|
|
b += (color.z > midpoints4[b]);
|
|
|
|
return (r << 8) | (g << 4) | b;
|
|
}
|
|
|
|
static uint32 pack_color_555(Vector3 color) {
|
|
|
|
// Truncate.
|
|
uint r = U32(ftoi_trunc(clamp(color.x * 31.0f, 0.0f, 31.0f)));
|
|
uint g = U32(ftoi_trunc(clamp(color.y * 31.0f, 0.0f, 31.0f)));
|
|
uint b = U32(ftoi_trunc(clamp(color.z * 31.0f, 0.0f, 31.0f)));
|
|
|
|
// Round exactly according to 555 bit-expansion.
|
|
r += (color.x > midpoints5[r]);
|
|
g += (color.y > midpoints5[g]);
|
|
b += (color.z > midpoints5[b]);
|
|
|
|
return (r << 10) | (g << 5) | b;
|
|
}
|
|
|
|
static uint32 pack_delta_333(Vector3 delta) {
|
|
// @@ Accurate rounding of signed 3-bit components.
|
|
|
|
int r = ftoi_round(clamp(delta.x * 31.0f, -4.0f, 3.0f));
|
|
int g = ftoi_round(clamp(delta.y * 31.0f, -4.0f, 3.0f));
|
|
int b = ftoi_round(clamp(delta.z * 31.0f, -4.0f, 3.0f));
|
|
|
|
//r += (delta.x > delta_midpoints3[r]);
|
|
//g += (delta.y > delta_midpoints3[g]);
|
|
//b += (delta.z > delta_midpoints3[b]);
|
|
|
|
if (r < 0) r += 8;
|
|
if (g < 0) g += 8;
|
|
if (b < 0) b += 8;
|
|
return static_cast<uint16>(b | (g << 3) | (r << 6));
|
|
}
|
|
|
|
static uint8 pack_float_6(float f) {
|
|
|
|
// Truncate.
|
|
uint u = U32(ftoi_trunc(clamp(f * 63.0f, 0.0f, 63.0f)));
|
|
|
|
// Round exactly according to 6 bit-expansion.
|
|
//u += (f > midpoints6[u]);
|
|
|
|
float midpoint = 0.5f * (bitexpand(u, 6, 8) + bitexpand(min(u + 1, 63U), 6, 8)); // @@ Precompute.
|
|
u += (f > midpoint);
|
|
|
|
return U8(u);
|
|
}
|
|
|
|
static uint8 pack_float_7(float f) {
|
|
|
|
// Truncate.
|
|
uint u = U32(ftoi_trunc(clamp(f * 127.0f, 0.0f, 127.0f)));
|
|
|
|
// Round exactly according to 6 bit-expansion.
|
|
//u += (f > midpoints7[u]);
|
|
|
|
float midpoint = 0.5f * (bitexpand(u, 7, 8) + bitexpand(min(u + 1, 127U), 7, 8)); // @@ Precompute.
|
|
u += (f > midpoint);
|
|
|
|
return U8(u);
|
|
}
|
|
|
|
static uint8 pack_float_6(float f, bool round_dir) {
|
|
uint u = U32(ftoi_trunc(clamp(f * 63.0f + round_dir, 0.0f, 63.0f)));
|
|
return U8(u);
|
|
}
|
|
|
|
static uint8 pack_float_7(float f, bool round_dir) {
|
|
uint u = U32(ftoi_trunc(clamp(f * 127.0f + round_dir, 0.0f, 127.0f)));
|
|
return U8(u);
|
|
}
|
|
|
|
|
|
|
|
|
|
Vector3 get_partition_color_average(const Vector4 input_colors[16], const float input_weights[16], bool flip, int partition) {
|
|
Vector3 sum_c(0);
|
|
float sum_w = 0;
|
|
|
|
if (flip) {
|
|
// Horizontal partition.
|
|
int offset = partition ? 8 : 0;
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
sum_c += input_colors[i+offset].xyz() * input_weights[i+offset];
|
|
sum_w += input_weights[i+offset];
|
|
}
|
|
}
|
|
else {
|
|
// Vertical partition.
|
|
int offset = partition ? 2 : 0;
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
sum_c += input_colors[i+offset].xyz() * input_weights[i+offset];
|
|
sum_w += input_weights[i+offset];
|
|
|
|
sum_c += input_colors[i+offset+1].xyz() * input_weights[i+offset+1];
|
|
sum_w += input_weights[i+offset+1];
|
|
|
|
offset += 2;
|
|
}
|
|
}
|
|
if (sum_w == 0) {
|
|
sum_w = 1;
|
|
}
|
|
|
|
return sum_c * 1.0f / sum_w;
|
|
}
|
|
|
|
// Approximate partition color using average.
|
|
Vector3 base_color_average(const Vector3 colors[8]) {
|
|
Vector3 sum_c(0);
|
|
|
|
for (uint i = 0; i < 8; i++) {
|
|
sum_c += colors[i];
|
|
}
|
|
|
|
return sum_c * 1.0f / 8.0f;
|
|
}
|
|
Vector3 base_color_average(const Vector3 colors[8], const float weights[8]) {
|
|
Vector3 sum_c(0);
|
|
float sum_w = 0;
|
|
|
|
for (uint i = 0; i < 8; i++) {
|
|
sum_c += colors[i] * weights[i];
|
|
sum_w += weights[i];
|
|
}
|
|
|
|
return sum_c * 1.0f / sum_w;
|
|
}
|
|
|
|
#if 0
|
|
// Compute base color using least squares.
|
|
Vector3 base_color_least_squares(const Vector3 colors[8], int table_index, int indices[8]) {
|
|
|
|
// Compute dot(C, I) and dot(I, I)
|
|
Vector3 CI(0);
|
|
float II = 0;
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
Vector3 C = colors[i];
|
|
float I = etc_intensity_modifiers[table_index][indices[i]];
|
|
CI += C * I;
|
|
II += I * I;
|
|
}
|
|
|
|
return CI / II;
|
|
}
|
|
|
|
// @@ Do weighted least squares!
|
|
Vector3 base_color_least_squares(const Vector3 colors[8], const float weights[8], int table_index, int indices[8]) {
|
|
|
|
// Compute dot(C, I) and dot(I, I)
|
|
Vector3 CI(0);
|
|
float II = 0;
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
Vector3 C = colors[i];
|
|
float w = weights[i];
|
|
float I = etc_intensity_modifiers[table_index][indices[i]];
|
|
CI += C * I * w;
|
|
II += I * I;
|
|
}
|
|
|
|
return CI / II;
|
|
}
|
|
|
|
// Is this any faster than the above?
|
|
Vector3 base_color_least_squares(const Vector3 colors[8], int table_index, int c0, int c1, int c2) {
|
|
|
|
// Compute dot(C, I) and dot(I, I)
|
|
Vector3 CI(0);
|
|
|
|
float I0 = etc_intensity_modifiers[table_index][0];
|
|
float I1 = etc_intensity_modifiers[table_index][1];
|
|
float I2 = etc_intensity_modifiers[table_index][2];
|
|
float I3 = etc_intensity_modifiers[table_index][3];
|
|
|
|
float II = 0;
|
|
II += c0 * I0 * I0;
|
|
II += c1 * I1 * I1;
|
|
II += c2 * I2 * I2;
|
|
II += (8-c0-c1-c2) * I3 * I3;
|
|
|
|
int i = 0;
|
|
for (; i < c0; i++) CI += colors[i] * I0;
|
|
for (; i < c0+c1; i++) CI += colors[i] * I1;
|
|
for (; i < c0+c1+c2; i++) CI += colors[i] * I2;
|
|
for (; i < 8; i++) CI += colors[i] * I3;
|
|
|
|
return CI / II;
|
|
}
|
|
|
|
static void selectors_for_clusters(int c0, int c1, int c2, int selector[8]) {
|
|
int i = 0;
|
|
for (; i < c0; i++) selector[i] = 0;
|
|
for (; i < c0+c1; i++) selector[i] = 1;
|
|
for (; i < c0+c1+c2; i++) selector[i] = 2;
|
|
for (; i < 8; i++) selector[i] = 3;
|
|
}
|
|
|
|
static int cluster_count(int count = 8) {
|
|
int total = 0;
|
|
|
|
for (uint c0 = 0; c0 <= count; c0++) {
|
|
for (uint c1 = 0; c1 <= count-c0; c1++) {
|
|
for (uint c2 = 0; c2 <= count-c0-c1; c2++) {
|
|
total++;
|
|
}
|
|
}
|
|
}
|
|
|
|
// total is the number of possible cluster combinations.
|
|
return total;
|
|
}
|
|
|
|
// Does each partition have its own table index? Or is it shared for both?
|
|
|
|
|
|
void test_all_total_orders(const Vector4 colors[8], const float weights[8], int table_index) {
|
|
|
|
// @@ compute average luminance of each partition.
|
|
|
|
|
|
// @@ sort colors by the luminance differences respect to partition average.
|
|
|
|
// @@ compute luminance range, pick table index based on that. Try nearest indices also?
|
|
|
|
// For each cluster combination:
|
|
/*
|
|
for (uint c0 = 0; c0 <= count; c0++) {
|
|
for (uint c1 = 0; c1 <= count-c0; c1++) {
|
|
for (uint c2 = 0; c2 <= count-c0-c1; c2++) {
|
|
|
|
// compute selectors.
|
|
int selector[8];
|
|
selectors_for_clusters(c0, c1, c2, selector);
|
|
|
|
// compute base colors that minimize error in each partition.
|
|
|
|
// determine error for these quantized base colors. Record best cluster combination.
|
|
|
|
}
|
|
}
|
|
}
|
|
*/
|
|
}
|
|
|
|
void test_all_total_orders(const Vector4 input_colors[16], const float input_weights[16], uint count, bool flip, int table_index) {
|
|
|
|
// Slow method is to test both flip modes.
|
|
//test_all_total_orders(input_colors, input_weights, /*flip=*/false, int table_index);
|
|
//test_all_total_orders(input_colors, input_weights, /*flip=*/true, int table_index);
|
|
}
|
|
|
|
|
|
// @@ How do compute the error for a given base color?
|
|
|
|
// Compute indices using range fitting / quantization of input colors?
|
|
|
|
// Compute indices using range fitting.
|
|
|
|
void test_all_clusters() {
|
|
int count = 8; // Could be smaller.
|
|
|
|
for (uint c0 = 0; c0 <= count; c0++) {
|
|
Vector3 x1(0.0f);
|
|
float w1 = 0.0f;
|
|
|
|
for (uint c1 = 0; c1 <= count-c0; c1++) {
|
|
Vector3 x2(0.0f);
|
|
float w2 = 0.0f;
|
|
|
|
for (uint c2 = 0; c2 <= count-c0-c1; c2++) {
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static Color32 saturate_color(int R, int G, int B) {
|
|
Color32 c;
|
|
c.r = U8(clamp(R, 0, 255));
|
|
c.g = U8(clamp(G, 0, 255));
|
|
c.b = U8(clamp(B, 0, 255));
|
|
c.a = 255;
|
|
return c;
|
|
}
|
|
|
|
static void get_diff_subblock_palette(uint16 packed_color, uint table_idx, Color32 palette[4]) {
|
|
assert(table_idx < 8);
|
|
|
|
const int * intensity_table = etc_intensity_modifiers[table_idx];
|
|
|
|
int r, g, b;
|
|
unpack_color_555(packed_color, &r, &g, &b);
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
const int y = intensity_table[i];
|
|
palette[i] = saturate_color(r + y, g + y, b + y);
|
|
}
|
|
}
|
|
|
|
static bool get_diff_subblock_palette(uint16 packed_color, uint16 packed_delta, uint table_idx, Color32 palette[4]) {
|
|
assert(table_idx < 8);
|
|
|
|
const int * intensity_table = etc_intensity_modifiers[table_idx];
|
|
|
|
int r, g, b;
|
|
bool success = unpack_color_555(packed_color, packed_delta, &r, &g, &b);
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
const int y = intensity_table[i];
|
|
palette[i] = saturate_color(r + y, g + y, b + y);
|
|
}
|
|
|
|
return success;
|
|
}
|
|
|
|
static void get_abs_subblock_palette(uint16 packed_color, uint table_idx, Color32 palette[4]) {
|
|
assert(table_idx < 8);
|
|
|
|
const int * intensity_table = etc_intensity_modifiers[table_idx];
|
|
|
|
int r, g, b;
|
|
unpack_color_444(packed_color, &r, &g, &b);
|
|
|
|
for (int i = 0; i < 4; i++) {
|
|
const int y = intensity_table[i];
|
|
palette[i] = saturate_color(r + y, g + y, b + y);
|
|
}
|
|
}
|
|
|
|
static int get_selector(const ETC_Data & data, int x, int y) {
|
|
// Note selectors are arranged in column order.
|
|
return data.selector[x*4+y];
|
|
}
|
|
static int get_partition(const ETC_Data & data, int x, int y) {
|
|
assert(data.mode == ETC_Data::Mode_ETC1);
|
|
return data.etc.flip ? y > 1 : x > 1;
|
|
}
|
|
|
|
static void decode_etc1(const ETC_Data & data, Vector4 colors[16]) {
|
|
assert(data.mode == ETC_Data::Mode_ETC1);
|
|
|
|
Color32 palette[2][4];
|
|
|
|
if (data.etc.diff) {
|
|
// Decode colors in 555+333 mode.
|
|
get_diff_subblock_palette(data.etc.color0, data.etc.table0, palette[0]);
|
|
get_diff_subblock_palette(data.etc.color0, data.etc.color1, data.etc.table1, palette[1]);
|
|
}
|
|
else {
|
|
// Decode colors in 444,444 mode.
|
|
get_abs_subblock_palette(data.etc.color0, data.etc.table0, palette[0]);
|
|
get_abs_subblock_palette(data.etc.color1, data.etc.table1, palette[1]);
|
|
}
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
colors[y*4+x] = toVector4(palette[get_partition(data, x, y)][get_selector(data, x, y)]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void decode_etc2_t(const ETC_Data & data, Vector4 output_colors[16]) {
|
|
assert(data.mode == ETC_Data::Mode_T);
|
|
|
|
int r, g, b;
|
|
Color32 palette[4];
|
|
|
|
int d = etc_th_distances[data.t.table];
|
|
|
|
unpack_color_444(data.t.color0, &r, &g, &b);
|
|
palette[0] = saturate_color(r, g, b);
|
|
|
|
unpack_color_444(data.t.color1, &r, &g, &b);
|
|
palette[1] = saturate_color(r + d, g + d, b + d);
|
|
palette[2] = saturate_color(r, g, b);
|
|
palette[3] = saturate_color(r - d, g - d, b - d);
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
output_colors[y*4+x] = toVector4(palette[get_selector(data, x, y)]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void decode_etc2_h(const ETC_Data & data, Vector4 output_colors[16]) {
|
|
assert(data.mode == ETC_Data::Mode_H);
|
|
|
|
int r, g, b;
|
|
Color32 palette[4];
|
|
|
|
int d = etc_th_distances[data.t.table];
|
|
|
|
unpack_color_444(data.t.color0, &r, &g, &b);
|
|
palette[0] = saturate_color(r + d, g + d, b + d);
|
|
palette[1] = saturate_color(r - d, g - d, b - d);
|
|
|
|
unpack_color_444(data.t.color1, &r, &g, &b);
|
|
palette[2] = saturate_color(r + d, g + d, b + d);
|
|
palette[3] = saturate_color(r - d, g - d, b - d);
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
output_colors[y*4+x] = toVector4(palette[get_selector(data, x, y)]);
|
|
}
|
|
}
|
|
}
|
|
|
|
static void decode_etc2_planar(const ETC_Data & data, Vector4 output_colors[16]) {
|
|
assert(data.mode == ETC_Data::Mode_Planar);
|
|
|
|
int ro, go, bo; // origin color
|
|
int rh, gh, bh; // horizontal color
|
|
int rv, gv, bv; // vertical color
|
|
|
|
// Unpack from 676
|
|
ro = bitexpand(data.planar.ro, 6, 8); // r << 2 | r >> 4
|
|
go = bitexpand(data.planar.go, 7, 8); // g << 1 | g >> 6
|
|
bo = bitexpand(data.planar.bo, 6, 8);
|
|
|
|
rh = bitexpand(data.planar.rh, 6, 8);
|
|
gh = bitexpand(data.planar.gh, 7, 8);
|
|
bh = bitexpand(data.planar.bh, 6, 8);
|
|
|
|
rv = bitexpand(data.planar.rv, 6, 8);
|
|
gv = bitexpand(data.planar.gv, 7, 8);
|
|
bv = bitexpand(data.planar.bv, 6, 8);
|
|
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
int r = (4 * ro + x * (rh - ro) + y * (rv - ro) + 2) >> 2;
|
|
int g = (4 * go + x * (gh - go) + y * (gv - go) + 2) >> 2;
|
|
int b = (4 * bo + x * (bh - bo) + y * (bv - bo) + 2) >> 2;
|
|
|
|
int idx = 4 * y + x;
|
|
output_colors[idx].x = saturate(float(r) / 255.0f);
|
|
output_colors[idx].y = saturate(float(g) / 255.0f);
|
|
output_colors[idx].z = saturate(float(b) / 255.0f);
|
|
output_colors[idx].w = 1;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void decode_etc2(const ETC_Data & data, Vector4 colors[16]) {
|
|
|
|
if (data.mode == ETC_Data::Mode_ETC1) {
|
|
decode_etc1(data, colors);
|
|
}
|
|
else if (data.mode == ETC_Data::Mode_T) {
|
|
decode_etc2_t(data, colors);
|
|
}
|
|
else if (data.mode == ETC_Data::Mode_H) {
|
|
decode_etc2_h(data, colors);
|
|
}
|
|
else /*if (data.mode == ETC_Data::Mode_Planar)*/ {
|
|
decode_etc2_planar(data, colors);
|
|
}
|
|
}
|
|
|
|
|
|
static float get_alpha11(int base, int table, int mul, int index) {
|
|
int elevenbase = base*8+4;
|
|
int tabVal = eac_intensity_modifiers[table][index];
|
|
int elevenTabVal = tabVal*8;
|
|
|
|
if(mul!=0) elevenTabVal*=mul;
|
|
else elevenTabVal/=8;
|
|
|
|
//calculate sum
|
|
int elevenbits = elevenbase+elevenTabVal;
|
|
|
|
//clamp..
|
|
if(elevenbits>=256*8) elevenbits=256*8-1;
|
|
else if(elevenbits<0) elevenbits=0;
|
|
//elevenbits now contains the 11 bit alpha value as defined in the spec.
|
|
|
|
//extend to 16 bits before returning, since we don't have any good 11-bit file formats.
|
|
uint16 sixteenbits = (elevenbits<<5)+(elevenbits>>6);
|
|
|
|
return float(sixteenbits) / 65535.0f;
|
|
}
|
|
|
|
static float get_alpha8(int base, int table, int mul, int index) {
|
|
int value = clamp(base + eac_intensity_modifiers[table][index] * mul, 0, 255);
|
|
return value / 255.0f;
|
|
}
|
|
|
|
|
|
|
|
|
|
static void decode_eac_8(const EAC_Data & data, Vector4 output_colors[16], int output_channel = 3) {
|
|
for (int i = 0; i < 16; i++) {
|
|
int s = data.selector[4*(i%4) + i/4];
|
|
output_colors[i].component[output_channel] = get_alpha8(data.alpha, data.table_index, data.multiplier, s);
|
|
}
|
|
}
|
|
|
|
static void decode_eac_11(const EAC_Data & data, Vector4 output_colors[16], int output_channel = 0) {
|
|
for (int i = 0; i < 16; i++) {
|
|
int s = data.selector[4*(i%4) + i/4];
|
|
output_colors[i].component[output_channel] = get_alpha11(data.alpha, data.table_index, data.multiplier, s);
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) {
|
|
Vector3 d = (p - c) * w;
|
|
return dot(d, d);
|
|
}
|
|
|
|
static float evaluate_rgb_mse(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, const ETC_Data & data) {
|
|
// Decode data and compare?
|
|
Vector4 colors[16];
|
|
decode_etc2(data, colors);
|
|
|
|
float error = 0;
|
|
for (int i = 0; i < 16; i++) {
|
|
error += input_weights[i] * evaluate_mse(input_colors[i].xyz(), colors[i].xyz(), options.color_weights);
|
|
}
|
|
return error;
|
|
}
|
|
|
|
|
|
static int select_table_index(const Vector3 & base_color, const Vector4 input_colors[16], const float input_weights[16], bool flip, int partition) {
|
|
|
|
//float min_lum_delta = NV_FLOAT_MAX;
|
|
float max_lum_delta = -NV_FLOAT_MAX;
|
|
|
|
int xb = partition ? 2 : 0;
|
|
int xe = partition ? 4 : 2;
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = xb; x < xe; x++) {
|
|
int idx = flip ? x*4 + y : y*4 + x;
|
|
float lum_delta = dot(base_color, Vector3(1.0f/3)) - dot(input_colors[idx].xyz(), Vector3(1.0f/3));
|
|
//min_lum_delta = min(min_lum_delta, lum_delta);
|
|
max_lum_delta = max(max_lum_delta, fabsf(lum_delta));
|
|
}
|
|
}
|
|
|
|
int best_range = -1;
|
|
float best_error = NV_FLOAT_MAX;
|
|
for (int i = 0; i < 8; i++) {
|
|
float error = fabsf(etc_intensity_range[i] - 255 * max_lum_delta);
|
|
if (error < best_error) {
|
|
best_error = error;
|
|
best_range = i;
|
|
}
|
|
}
|
|
|
|
return best_range;
|
|
}
|
|
|
|
static float update_selectors(const Vector4 input_colors[16], const float input_weights[16], ETC_Data & data, const ETC_Options & options) {
|
|
|
|
Color32 palette[2][4];
|
|
|
|
if (data.etc.diff) {
|
|
// Decode colors in 555+333 mode.
|
|
get_diff_subblock_palette(data.etc.color0, data.etc.table0, palette[0]);
|
|
get_diff_subblock_palette(data.etc.color0, data.etc.color1, data.etc.table1, palette[1]);
|
|
}
|
|
else {
|
|
// Decode colors in 444,444 mode.
|
|
get_abs_subblock_palette(data.etc.color0, data.etc.table0, palette[0]);
|
|
get_abs_subblock_palette(data.etc.color1, data.etc.table1, palette[1]);
|
|
}
|
|
|
|
float total_error = 0;
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
int i = y*4 + x;
|
|
|
|
float best_error = NV_FLOAT_MAX;
|
|
int best_p = 0;
|
|
|
|
for (int p = 0; p < 4; p++) {
|
|
float error = evaluate_mse(toVector3(palette[get_partition(data, x, y)][p]), input_colors[i].xyz(), options.color_weights);
|
|
if (error < best_error) {
|
|
best_error = error;
|
|
best_p = p;
|
|
}
|
|
}
|
|
|
|
int s = x*4 + y;
|
|
data.selector[s] = U8(best_p);
|
|
|
|
total_error += best_error * input_weights[i];
|
|
}
|
|
}
|
|
|
|
return total_error;
|
|
}
|
|
|
|
|
|
static void partition_input_block(const Vector4 input_colors[16], const float input_weights[16], bool flip, int partition, Vector3 output_colors[8], float output_weights[8]) {
|
|
|
|
const int xb = partition ? 2 : 0;
|
|
const int xe = partition ? 4 : 2;
|
|
|
|
for (int y = 0, i = 0; y < 4; y++) {
|
|
for (int x = xb; x < xe; x++, i++) {
|
|
int idx = flip ? x*4 + y : y*4 + x;
|
|
|
|
output_colors[i] = input_colors[idx].xyz();
|
|
output_weights[i] = input_weights[idx];
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
struct ETC_SubBlock {
|
|
Vector3 color;
|
|
bool delta;
|
|
int table;
|
|
int indices[8];
|
|
};
|
|
|
|
static float evaluate_rgb_mse(const Vector3 colors[8], const float weights[8], const ETC_Options & options, ETC_SubBlock * sub_block) {
|
|
|
|
// Evaluate sub block palette.
|
|
Vector3 palette[4];
|
|
palette[0] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][0] / 255.0f);
|
|
palette[1] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][1] / 255.0f);
|
|
palette[2] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][2] / 255.0f);
|
|
palette[3] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][3] / 255.0f);
|
|
|
|
float mse = 0;
|
|
for (int i = 0; i < 8; i++) {
|
|
mse += evaluate_mse(colors[i], palette[sub_block->indices[i]], options.color_weights) * weights[i];
|
|
}
|
|
|
|
return mse;
|
|
}
|
|
|
|
static void optimize_base_color(const Vector3 colors[8], const float weights[8], ETC_SubBlock * sub_block) {
|
|
|
|
// @@ For a given index selection, find color that minimizes the error. RGB components are independent.
|
|
|
|
float D_sum = 0;
|
|
float R_sum = 0;
|
|
float G_sum = 0;
|
|
float B_sum = 0;
|
|
float W_sum = 0;
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
float Di = etc_intensity_modifiers[sub_block->table][sub_block->indices[i]] / 255.0f; // @@ precompute?
|
|
|
|
D_sum += Di * weights[i];
|
|
R_sum += colors[i].x * weights[i];
|
|
G_sum += colors[i].y * weights[i];
|
|
B_sum += colors[i].z * weights[i];
|
|
W_sum += weights[i];
|
|
}
|
|
|
|
sub_block->color.x = (R_sum - D_sum) / W_sum;
|
|
sub_block->color.y = (R_sum - D_sum) / W_sum;
|
|
sub_block->color.z = (R_sum - D_sum) / W_sum;
|
|
|
|
// @@ Estimate error (without quantization)
|
|
|
|
|
|
|
|
// @@ Repeat for all tables?
|
|
|
|
// @@ Given a new center, compute new indices, then update center?
|
|
|
|
}
|
|
|
|
|
|
|
|
static int reduce_colors(Vector3 * colors, float * weights, int count) {
|
|
|
|
int n = 0;
|
|
|
|
for (int i = 0; i < count; i++) {
|
|
|
|
if (weights[i] == 0.0f) {
|
|
// skip without incrementing n.
|
|
continue;
|
|
}
|
|
|
|
colors[n] = colors[i];
|
|
weights[n] = weights[i];
|
|
|
|
// find color[j] that matches color[i]
|
|
for (int j = i + 1; j < count; j++) {
|
|
if (colors[i] == colors[j]) { // @@ Compare within threshold?
|
|
weights[n] += weights[j];
|
|
weights[j] = 0.0f;
|
|
}
|
|
}
|
|
|
|
n++;
|
|
}
|
|
|
|
return n;
|
|
}
|
|
|
|
// stable sort. in place.
|
|
static void sort_colors(Vector3 * colors, float * weights, int count) {
|
|
assert(count <= 8);
|
|
|
|
// build the list of values
|
|
//int order[8];
|
|
float lum[8];
|
|
for (int i = 0; i < count; ++i) {
|
|
//order[i] = i;
|
|
lum[i] = colors[i].x + colors[i].y + colors[i].z;
|
|
}
|
|
|
|
// stable sort
|
|
for (int i = 0; i < count; ++i) {
|
|
for (int j = i; j > 0 && lum[j] < lum[j - 1]; --j) {
|
|
swap(lum[j], lum[j - 1]);
|
|
//swap(order[j], order[j - 1]);
|
|
swap(colors[j], colors[j - 1]);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
float optimize_center(float colors[4][10], uniform int p, uniform int table_level)
|
|
{
|
|
float best_center = 0;
|
|
for (uniform int q = 0; q < 4; q++)
|
|
{
|
|
best_center += (colors[q][7 + p] - get_etc1_dY(table_level, q)) * colors[q][3];
|
|
}
|
|
best_center /= 8;
|
|
|
|
float best_err = 0;
|
|
for (uniform int q = 0; q < 4; q++)
|
|
{
|
|
float dY = get_etc1_dY(table_level, q);
|
|
best_err += sq(clamp(best_center + dY, 0, 255) - colors[q][7 + p]) * colors[q][3];
|
|
}
|
|
|
|
for (uniform int branch = 0; branch < 4; branch++)
|
|
{
|
|
float new_center = 0;
|
|
float sum = 0;
|
|
for (uniform int q = 0; q < 4; q++)
|
|
{
|
|
if (branch <= 1 && q <= branch) continue;
|
|
if (branch >= 2 && q >= branch) continue;
|
|
new_center += (colors[q][7 + p] - get_etc1_dY(table_level, q)) * colors[q][3];
|
|
sum += colors[q][3];
|
|
}
|
|
|
|
new_center /= sum;
|
|
|
|
float err = 0;
|
|
for (uniform int q = 0; q < 4; q++)
|
|
{
|
|
float dY = get_etc1_dY(table_level, q);
|
|
err += sq(clamp(new_center + dY, 0, 255) - colors[q][7 + p]) * colors[q][3];
|
|
}
|
|
|
|
if (err < best_err)
|
|
{
|
|
best_err = err;
|
|
best_center = new_center;
|
|
}
|
|
}
|
|
|
|
return best_center;
|
|
}
|
|
*/
|
|
|
|
|
|
|
|
|
|
static void compress_etc1_test(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) {
|
|
|
|
Vector3 colors[8];
|
|
float weights[8];
|
|
//int xrefs[8];
|
|
ETC_SubBlock sub_block[2];
|
|
|
|
bool best_flip = false;
|
|
for (int flip = 0; flip <= 1; flip++) {
|
|
|
|
partition_input_block(input_colors, input_weights, !!flip, /*partition=*/0, colors, weights);
|
|
|
|
int count = reduce_colors(colors, weights, 8);
|
|
|
|
//sort_colors(colors, weights);
|
|
|
|
// @@ sort colors along luminance axis.
|
|
|
|
//sub_block[0].color
|
|
|
|
partition_input_block(input_colors, input_weights, !!flip, /*partition=*/1, colors, weights);
|
|
|
|
}
|
|
|
|
//pack_colors(sub_block[0].color, sub_block[1].color, &result->data);
|
|
|
|
result->error = update_selectors(input_colors, input_weights, result->data, options);
|
|
|
|
}
|
|
|
|
/*void pack_colors(const Vector3 & color0, const Vector3 & color1, const ETC_Options & options, ETC_Data * data) {
|
|
|
|
uint16 abs_c0 = U16(pack_color_444(color0));
|
|
uint16 abs_c1 = U16(pack_color_444(color1));
|
|
Vector3 abs_vc0 = unpack_color_444(abs_c0);
|
|
Vector3 abs_vc1 = unpack_color_444(abs_c1);
|
|
float abs_error = evaluate_mse(color0, abs_vc0, options.color_weights) + evaluate_mse(color1, abs_vc1, options.color_weights);
|
|
|
|
uint16 diff_c0 = U16(pack_color_555(color0));
|
|
Vector3 diff_vc0 = unpack_color_555(diff_c0);
|
|
uint16 diff_d1 = U16(pack_delta_333(color1 - diff_vc0));
|
|
Vector3 diff_vc1 = unpack_color_555(diff_c0, diff_d1);
|
|
float diff_error = evaluate_mse(color0, diff_vc0, options.color_weights) + evaluate_mse(color1, diff_vc1, options.color_weights);
|
|
|
|
if (diff_error < abs_error) {
|
|
data->etc.color0 = diff_c0;
|
|
data->etc.color1 = diff_c1;
|
|
return diff_error;
|
|
}
|
|
else {
|
|
if (abs_error < best_error) {
|
|
best_error = abs_error;
|
|
best_diff = false;
|
|
best_flip = flip;
|
|
best_c0 = abs_c0;
|
|
best_c1 = abs_c1;
|
|
best_vc0 = abs_vc0;
|
|
best_vc1 = abs_vc1;
|
|
}
|
|
}
|
|
}*/
|
|
|
|
static void compress_etc1_range_fit(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) {
|
|
|
|
float best_error = NV_FLOAT_MAX;
|
|
bool best_diff = false;
|
|
bool best_flip = false;
|
|
uint16 best_c0 = 0;
|
|
uint16 best_c1 = 0;
|
|
Vector3 best_vc0;
|
|
Vector3 best_vc1;
|
|
|
|
for (int flip = 0; flip <= 1; flip++) {
|
|
Vector3 color0 = get_partition_color_average(input_colors, input_weights, !!flip, /*partition=*/0);
|
|
Vector3 color1 = get_partition_color_average(input_colors, input_weights, !!flip, /*partition=*/1);
|
|
|
|
uint16 abs_c0 = U16(pack_color_444(color0));
|
|
uint16 abs_c1 = U16(pack_color_444(color1));
|
|
Vector3 abs_vc0 = unpack_color_444(abs_c0);
|
|
Vector3 abs_vc1 = unpack_color_444(abs_c1);
|
|
float abs_error = evaluate_mse(color0, abs_vc0, options.color_weights) + evaluate_mse(color1, abs_vc1, options.color_weights);
|
|
|
|
uint16 diff_c0 = U16(pack_color_555(color0));
|
|
Vector3 diff_vc0 = unpack_color_555(diff_c0);
|
|
uint16 diff_d1 = U16(pack_delta_333(color1 - diff_vc0));
|
|
Vector3 diff_vc1 = unpack_color_555(diff_c0, diff_d1);
|
|
float diff_error = evaluate_mse(color0, diff_vc0, options.color_weights) + evaluate_mse(color1, diff_vc1, options.color_weights);
|
|
|
|
if (diff_error < abs_error) {
|
|
if (diff_error < best_error) {
|
|
best_error = diff_error;
|
|
best_diff = true;
|
|
best_flip = !!flip;
|
|
best_c0 = diff_c0;
|
|
best_c1 = diff_d1;
|
|
best_vc0 = diff_vc0;
|
|
best_vc1 = diff_vc1;
|
|
}
|
|
}
|
|
else {
|
|
if (abs_error < best_error) {
|
|
best_error = abs_error;
|
|
best_diff = false;
|
|
best_flip = !!flip;
|
|
best_c0 = abs_c0;
|
|
best_c1 = abs_c1;
|
|
best_vc0 = abs_vc0;
|
|
best_vc1 = abs_vc1;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
result->data.mode = ETC_Data::Mode_ETC1;
|
|
result->data.etc.flip = best_flip;
|
|
result->data.etc.diff = best_diff;
|
|
result->data.etc.table0 = select_table_index(best_vc0, input_colors, input_weights, best_flip, /*partition=*/0);
|
|
result->data.etc.table1 = select_table_index(best_vc1, input_colors, input_weights, best_flip, /*partition=*/1);
|
|
result->data.etc.color0 = best_c0;
|
|
result->data.etc.color1 = best_c1;
|
|
|
|
result->error = update_selectors(input_colors, input_weights, result->data, options);
|
|
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
}
|
|
|
|
#if HAVE_RGETC
|
|
#include "nvimage/ColorBlock.h"
|
|
|
|
void compress_etc1_rg(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) {
|
|
|
|
rg_etc1::etc1_pack_params pack_params;
|
|
//pack_params.m_quality = rg_etc1::cLowQuality;
|
|
pack_params.m_quality = rg_etc1::cMediumQuality; // @@ Select quality based on compression options.
|
|
|
|
ColorBlock rgba;
|
|
for (uint i = 0; i < 16; i++) {
|
|
rgba.color(i) = toColor32(input_colors[i]);
|
|
}
|
|
rgba.swizzle(2, 1, 0, 3);
|
|
|
|
BlockETC block;
|
|
rg_etc1::pack_etc1_block((void *)&block, (const uint *)rgba.colors(), pack_params);
|
|
|
|
unpack_etc2_block(&block, &result->data);
|
|
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
}
|
|
#endif
|
|
|
|
static void compress_etc2_planar_solid(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) {
|
|
|
|
Vector3 C(0);
|
|
float W = 0;
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
C += input_colors[i].xyz() * input_weights[i];
|
|
W += input_weights[i];
|
|
}
|
|
|
|
C /= W;
|
|
|
|
// Convert colors to 676
|
|
result->data.mode = ETC_Data::Mode_Planar;
|
|
result->data.planar.ro = pack_float_6(C.x);
|
|
result->data.planar.go = pack_float_7(C.y);
|
|
result->data.planar.bo = pack_float_6(C.z);
|
|
|
|
result->data.planar.rh = result->data.planar.ro;
|
|
result->data.planar.gh = result->data.planar.go;
|
|
result->data.planar.bh = result->data.planar.bo;
|
|
|
|
result->data.planar.rv = result->data.planar.ro;
|
|
result->data.planar.gv = result->data.planar.go;
|
|
result->data.planar.bv = result->data.planar.bo;
|
|
|
|
// Evaluate error.
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
}
|
|
|
|
// Least squares optimization of planar endpoints.
|
|
static void compress_etc2_planar_lsqr(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) {
|
|
|
|
// Isn't this a simple least squares problem?
|
|
// - Yes, but that doesn't take clamping and quantization into account.
|
|
// - Solve the least squares problem, then refine endpoints?
|
|
|
|
// This matrix is always the same! But not when using arbitrary weights!
|
|
// This would be faster computing the matrix first, then multiplying by the weight covariance matrix.
|
|
Matrix3 m(0);
|
|
|
|
// For every pixel, decoder does:
|
|
// int r = (4 * ro + x * (rh - ro) + y * (rv - ro) + 2) >> 2;
|
|
|
|
// R(x,y) = (4 * ro + x * (rh - ro) + y * (rv - ro) + 2) / 4;
|
|
|
|
// R(x,y) = ro * (1 - x/4 - y/4) + rh * x/4 + rv * y/4 + 1/2;
|
|
|
|
// a = x/4
|
|
// b = y/4
|
|
// c = 1 - a - b
|
|
// R(x,y) = ro * c + rh * a + rv * b + 1/2;
|
|
|
|
float A[3 * 16];
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
float w = input_weights[4*y+x];
|
|
//if ((x == 1 || x == 2) && (y == 1 && y == 2)) w *= 0.5;
|
|
|
|
float a = float(x) / 4 * w;
|
|
float b = float(y) / 4 * w;
|
|
float c = (1 - a - b) * w;
|
|
|
|
int i = y*4 + x;
|
|
A[3 * i + 0] = a;
|
|
A[3 * i + 1] = b;
|
|
A[3 * i + 2] = c;
|
|
|
|
/*for (int yy = 0; yy < 4; yy++) {
|
|
for (int xx = 0; xx < 4; xx++) {
|
|
float ww = input_weights[4*yy+xx];
|
|
//if ((xx == 1 || xx == 2) && (yy == 1 && yy == 2)) ww *= 0.5;
|
|
|
|
float aa = float(xx) / 4 * ww;
|
|
float bb = float(yy) / 4 * ww;
|
|
float cc = (1 - aa - bb) * ww;
|
|
|
|
m(0,0) += a * aa;
|
|
m(1,0) += b * aa;
|
|
m(2,0) += c * aa;
|
|
|
|
m(0,1) += a * bb;
|
|
m(1,1) += b * bb;
|
|
m(2,1) += c * bb;
|
|
|
|
m(0,2) += a * cc;
|
|
m(1,2) += b * cc;
|
|
m(2,2) += c * cc;
|
|
}
|
|
}*/
|
|
}
|
|
}
|
|
|
|
// At*A
|
|
for (int y = 0; y < 3; y++) {
|
|
for (int x = 0; x < 3; x++) {
|
|
float d = 0;
|
|
for (int i = 0; i < 16; i++) {
|
|
d += A[3*i+x] * A[3*i+y];
|
|
}
|
|
m(x, y) = d;
|
|
}
|
|
}
|
|
|
|
// Compute right side:
|
|
Vector3 Ca(0), Cb(0), Cc(0);
|
|
|
|
for (int y = 0; y < 4; y++) {
|
|
for (int x = 0; x < 4; x++) {
|
|
float a = float(x) / 4;
|
|
float b = float(y) / 4;
|
|
float c = 1 - a - b;
|
|
|
|
Vector3 C = input_colors[4*y+x].xyz() - Vector3(0.5f / 255);
|
|
|
|
Ca += C * a;
|
|
Cb += C * b;
|
|
Cc += C * c;
|
|
}
|
|
}
|
|
|
|
// Now we have 3 equations (one for each color component).
|
|
|
|
Vector3 R(Ca.x, Cb.x, Cc.x);
|
|
Vector3 G(Ca.y, Cb.y, Cc.y);
|
|
Vector3 B(Ca.z, Cb.z, Cc.z);
|
|
|
|
Vector3 r, g, b;
|
|
|
|
if (!solveLU(m, R, &r)) {
|
|
result->error = NV_FLOAT_MAX;
|
|
return;
|
|
}
|
|
if (!solveLU(m, G, &g)) {
|
|
result->error = NV_FLOAT_MAX;
|
|
return;
|
|
}
|
|
if (!solveLU(m, B, &b)) {
|
|
result->error = NV_FLOAT_MAX;
|
|
return;
|
|
}
|
|
|
|
Vector3 Ch(r.x, g.x, b.x);
|
|
Vector3 Cv(r.y, g.y, b.y);
|
|
Vector3 Co(r.z, g.z, b.z);
|
|
|
|
// Convert colors to 676
|
|
result->data.mode = ETC_Data::Mode_Planar;
|
|
result->data.planar.ro = pack_float_6(Co.x);
|
|
result->data.planar.go = pack_float_7(Co.y);
|
|
result->data.planar.bo = pack_float_6(Co.z);
|
|
|
|
result->data.planar.rh = pack_float_6(Ch.x);
|
|
result->data.planar.gh = pack_float_7(Ch.y);
|
|
result->data.planar.bh = pack_float_6(Ch.z);
|
|
|
|
result->data.planar.rv = pack_float_6(Cv.x);
|
|
result->data.planar.gv = pack_float_7(Cv.y);
|
|
result->data.planar.bv = pack_float_6(Cv.z);
|
|
|
|
// Evaluate error.
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
|
|
bool refine_endpoints = true;
|
|
if (refine_endpoints) {
|
|
ETC_Solution best = *result;
|
|
|
|
// @@ The per-component errors are not correllated, test 8 combinations 3 times.
|
|
for (int i = 0; i < 8; i++) {
|
|
result->data.planar.ro = pack_float_6(Co.x, (i & 1) != 0);
|
|
result->data.planar.rh = pack_float_6(Ch.x, (i & 2) != 0);
|
|
result->data.planar.rv = pack_float_6(Cv.x, (i & 4) != 0);
|
|
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
if (result->error < best.error) {
|
|
best = *result;
|
|
}
|
|
}
|
|
|
|
*result = best;
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
result->data.planar.go = pack_float_7(Co.y, (i & 1) != 0);
|
|
result->data.planar.gh = pack_float_7(Ch.y, (i & 2) != 0);
|
|
result->data.planar.gv = pack_float_7(Cv.y, (i & 4) != 0);
|
|
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
if (result->error < best.error) {
|
|
best = *result;
|
|
}
|
|
}
|
|
|
|
*result = best;
|
|
|
|
for (int i = 0; i < 8; i++) {
|
|
result->data.planar.bo = pack_float_6(Co.z, (i & 1) != 0);
|
|
result->data.planar.bh = pack_float_6(Ch.z, (i & 2) != 0);
|
|
result->data.planar.bv = pack_float_6(Cv.z, (i & 4) != 0);
|
|
|
|
result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data);
|
|
if (result->error < best.error) {
|
|
best = *result;
|
|
}
|
|
}
|
|
|
|
*result = best;
|
|
}
|
|
}
|
|
|
|
|
|
static void process_input_colors(Vector4 input_colors[16]) {
|
|
for (int i = 0; i < 16; i++) {
|
|
input_colors[i] = saturate(input_colors[i]);
|
|
|
|
// @@ Sanitize input_weights?
|
|
// - Avoid blocks with all zero weight.
|
|
// - Normalize weights to avoid too small values?
|
|
// - Remove NaNs, infinites, etc.
|
|
}
|
|
}
|
|
|
|
static void process_input_alphas(Vector4 input_colors[16], int input_channel) {
|
|
for (int i = 0; i < 16; i++) {
|
|
input_colors[i].component[input_channel] = saturate(input_colors[i].component[input_channel]);
|
|
}
|
|
}
|
|
|
|
static void process_input_weights(float input_weights[16]) {
|
|
float max_weight = 0.0f;
|
|
for (int i = 0; i < 16; i++) {
|
|
max_weight = nv::max(max_weight, input_weights[i]);
|
|
}
|
|
|
|
const float min_weight = 0.0001f;
|
|
|
|
if (max_weight <= min_weight) {
|
|
// Handle degenerate case.
|
|
for (int i = 0; i < 16; i++) {
|
|
input_weights[i] = 1;
|
|
}
|
|
}
|
|
else {
|
|
for (int i = 0; i < 16; i++) {
|
|
// Clamp to positive.
|
|
input_weights[i] = nv::max(input_weights[i], 0.0f);
|
|
|
|
// Flush to zero.
|
|
if (input_weights[i] < min_weight) input_weights[i] = 0.0f;
|
|
|
|
// Normalize.
|
|
input_weights[i] /= max_weight;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
static float compress_etc_a1(Vector4 input_colors[16], float input_weights[16], const ETC_Options & options, void * output) {
|
|
assert(options.onebit_alpha == true);
|
|
|
|
// Classify block.
|
|
bool transparent_block = true;
|
|
bool opaque_block = true;
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
if (input_colors[i].w != 0) transparent_block = false;
|
|
if (input_colors[i].w != 1) opaque_block = false;
|
|
}
|
|
|
|
if (transparent_block) {
|
|
// @@ Encode trivial transparent block.
|
|
return 0;
|
|
}
|
|
|
|
if (opaque_block) {
|
|
// @@ Encode block with opaque bit set. @@ Isn't this like the standard encoder?
|
|
}
|
|
|
|
// @@ Encode mixed block.
|
|
nvCheck(false); // Not implemented!
|
|
|
|
//uint8 color_rgb[16*3];
|
|
//uint8 alpha[16];
|
|
//uint etc_word1, etc_word2;
|
|
//compressBlockDifferentialWithAlpha(bool isTransparent, uint8* img, uint8* alphaimg, uint8* imgdec, 4, 4, 0, 0, &etc_word1, &etc_word2);
|
|
|
|
return NV_FLOAT_MAX;
|
|
}
|
|
|
|
//uint etc_blocks = 0;
|
|
//uint planar_blocks = 0;
|
|
//#include "nvthread/Atomic.h"
|
|
|
|
static float compress_etc(Vector4 input_colors[16], float input_weights[16], const ETC_Options & options, void * output) {
|
|
assert(options.onebit_alpha == false);
|
|
|
|
ETC_Solution result;
|
|
compress_etc1_range_fit(input_colors, input_weights, options, &result);
|
|
|
|
if (options.use_rg_etc) {
|
|
#if HAVE_RGETC
|
|
ETC_Solution rg_result;
|
|
compress_etc1_rg(input_colors, input_weights, options, &rg_result);
|
|
if (rg_result.error < result.error) {
|
|
result = rg_result;
|
|
}
|
|
#else
|
|
// @@ Print warning?
|
|
#endif
|
|
}
|
|
|
|
if (options.enable_etc2) {
|
|
if (options.use_planar) {
|
|
ETC_Solution planar_result;
|
|
compress_etc2_planar_lsqr(input_colors, input_weights, options, &planar_result);
|
|
|
|
if (planar_result.error < result.error) {
|
|
result = planar_result;
|
|
//nv::atomicIncrement(&planar_blocks);
|
|
}
|
|
else {
|
|
//nv::atomicIncrement(&etc_blocks);
|
|
}
|
|
}
|
|
if (options.use_t_mode) {
|
|
// @@
|
|
}
|
|
if (options.use_h_mode) {
|
|
// @@
|
|
}
|
|
}
|
|
|
|
pack_etc2_block(result.data, (BlockETC *)output);
|
|
|
|
return result.error;
|
|
}
|
|
|
|
|
|
// Range search EAC compressor, slightly modified from ETCLib.
|
|
float compress_eac_range_search(Vector4 input_colors[16], float input_weights[16], int input_channel, const EAC_Options & options, void * output) {
|
|
|
|
// Find alpha range
|
|
float min_a = 1.0f;
|
|
float max_a = 0.0f;
|
|
for (uint i = 0; i < 16; i++) {
|
|
float a = input_colors[i].component[input_channel];
|
|
min_a = nv::min(min_a, a);
|
|
max_a = nv::max(max_a, a);
|
|
}
|
|
const float range_a = max_a - min_a;
|
|
|
|
EAC_Solution best;
|
|
best.error = NV_FLOAT_MAX;
|
|
|
|
// try each modifier table entry
|
|
static const uint MODIFIER_TABLE_ENTRYS = 16;
|
|
for (uint t = 0; t < MODIFIER_TABLE_ENTRYS; t++) {
|
|
static const uint MIN_VALUE_SELECTOR = 3;
|
|
static const uint MAX_VALUE_SELECTOR = 7;
|
|
|
|
const float fTableEntryCenter = (float)-eac_intensity_modifiers[t][MIN_VALUE_SELECTOR];
|
|
const float fTableEntryRange = (float)eac_intensity_modifiers[t][MAX_VALUE_SELECTOR] - eac_intensity_modifiers[t][MIN_VALUE_SELECTOR];
|
|
const float fCenterRatio = fTableEntryCenter / fTableEntryRange;
|
|
|
|
const int center = ftoi_round(255.0f * (min_a + fCenterRatio * range_a));
|
|
const int min_base = max(0, center - options.search_radius);
|
|
const int max_base = min(center + options.search_radius, 255);
|
|
|
|
for (int base = min_base; base <= max_base; base++) {
|
|
int range_multiplier = ftoi_round(255 * range_a / fTableEntryRange);
|
|
const int min_multiplier = clamp(range_multiplier - options.search_radius, 1, 15);
|
|
const int max_multiplier = clamp(range_multiplier + options.search_radius, 1, 15);
|
|
|
|
for (int multiplier = min_multiplier; multiplier <= max_multiplier; multiplier++) {
|
|
|
|
// find best selector for each pixel
|
|
float block_error = 0;
|
|
uint best_selector[16];
|
|
for (uint i = 0; i < 16; i++) {
|
|
|
|
float best_error_a = NV_FLOAT_MAX;
|
|
|
|
static const uint ALPHA_SELECTOR_BITS = 3;
|
|
static const uint ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS;
|
|
for (uint s = 0; s < ALPHA_SELECTORS; s++) {
|
|
float alpha;
|
|
if (options.use_11bit_mode) {
|
|
alpha = get_alpha11(base, t, multiplier, s);
|
|
}
|
|
else {
|
|
alpha = get_alpha8(base, t, multiplier, s);
|
|
}
|
|
|
|
float error_a = alpha - input_colors[i].component[input_channel];
|
|
error_a = error_a * error_a;
|
|
|
|
if (error_a < best_error_a) {
|
|
best_error_a = error_a;
|
|
best_selector[i] = s;
|
|
}
|
|
}
|
|
|
|
block_error += best_error_a * input_weights[i];
|
|
if (block_error > best.error) {
|
|
break; // Don't waste more time.
|
|
}
|
|
}
|
|
|
|
if (block_error < best.error) {
|
|
best.error = block_error;
|
|
|
|
best.data.alpha = base;
|
|
best.data.multiplier = multiplier;
|
|
best.data.table_index = t;
|
|
for (uint i = 0; i < 16; i++) {
|
|
// Flip selectors.
|
|
best.data.selector[i] = best_selector[4*(i%4) + i/4];
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
pack_eac_block(best.data, (BlockEAC *)output);
|
|
|
|
return best.error;
|
|
}
|
|
|
|
|
|
|
|
|
|
// Public API:
|
|
|
|
void nv::decompress_etc(const void * input_block, Vector4 output_colors[16]) {
|
|
#if 1 // Our code
|
|
ETC_Data data;
|
|
unpack_etc2_block((const BlockETC *)input_block, &data);
|
|
|
|
decode_etc2(data, output_colors);
|
|
|
|
#elif HAVE_RGETC && 0
|
|
|
|
Color32 colors[16];
|
|
rg_etc1::unpack_etc1_block(input_block, &colors->u);
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
output_colors[i].x = colors[i].b * (1.0f / 255.0f);
|
|
output_colors[i].y = colors[i].g * (1.0f / 255.0f);
|
|
output_colors[i].z = colors[i].r * (1.0f / 255.0f);
|
|
output_colors[i].w = colors[i].a * (1.0f / 255.0f);
|
|
}
|
|
|
|
#elif HAVE_ETCPACK // Use etcpack for reference.
|
|
const BlockETC * block = (const BlockETC *)input_block;
|
|
|
|
uint8 colors[3*16];
|
|
uint part1 = POSH_SwapU32(block->data32[0]);
|
|
uint part2 = POSH_SwapU32(block->data32[1]);
|
|
decompressBlockETC2(part1, part2, colors, 4, 4, 0, 0);
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
output_colors[i].x = colors[3*i+0] * (1.0f / 255.0f);
|
|
output_colors[i].y = colors[3*i+1] * (1.0f / 255.0f);
|
|
output_colors[i].z = colors[3*i+2] * (1.0f / 255.0f);
|
|
output_colors[i].w = 1.0f;
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void nv::decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel) {
|
|
nvCheck(output_channel >= 0 && output_channel < 4);
|
|
|
|
#if 1
|
|
EAC_Data data;
|
|
unpack_eac_block((const BlockEAC *)input_block, &data);
|
|
decode_eac_11(data, output_colors, output_channel);
|
|
|
|
#elif HAVE_ETCPACK
|
|
// Use etcpack for reference.
|
|
formatSigned = 0;
|
|
|
|
uint16 alphas[16];
|
|
decompressBlockAlpha16bit((uint8*)input_block, (uint8*)alphas, 4, 4, 0, 0);
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
uint16 alpha = POSH_SwapU16(alphas[i]);
|
|
output_colors[i].component[output_channel] = alpha * (1.0f / 65535.0f);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
void nv::decompress_etc_eac(const void * input, Vector4 output_colors[16]) {
|
|
#if 1
|
|
BlockETC_EAC * input_block = (BlockETC_EAC *)input;
|
|
|
|
ETC_Data etc;
|
|
unpack_etc2_block(&input_block->etc, &etc);
|
|
decode_etc2(etc, output_colors);
|
|
|
|
EAC_Data eac;
|
|
unpack_eac_block(&input_block->eac, &eac);
|
|
decode_eac_8(eac, output_colors, 3);
|
|
|
|
#elif HAVE_ETCPACK
|
|
// Use etcpack for reference.
|
|
uint8 colors[4*16];
|
|
decompressBlockAlpha((uint8*)input_block, colors, 4, 4, 0, 0);
|
|
|
|
for (int i = 0; i < 16; i++) {
|
|
output_colors[i].x = colors[4*i+0] * (1.0f / 255.0f);
|
|
output_colors[i].y = colors[4*i+1] * (1.0f / 255.0f);
|
|
output_colors[i].z = colors[4*i+2] * (1.0f / 255.0f);
|
|
output_colors[i].w = colors[4*i+3] * (1.0f / 255.0f);
|
|
}
|
|
#endif
|
|
}
|
|
|
|
float nv::compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) {
|
|
|
|
process_input_colors(input_colors);
|
|
|
|
// @@ Use same options for all blocks?
|
|
ETC_Options options;
|
|
options.use_rg_etc = true;
|
|
options.enable_etc2 = false;
|
|
options.use_t_mode = false;
|
|
options.use_h_mode = false;
|
|
options.use_planar = false;
|
|
options.color_weights = color_weights;
|
|
|
|
return compress_etc(input_colors, input_weights, options, output);
|
|
}
|
|
|
|
float nv::compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) {
|
|
|
|
process_input_colors(input_colors);
|
|
process_input_weights(input_weights);
|
|
|
|
ETC_Options options;
|
|
options.use_rg_etc = true;
|
|
options.enable_etc2 = true;
|
|
options.use_t_mode = false; // @@ Not implemented.
|
|
options.use_h_mode = false; // @@ Not implemented.
|
|
options.use_planar = true;
|
|
options.color_weights = color_weights;
|
|
|
|
return compress_etc(input_colors, input_weights, options, output);
|
|
}
|
|
|
|
float nv::compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) {
|
|
|
|
process_input_colors(input_colors);
|
|
process_input_weights(input_weights);
|
|
|
|
ETC_Options options;
|
|
options.use_rg_etc = true;
|
|
options.enable_etc2 = true;
|
|
options.use_t_mode = false; // @@ Not implemented.
|
|
options.use_h_mode = false; // @@ Not implemented.
|
|
options.use_planar = true;
|
|
options.onebit_alpha = true;
|
|
options.color_weights = color_weights;
|
|
|
|
return compress_etc_a1(input_colors, input_weights, options, output);
|
|
}
|
|
|
|
|
|
float nv::compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output) {
|
|
nvCheck(input_channel >= 0 && input_channel < 4);
|
|
|
|
process_input_alphas(input_colors, input_channel);
|
|
process_input_weights(input_weights);
|
|
|
|
EAC_Options options;
|
|
options.search_radius = search_radius;
|
|
options.use_11bit_mode = use_11bit_mode;
|
|
|
|
return compress_eac_range_search(input_colors, input_weights, input_channel, options, output);
|
|
}
|
|
|
|
float nv::compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) {
|
|
BlockETC_EAC * output_block = (BlockETC_EAC *)output;
|
|
float error = compress_etc2(input_colors, input_weights, color_weights, &output_block->etc);
|
|
error += compress_eac(input_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*use_11bit_mode=*/false, &output_block->eac);
|
|
return error;
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|