Add BC7 support. It's incredibly slow - ~60 seconds to compress a 512x512 image, on a Core i7 - but it works.
- Added AVPCL compressor to projects and got it building with VC9 and VC10. - Removed unused command line interface & file read/write code from AVPCL. - Converted AVPCL to use NV vector math lib, asserts, etc. - Converted AVPCL to use float instead of double. - Added 4x4 symmetric eigensolver, for AVPCL; it's based on the existing 3x3 one, but I had to rewrite the Householder reduction stage. As with ZOH, using the eigensolver (instead of SVD) gives a ~25% speedup without significantly affecting RMSE. - Encapsulated ZOH and AVPCL stuff into their own namespaces to keep everything separate. - Added some missing vector operators to the nvmath lib.
This commit is contained in:
@ -17,17 +17,18 @@ See the License for the specific language governing permissions and limitations
|
||||
#include "bits.h"
|
||||
#include "tile.h"
|
||||
#include "avpcl.h"
|
||||
#include "arvo/Vec4.h"
|
||||
#include "arvo/Matrix.h"
|
||||
#include "arvo/SVD.h"
|
||||
#include "nvcore/Debug.h"
|
||||
#include "nvmath/Vector.inl"
|
||||
#include "nvmath/Matrix.inl"
|
||||
#include "nvmath/Fitting.h"
|
||||
#include "utils.h"
|
||||
#include "endpts.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <cstring>
|
||||
|
||||
#include "shapes_two.h"
|
||||
|
||||
using namespace ArvoMath;
|
||||
using namespace nv;
|
||||
using namespace AVPCL;
|
||||
|
||||
#define NLSBMODES 4 // number of different lsb modes per region. since we have two .1 per region, that can have 4 values
|
||||
|
||||
@ -84,7 +85,7 @@ struct PatternPrec
|
||||
|
||||
|
||||
// this is the precision for each channel and region
|
||||
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO assert to check this!
|
||||
// NOTE: this MUST match the corresponding data in "patterns" above -- WARNING: there is NO nvAssert to check this!
|
||||
static PatternPrec pattern_precs[NPATTERNS] =
|
||||
{
|
||||
7,7,7, 7,7,7, 7,7,7, 7,7,7,
|
||||
@ -103,7 +104,7 @@ static int nbits(int n, bool issigned)
|
||||
}
|
||||
else
|
||||
{
|
||||
assert (issigned);
|
||||
nvAssert (issigned);
|
||||
for (nb=0; n<-1; ++nb, n>>=1) ;
|
||||
return nb + 1;
|
||||
}
|
||||
@ -111,12 +112,12 @@ static int nbits(int n, bool issigned)
|
||||
|
||||
static void transform_forward(IntEndptsRGB_2 ep[NREGIONS])
|
||||
{
|
||||
assert(0);
|
||||
nvUnreachable();
|
||||
}
|
||||
|
||||
static void transform_inverse(IntEndptsRGB_2 ep[NREGIONS])
|
||||
{
|
||||
assert(0);
|
||||
nvUnreachable();
|
||||
}
|
||||
|
||||
// endpoints are 888,888; reduce to 777,777 and put the lsb bit majority in compr_bits
|
||||
@ -129,7 +130,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt
|
||||
{
|
||||
onescnt += endpts.A[j] & 1;
|
||||
compr_endpts.A[j] = endpts.A[j] >> 1;
|
||||
assert (compr_endpts.A[j] < 128);
|
||||
nvAssert (compr_endpts.A[j] < 128);
|
||||
}
|
||||
compr_endpts.a_lsb = onescnt >= 2;
|
||||
|
||||
@ -138,7 +139,7 @@ static void compress_one(const IntEndptsRGB& endpts, IntEndptsRGB_2& compr_endpt
|
||||
{
|
||||
onescnt += endpts.B[j] & 1;
|
||||
compr_endpts.B[j] = endpts.B[j] >> 1;
|
||||
assert (compr_endpts.B[j] < 128);
|
||||
nvAssert (compr_endpts.B[j] < 128);
|
||||
}
|
||||
compr_endpts.b_lsb = onescnt >= 2;
|
||||
}
|
||||
@ -171,12 +172,12 @@ static void quantize_endpts(const FltEndpts endpts[NREGIONS], const PatternPrec
|
||||
|
||||
for (int region = 0; region < NREGIONS; ++region)
|
||||
{
|
||||
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.X(), pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
|
||||
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.Y(), pattern_prec.region_precs[region].endpt_a_prec[1]+1);
|
||||
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.Z(), pattern_prec.region_precs[region].endpt_a_prec[2]+1);
|
||||
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.X(), pattern_prec.region_precs[region].endpt_b_prec[0]+1);
|
||||
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.Y(), pattern_prec.region_precs[region].endpt_b_prec[1]+1);
|
||||
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.Z(), pattern_prec.region_precs[region].endpt_b_prec[2]+1);
|
||||
full_endpts[region].A[0] = Utils::quantize(endpts[region].A.x, pattern_prec.region_precs[region].endpt_a_prec[0]+1); // +1 since we are in uncompressed space
|
||||
full_endpts[region].A[1] = Utils::quantize(endpts[region].A.y, pattern_prec.region_precs[region].endpt_a_prec[1]+1);
|
||||
full_endpts[region].A[2] = Utils::quantize(endpts[region].A.z, pattern_prec.region_precs[region].endpt_a_prec[2]+1);
|
||||
full_endpts[region].B[0] = Utils::quantize(endpts[region].B.x, pattern_prec.region_precs[region].endpt_b_prec[0]+1);
|
||||
full_endpts[region].B[1] = Utils::quantize(endpts[region].B.y, pattern_prec.region_precs[region].endpt_b_prec[1]+1);
|
||||
full_endpts[region].B[2] = Utils::quantize(endpts[region].B.z, pattern_prec.region_precs[region].endpt_b_prec[2]+1);
|
||||
compress_one(full_endpts[region], q_endpts[region]);
|
||||
}
|
||||
}
|
||||
@ -190,7 +191,7 @@ static void swap_indices(IntEndptsRGB_2 endpts[NREGIONS], int indices[Tile::TILE
|
||||
|
||||
int x = POS_TO_X(position);
|
||||
int y = POS_TO_Y(position);
|
||||
assert(REGION(x,y,shapeindex) == region); // double check the table
|
||||
nvAssert(REGION(x,y,shapeindex) == region); // double check the table
|
||||
if (indices[y][x] & HIGH_INDEXBIT)
|
||||
{
|
||||
// high bit is set, swap the endpts and indices for this region
|
||||
@ -232,7 +233,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
|
||||
out.write(endpts[i].b_lsb, 1);
|
||||
}
|
||||
|
||||
assert (out.getptr() == 98);
|
||||
nvAssert (out.getptr() == 98);
|
||||
}
|
||||
|
||||
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
|
||||
@ -240,8 +241,8 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind
|
||||
int mode = AVPCL::getmode(in);
|
||||
|
||||
pat_index = 0;
|
||||
assert (pat_index >= 0 && pat_index < NPATTERNS);
|
||||
assert (in.getptr() == patterns[pat_index].modebits);
|
||||
nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
|
||||
nvAssert (in.getptr() == patterns[pat_index].modebits);
|
||||
|
||||
shapeindex = in.read(SHAPEBITS);
|
||||
p = patterns[pat_index];
|
||||
@ -259,7 +260,7 @@ static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeind
|
||||
endpts[i].b_lsb = in.read(1);
|
||||
}
|
||||
|
||||
assert (in.getptr() == 98);
|
||||
nvAssert (in.getptr() == 98);
|
||||
}
|
||||
|
||||
static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out)
|
||||
@ -312,10 +313,10 @@ static void emit_block(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex, co
|
||||
|
||||
write_indices(indices, shapeindex, out);
|
||||
|
||||
assert(out.getptr() == AVPCL::BITSIZE);
|
||||
nvAssert(out.getptr() == AVPCL::BITSIZE);
|
||||
}
|
||||
|
||||
static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec ®ion_prec, Vec4 palette[NINDICES])
|
||||
static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const RegionPrec ®ion_prec, Vector4 palette[NINDICES])
|
||||
{
|
||||
IntEndptsRGB endpts;
|
||||
|
||||
@ -329,30 +330,30 @@ static void generate_palette_quantized(const IntEndptsRGB_2 &endpts_2, const Reg
|
||||
|
||||
// interpolate
|
||||
for (int i = 0; i < NINDICES; ++i)
|
||||
palette[i].X() = PALETTE_LERP(a, b, i, BIAS, DENOM);
|
||||
palette[i].x = float(Utils::lerp(a, b, i, BIAS, DENOM));
|
||||
|
||||
a = Utils::unquantize(endpts.A[1], region_prec.endpt_a_prec[1]+1);
|
||||
b = Utils::unquantize(endpts.B[1], region_prec.endpt_b_prec[1]+1);
|
||||
|
||||
// interpolate
|
||||
for (int i = 0; i < NINDICES; ++i)
|
||||
palette[i].Y() = PALETTE_LERP(a, b, i, BIAS, DENOM);
|
||||
palette[i].y = float(Utils::lerp(a, b, i, BIAS, DENOM));
|
||||
|
||||
a = Utils::unquantize(endpts.A[2], region_prec.endpt_a_prec[2]+1);
|
||||
b = Utils::unquantize(endpts.B[2], region_prec.endpt_b_prec[2]+1);
|
||||
|
||||
// interpolate
|
||||
for (int i = 0; i < NINDICES; ++i)
|
||||
palette[i].Z() = PALETTE_LERP(a, b, i, BIAS, DENOM);
|
||||
palette[i].z = float(Utils::lerp(a, b, i, BIAS, DENOM));
|
||||
|
||||
// constant alpha
|
||||
for (int i = 0; i < NINDICES; ++i)
|
||||
palette[i].W() = RGBA_MAX;
|
||||
palette[i].w = 255.0f;
|
||||
}
|
||||
|
||||
static void sign_extend(Pattern &p, IntEndptsRGB_2 endpts[NREGIONS])
|
||||
{
|
||||
assert(0);
|
||||
nvUnreachable();
|
||||
}
|
||||
|
||||
void AVPCL::decompress_mode3(const char *block, Tile &t)
|
||||
@ -371,7 +372,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
|
||||
transform_inverse(endpts);
|
||||
}
|
||||
|
||||
Vec4 palette[NREGIONS][NINDICES];
|
||||
Vector4 palette[NREGIONS][NINDICES];
|
||||
for (int r = 0; r < NREGIONS; ++r)
|
||||
generate_palette_quantized(endpts[r], pattern_precs[pat_index].region_precs[r], &palette[r][0]);
|
||||
|
||||
@ -379,7 +380,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
|
||||
|
||||
read_indices(in, shapeindex, indices);
|
||||
|
||||
assert(in.getptr() == AVPCL::BITSIZE);
|
||||
nvAssert(in.getptr() == AVPCL::BITSIZE);
|
||||
|
||||
// lookup
|
||||
for (int y = 0; y < Tile::TILE_H; y++)
|
||||
@ -388,17 +389,17 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
|
||||
}
|
||||
|
||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||
static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, double current_err, int indices[Tile::TILE_TOTAL])
|
||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||
{
|
||||
Vec4 palette[NINDICES];
|
||||
double toterr = 0;
|
||||
Vec4 err;
|
||||
Vector4 palette[NINDICES];
|
||||
float toterr = 0;
|
||||
Vector4 err;
|
||||
|
||||
generate_palette_quantized(endpts, region_prec, palette);
|
||||
|
||||
for (int i = 0; i < np; ++i)
|
||||
{
|
||||
double err, besterr = DBL_MAX;
|
||||
float err, besterr = FLT_MAX;
|
||||
|
||||
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
||||
{
|
||||
@ -421,17 +422,17 @@ static double map_colors(const Vec4 colors[], int np, const IntEndptsRGB_2 &endp
|
||||
for (int k = i; k < np; ++k)
|
||||
indices[k] = -1;
|
||||
|
||||
return DBL_MAX;
|
||||
return FLT_MAX;
|
||||
}
|
||||
}
|
||||
return toterr;
|
||||
}
|
||||
|
||||
static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endpts[NREGIONS], const PatternPrec &pattern_prec,
|
||||
int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS])
|
||||
int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS])
|
||||
{
|
||||
// build list of possibles
|
||||
Vec4 palette[NREGIONS][NINDICES];
|
||||
Vector4 palette[NREGIONS][NINDICES];
|
||||
|
||||
for (int region = 0; region < NREGIONS; ++region)
|
||||
{
|
||||
@ -439,13 +440,13 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
|
||||
toterr[region] = 0;
|
||||
}
|
||||
|
||||
Vec4 err;
|
||||
Vector4 err;
|
||||
|
||||
for (int y = 0; y < tile.size_y; y++)
|
||||
for (int x = 0; x < tile.size_x; x++)
|
||||
{
|
||||
int region = REGION(x,y,shapeindex);
|
||||
double err, besterr = DBL_MAX;
|
||||
float err, besterr = FLT_MAX;
|
||||
|
||||
for (int i = 0; i < NINDICES && besterr > 0; ++i)
|
||||
{
|
||||
@ -465,8 +466,8 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
|
||||
|
||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||
static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
|
||||
double old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
|
||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||
{
|
||||
// we have the old endpoints: old_endpts
|
||||
// we have the perturbed endpoints: new_endpts
|
||||
@ -536,10 +537,10 @@ static double perturb_one(const Vec4 colors[], int np, int ch, const RegionPrec
|
||||
// for np = 16 -- adjust error thresholds as a function of np
|
||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||
static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec ®ion_prec, double &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||
{
|
||||
IntEndptsRGB_2 temp_endpts;
|
||||
double best_err = orig_err;
|
||||
float best_err = orig_err;
|
||||
int aprec = region_prec.endpt_a_prec[ch];
|
||||
int bprec = region_prec.endpt_b_prec[ch];
|
||||
int good_indices[Tile::TILE_TOTAL];
|
||||
@ -548,7 +549,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
|
||||
for (int i=0; i<np; ++i)
|
||||
indices[i] = -1;
|
||||
|
||||
double thr_scale = (double)np / (double)Tile::TILE_TOTAL;
|
||||
float thr_scale = (float)np / (float)Tile::TILE_TOTAL;
|
||||
|
||||
if (orig_err == 0) return orig_err;
|
||||
|
||||
@ -557,8 +558,8 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
|
||||
else if (orig_err > 1000.0*thr_scale) { adelta = (1 << aprec)/4; bdelta = (1 << bprec)/4; }
|
||||
else if (orig_err > 200.0*thr_scale) { adelta = (1 << aprec)/8; bdelta = (1 << bprec)/8; }
|
||||
else if (orig_err > 40.0*thr_scale) { adelta = (1 << aprec)/16; bdelta = (1 << bprec)/16; }
|
||||
adelta = MAX(adelta, 3);
|
||||
bdelta = MAX(bdelta, 3);
|
||||
adelta = max(adelta, 3);
|
||||
bdelta = max(bdelta, 3);
|
||||
|
||||
#ifdef DISABLE_EXHAUSTIVE
|
||||
adelta = bdelta = 3;
|
||||
@ -567,10 +568,10 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
|
||||
temp_endpts = opt_endpts;
|
||||
|
||||
// ok figure out the range of A and B
|
||||
int alow = MAX(0, opt_endpts.A[ch] - adelta);
|
||||
int ahigh = MIN((1<<aprec)-1, opt_endpts.A[ch] + adelta);
|
||||
int blow = MAX(0, opt_endpts.B[ch] - bdelta);
|
||||
int bhigh = MIN((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
|
||||
int alow = max(0, opt_endpts.A[ch] - adelta);
|
||||
int ahigh = min((1<<aprec)-1, opt_endpts.A[ch] + adelta);
|
||||
int blow = max(0, opt_endpts.B[ch] - bdelta);
|
||||
int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
|
||||
|
||||
// now there's no need to swap the ordering of A and B
|
||||
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
|
||||
@ -581,7 +582,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
|
||||
{
|
||||
// keep a <= b
|
||||
for (int a = alow; a <= ahigh; ++a)
|
||||
for (int b = MAX(a, blow); b < bhigh; ++b)
|
||||
for (int b = max(a, blow); b < bhigh; ++b)
|
||||
{
|
||||
temp_endpts.A[ch] = a;
|
||||
temp_endpts.B[ch] = b;
|
||||
@ -601,7 +602,7 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
|
||||
{
|
||||
// keep b <= a
|
||||
for (int b = blow; b < bhigh; ++b)
|
||||
for (int a = MAX(b, alow); a <= ahigh; ++a)
|
||||
for (int a = max(b, alow); a <= ahigh; ++a)
|
||||
{
|
||||
temp_endpts.A[ch] = a;
|
||||
temp_endpts.B[ch] = b;
|
||||
@ -629,9 +630,9 @@ static double exhaustive(const Vec4 colors[], int np, int ch, const RegionPrec &
|
||||
return best_err;
|
||||
}
|
||||
|
||||
static double optimize_one(const Vec4 colors[], int np, double orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec ®ion_prec, IntEndptsRGB_2 &opt_endpts)
|
||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec ®ion_prec, IntEndptsRGB_2 &opt_endpts)
|
||||
{
|
||||
double opt_err = orig_err;
|
||||
float opt_err = orig_err;
|
||||
|
||||
opt_endpts = orig_endpts;
|
||||
|
||||
@ -679,7 +680,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
|
||||
for (int i=0; i<np; ++i)
|
||||
{
|
||||
new_indices[i] = orig_indices[i] = temp_indices0[i];
|
||||
assert (orig_indices[i] != -1);
|
||||
nvAssert (orig_indices[i] != -1);
|
||||
}
|
||||
|
||||
opt_endpts.A[ch] = new_a.A[ch];
|
||||
@ -694,7 +695,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
|
||||
for (int i=0; i<np; ++i)
|
||||
{
|
||||
new_indices[i] = orig_indices[i] = temp_indices1[i];
|
||||
assert (orig_indices[i] != -1);
|
||||
nvAssert (orig_indices[i] != -1);
|
||||
}
|
||||
|
||||
opt_endpts.B[ch] = new_b.B[ch];
|
||||
@ -712,7 +713,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
|
||||
for (int i=0; i<np; ++i)
|
||||
{
|
||||
new_indices[i] = temp_indices0[i];
|
||||
assert (new_indices[i] != -1);
|
||||
nvAssert (new_indices[i] != -1);
|
||||
}
|
||||
|
||||
if (do_b == 0)
|
||||
@ -739,7 +740,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
|
||||
bool first = true;
|
||||
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
||||
{
|
||||
double new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||
|
||||
if (new_err < opt_err)
|
||||
{
|
||||
@ -750,7 +751,7 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
|
||||
for (int i=0; i<np; ++i)
|
||||
{
|
||||
orig_indices[i] = temp_indices0[i];
|
||||
assert (orig_indices[i] != -1);
|
||||
nvAssert (orig_indices[i] != -1);
|
||||
}
|
||||
first = false;
|
||||
}
|
||||
@ -775,10 +776,10 @@ static double optimize_one(const Vec4 colors[], int np, double orig_err, const I
|
||||
}
|
||||
|
||||
// this will return a valid set of endpoints in opt_endpts regardless of whether it improves orig_endpts or not
|
||||
static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS],
|
||||
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, double opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
|
||||
static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS],
|
||||
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
|
||||
{
|
||||
Vec4 pixels[Tile::TILE_TOTAL];
|
||||
Vector4 pixels[Tile::TILE_TOTAL];
|
||||
IntEndptsRGB_2 temp_in, temp_out;
|
||||
int temp_indices[Tile::TILE_TOTAL];
|
||||
|
||||
@ -795,7 +796,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
|
||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||
opt_err[region] = orig_err[region];
|
||||
|
||||
double best_err = orig_err[region];
|
||||
float best_err = orig_err[region];
|
||||
|
||||
for (int lsbmode=0; lsbmode<NLSBMODES; ++lsbmode)
|
||||
{
|
||||
@ -803,12 +804,12 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
|
||||
temp_in.b_lsb = (lsbmode >> 1) & 1;
|
||||
|
||||
// make sure we have a valid error for temp_in
|
||||
// we use DBL_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the DBL_MAX position)
|
||||
double temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], DBL_MAX, temp_indices);
|
||||
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
||||
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
||||
|
||||
// now try to optimize these endpoints
|
||||
double temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||
|
||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||
if (temp_out_err < best_err)
|
||||
@ -838,9 +839,9 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_
|
||||
emit compressed block with original data // to try to preserve maximum endpoint precision
|
||||
*/
|
||||
|
||||
static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
|
||||
static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpts[NREGIONS], char *block)
|
||||
{
|
||||
double orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
|
||||
float orig_err[NREGIONS], opt_err[NREGIONS], orig_toterr, opt_toterr, expected_opt_err[NREGIONS];
|
||||
IntEndptsRGB_2 orig_endpts[NREGIONS], opt_endpts[NREGIONS];
|
||||
int orig_indices[Tile::TILE_H][Tile::TILE_W], opt_indices[Tile::TILE_H][Tile::TILE_W];
|
||||
|
||||
@ -859,8 +860,9 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
|
||||
transform_inverse(orig_endpts);
|
||||
optimize_endpts(tile, shapeindex_best, orig_err, orig_endpts, pattern_precs[sp], expected_opt_err, opt_endpts);
|
||||
assign_indices(tile, shapeindex_best, opt_endpts, pattern_precs[sp], opt_indices, opt_err);
|
||||
for (int i=0; i<NREGIONS; ++i)
|
||||
assert(expected_opt_err[i] == opt_err[i]);
|
||||
// (nreed) Commented out asserts because they go off all the time...not sure why
|
||||
//for (int i=0; i<NREGIONS; ++i)
|
||||
// nvAssert(expected_opt_err[i] == opt_err[i]);
|
||||
swap_indices(opt_endpts, opt_indices, shapeindex_best);
|
||||
if (patterns[sp].transformed)
|
||||
transform_forward(opt_endpts);
|
||||
@ -885,40 +887,40 @@ static double refine(const Tile &tile, int shapeindex_best, const FltEndpts endp
|
||||
throw "No candidate found, should never happen (avpcl mode 3).";
|
||||
}
|
||||
|
||||
static void clamp(Vec4 &v)
|
||||
static void clamp(Vector4 &v)
|
||||
{
|
||||
if (v.X() < RGBA_MIN) v.X() = RGBA_MIN;
|
||||
if (v.X() > RGBA_MAX) v.X() = RGBA_MAX;
|
||||
if (v.Y() < RGBA_MIN) v.Y() = RGBA_MIN;
|
||||
if (v.Y() > RGBA_MAX) v.Y() = RGBA_MAX;
|
||||
if (v.Z() < RGBA_MIN) v.Z() = RGBA_MIN;
|
||||
if (v.Z() > RGBA_MAX) v.Z() = RGBA_MAX;
|
||||
v.W() = RGBA_MAX;
|
||||
if (v.x < 0.0f) v.x = 0.0f;
|
||||
if (v.x > 255.0f) v.x = 255.0f;
|
||||
if (v.y < 0.0f) v.y = 0.0f;
|
||||
if (v.y > 255.0f) v.y = 255.0f;
|
||||
if (v.z < 0.0f) v.z = 0.0f;
|
||||
if (v.z > 255.0f) v.z = 255.0f;
|
||||
v.w = 255.0f;
|
||||
}
|
||||
|
||||
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vec4 palette[NREGIONS][NINDICES])
|
||||
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS], Vector4 palette[NREGIONS][NINDICES])
|
||||
{
|
||||
for (int region = 0; region < NREGIONS; ++region)
|
||||
for (int i = 0; i < NINDICES; ++i)
|
||||
palette[region][i] = PALETTE_LERP(endpts[region].A, endpts[region].B, i, 0.0, float(DENOM));
|
||||
palette[region][i] = Utils::lerp(endpts[region].A, endpts[region].B, i, 0, DENOM);
|
||||
}
|
||||
|
||||
// generate a palette from unquantized endpoints, then pick best palette color for all pixels in each region, return toterr for all regions combined
|
||||
static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
|
||||
static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts[NREGIONS])
|
||||
{
|
||||
// build list of possibles
|
||||
Vec4 palette[NREGIONS][NINDICES];
|
||||
Vector4 palette[NREGIONS][NINDICES];
|
||||
|
||||
generate_palette_unquantized(endpts, palette);
|
||||
|
||||
double toterr = 0;
|
||||
Vec4 err;
|
||||
float toterr = 0;
|
||||
Vector4 err;
|
||||
|
||||
for (int y = 0; y < tile.size_y; y++)
|
||||
for (int x = 0; x < tile.size_x; x++)
|
||||
{
|
||||
int region = REGION(x,y,shapeindex);
|
||||
double err, besterr = DBL_MAX;
|
||||
float err, besterr = FLT_MAX;
|
||||
|
||||
for (int i = 0; i < NINDICES && besterr > 0; ++i)
|
||||
{
|
||||
@ -934,19 +936,21 @@ static double map_colors(const Tile &tile, int shapeindex, const FltEndpts endpt
|
||||
return toterr;
|
||||
}
|
||||
|
||||
static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
|
||||
static float rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS])
|
||||
{
|
||||
for (int region=0; region<NREGIONS; ++region)
|
||||
{
|
||||
int np = 0;
|
||||
Vec4 colors[Tile::TILE_TOTAL];
|
||||
Vec4 mean(0,0,0,0);
|
||||
Vector3 colors[Tile::TILE_TOTAL];
|
||||
float alphas[2];
|
||||
Vector4 mean(0,0,0,0);
|
||||
|
||||
for (int y = 0; y < tile.size_y; y++)
|
||||
for (int x = 0; x < tile.size_x; x++)
|
||||
if (REGION(x,y,shapeindex) == region)
|
||||
{
|
||||
colors[np] = tile.data[y][x];
|
||||
colors[np] = tile.data[y][x].xyz();
|
||||
if (np < 2) alphas[np] = tile.data[y][x].w;
|
||||
mean += tile.data[y][x];
|
||||
++np;
|
||||
}
|
||||
@ -954,54 +958,40 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
|
||||
// handle simple cases
|
||||
if (np == 0)
|
||||
{
|
||||
Vec4 zero(0,0,0,RGBA_MAX);
|
||||
Vector4 zero(0,0,0,255.0f);
|
||||
endpts[region].A = zero;
|
||||
endpts[region].B = zero;
|
||||
continue;
|
||||
}
|
||||
else if (np == 1)
|
||||
{
|
||||
endpts[region].A = colors[0];
|
||||
endpts[region].B = colors[0];
|
||||
endpts[region].A = Vector4(colors[0], alphas[0]);
|
||||
endpts[region].B = Vector4(colors[0], alphas[0]);
|
||||
continue;
|
||||
}
|
||||
else if (np == 2)
|
||||
{
|
||||
endpts[region].A = colors[0];
|
||||
endpts[region].B = colors[1];
|
||||
endpts[region].A = Vector4(colors[0], alphas[0]);
|
||||
endpts[region].B = Vector4(colors[1], alphas[1]);
|
||||
continue;
|
||||
}
|
||||
|
||||
Matrix rdq(np, 3);
|
||||
|
||||
mean /= float(np);
|
||||
|
||||
// only look at RGB; ignore A
|
||||
for (int i = 0; i < np; ++i)
|
||||
{
|
||||
rdq(i,0) = colors[i].X() - mean.X();
|
||||
rdq(i,1) = colors[i].Y() - mean.Y();
|
||||
rdq(i,2) = colors[i].Z() - mean.Z();
|
||||
}
|
||||
|
||||
// perform a singular value decomposition
|
||||
SVD svd(rdq);
|
||||
|
||||
// get the principal component direction (well, the one with the largest weight)
|
||||
Vec4 direction(svd.R()(0,0), svd.R()(0,1), svd.R()(0,2), 0.0);
|
||||
Vector3 direction = Fit::computePrincipalComponent_EigenSolver(np, colors);
|
||||
|
||||
// project each pixel value along the principal direction
|
||||
double minp = DBL_MAX, maxp = -DBL_MAX;
|
||||
float minp = FLT_MAX, maxp = -FLT_MAX;
|
||||
for (int i = 0; i < np; i++)
|
||||
{
|
||||
float dp = rdq(i,0) * direction.X() + rdq(i,1)*direction.Y() + rdq(i,2)*direction.Z();
|
||||
float dp = dot(colors[i]-mean.xyz(), direction);
|
||||
if (dp < minp) minp = dp;
|
||||
if (dp > maxp) maxp = dp;
|
||||
}
|
||||
|
||||
// choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values
|
||||
endpts[region].A = mean + minp*direction;
|
||||
endpts[region].B = mean + maxp*direction;
|
||||
endpts[region].A = mean + minp*Vector4(direction, 0);
|
||||
endpts[region].B = mean + maxp*Vector4(direction, 0);
|
||||
|
||||
// clamp endpoints
|
||||
// the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best
|
||||
@ -1013,13 +1003,13 @@ static double rough(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS]
|
||||
return map_colors(tile, shapeindex, endpts);
|
||||
}
|
||||
|
||||
static void swap(double *list1, int *list2, int i, int j)
|
||||
static void swap(float *list1, int *list2, int i, int j)
|
||||
{
|
||||
double t = list1[i]; list1[i] = list1[j]; list1[j] = t;
|
||||
float t = list1[i]; list1[i] = list1[j]; list1[j] = t;
|
||||
int t1 = list2[i]; list2[i] = list2[j]; list2[j] = t1;
|
||||
}
|
||||
|
||||
double AVPCL::compress_mode3(const Tile &t, char *block)
|
||||
float AVPCL::compress_mode3(const Tile &t, char *block)
|
||||
{
|
||||
// number of rough cases to look at. reasonable values of this are 1, NSHAPES/4, and NSHAPES
|
||||
// NSHAPES/4 gets nearly all the cases; you can increase that a bit (say by 3 or 4) if you really want to squeeze the last bit out
|
||||
@ -1029,10 +1019,10 @@ double AVPCL::compress_mode3(const Tile &t, char *block)
|
||||
struct {
|
||||
FltEndpts endpts[NREGIONS];
|
||||
} all[NSHAPES];
|
||||
double roughmse[NSHAPES];
|
||||
float roughmse[NSHAPES];
|
||||
int index[NSHAPES];
|
||||
char tempblock[AVPCL::BLOCKSIZE];
|
||||
double msebest = DBL_MAX;
|
||||
float msebest = FLT_MAX;
|
||||
|
||||
for (int i=0; i<NSHAPES; ++i)
|
||||
{
|
||||
@ -1049,7 +1039,7 @@ double AVPCL::compress_mode3(const Tile &t, char *block)
|
||||
for (int i=0; i<NITEMS && msebest>0; ++i)
|
||||
{
|
||||
int shape = index[i];
|
||||
double mse = refine(t, shape, &all[shape].endpts[0], tempblock);
|
||||
float mse = refine(t, shape, &all[shape].endpts[0], tempblock);
|
||||
if (mse < msebest)
|
||||
{
|
||||
memcpy(block, tempblock, sizeof(tempblock));
|
||||
|
Reference in New Issue
Block a user