From 528b93ab14f24bde4f33ce20857c7373075a28c2 Mon Sep 17 00:00:00 2001 From: castano Date: Thu, 24 Jun 2010 07:17:36 +0000 Subject: [PATCH] Replace tabs with spaces. --- src/nvtt/bc6h/zohone.cpp | 1062 +++++++++++++++++----------------- src/nvtt/bc6h/zohtwo.cpp | 1170 +++++++++++++++++++------------------- 2 files changed, 1117 insertions(+), 1115 deletions(-) diff --git a/src/nvtt/bc6h/zohone.cpp b/src/nvtt/bc6h/zohone.cpp index dc1e9d1..f8f6b62 100644 --- a/src/nvtt/bc6h/zohone.cpp +++ b/src/nvtt/bc6h/zohone.cpp @@ -35,7 +35,7 @@ using namespace nv; static int shapes[NSHAPES] = { - 0x0000 + 0x0000 }; // only 1 shape #define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) @@ -47,16 +47,16 @@ static int shapes[NSHAPES] = struct Chanpat { - int prec[NDELTA]; // precision pattern for one channel + int prec[NDELTA]; // precision pattern for one channel }; struct Pattern { - Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel - int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed - int mode; // associated mode value - int modebits; // number of mode bits - const char *encoding; // verilog description of encoding for this mode + Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode }; #define MAXMODEBITS 5 @@ -66,23 +66,23 @@ struct Pattern static Pattern patterns[NPATTERNS] = { - 16,4, 16,4, 16,4, 1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", - 12,8, 12,8, 12,8, 1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", - 11,9, 11,9, 11,9, 1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", - 10,10, 10,10, 10,10, 0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 16,4, 16,4, 16,4, 1, 0x0f, 5, "bw[10],bw[11],bw[12],bw[13],bw[14],bw[15],bx[3:0],gw[10],gw[11],gw[12],gw[13],gw[14],gw[15],gx[3:0],rw[10],rw[11],rw[12],rw[13],rw[14],rw[15],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 12,8, 12,8, 12,8, 1, 0x0b, 5, "bw[10],bw[11],bx[7:0],gw[10],gw[11],gx[7:0],rw[10],rw[11],rx[7:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 11,9, 11,9, 11,9, 1, 0x07, 5, "bw[10],bx[8:0],gw[10],gx[8:0],rw[10],rx[8:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 10,10, 10,10, 10,10, 0, 0x03, 5, "bx[9:0],gx[9:0],rx[9:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", }; // mapping of mode to the corresponding index in pattern static int mode_to_pat[MAXMODES] = { - -1,-1,-1, - 3, // 0x03 - -1,-1,-1, - 2, // 0x07 - -1,-1,-1, - 1, // 0x0b - -1,-1,-1, - 0, // 0x0f - -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, + -1,-1,-1, + 3, // 0x03 + -1,-1,-1, + 2, // 0x07 + -1,-1,-1, + 1, // 0x0b + -1,-1,-1, + 0, // 0x0f + -1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1,-1, }; #define R_0(ep) (ep)[0].A[i] @@ -92,702 +92,702 @@ static int mode_to_pat[MAXMODES] = { // compress endpoints static void compress_endpts(const IntEndpts in[NREGIONS_ONE], ComprEndpts out[NREGIONS_ONE], const Pattern &p) { - if (p.transformed) - { - for (int i=0; i> 2) & 3 and x = index & 3 static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) { - int index_positions[NREGIONS_ONE]; + int index_positions[NREGIONS_ONE]; - index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 + index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 - for (int region = 0; region < NREGIONS_ONE; ++region) - { - int x = index_positions[region] & 3; - int y = (index_positions[region] >> 2) & 3; - nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table - if (indices[y][x] & HIGH_INDEXBIT) - { - // high bit is set, swap the endpts and indices for this region - int t; - for (int i=0; i> 2) & 3; + nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table + if (indices[y][x] & HIGH_INDEXBIT) + { + // high bit is set, swap the endpts and indices for this region + int t; + for (int i=0; i> endbit, len); break; - case FIELD_RW: out.write(rw >> endbit, len); break; - case FIELD_RX: out.write(rx >> endbit, len); break; - case FIELD_GW: out.write(gw >> endbit, len); break; - case FIELD_GX: out.write(gx >> endbit, len); break; - case FIELD_BW: out.write(bw >> endbit, len); break; - case FIELD_BX: out.write(bx >> endbit, len); break; + Utils::parse(p.encoding, ptr, field, endbit, len); + switch(field) + { + case FIELD_M: out.write( m >> endbit, len); break; + case FIELD_RW: out.write(rw >> endbit, len); break; + case FIELD_RX: out.write(rx >> endbit, len); break; + case FIELD_GW: out.write(gw >> endbit, len); break; + case FIELD_GX: out.write(gx >> endbit, len); break; + case FIELD_BW: out.write(bw >> endbit, len); break; + case FIELD_BX: out.write(bx >> endbit, len); break; - case FIELD_D: - case FIELD_RY: - case FIELD_RZ: - case FIELD_GY: - case FIELD_GZ: - case FIELD_BY: - case FIELD_BZ: - default: nvAssume(0); - } - } + case FIELD_D: + case FIELD_RY: + case FIELD_RZ: + case FIELD_GY: + case FIELD_GZ: + case FIELD_BY: + case FIELD_BZ: + default: nvAssume(0); + } + } } static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p) { - // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode - int mode = in.read(2); - if (mode != 0x00 && mode != 0x01) - mode = (in.read(3) << 2) | mode; + // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode + int mode = in.read(2); + if (mode != 0x00 && mode != 0x01) + mode = (in.read(3) << 2) | mode; - int pat_index = mode_to_pat[mode]; + int pat_index = mode_to_pat[mode]; - nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); - nvDebugCheck (in.getptr() == patterns[pat_index].modebits); + nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); + nvDebugCheck (in.getptr() == patterns[pat_index].modebits); - p = patterns[pat_index]; + p = patterns[pat_index]; - int d; - int rw, rx; - int gw, gx; - int bw, bx; + int d; + int rw, rx; + int gw, gx; + int bw, bx; - d = 0; - rw = rx = 0; - gw = gx = 0; - bw = bx = 0; + d = 0; + rw = rx = 0; + gw = gx = 0; + bw = bx = 0; - int ptr = strlen(p.encoding); + int ptr = strlen(p.encoding); - while (ptr) - { - Field field; - int endbit, len; + while (ptr) + { + Field field; + int endbit, len; - Utils::parse(p.encoding, ptr, field, endbit, len); + Utils::parse(p.encoding, ptr, field, endbit, len); - switch(field) - { - case FIELD_M: break; // already processed so ignore - case FIELD_RW: rw |= in.read(len) << endbit; break; - case FIELD_RX: rx |= in.read(len) << endbit; break; - case FIELD_GW: gw |= in.read(len) << endbit; break; - case FIELD_GX: gx |= in.read(len) << endbit; break; - case FIELD_BW: bw |= in.read(len) << endbit; break; - case FIELD_BX: bx |= in.read(len) << endbit; break; + switch(field) + { + case FIELD_M: break; // already processed so ignore + case FIELD_RW: rw |= in.read(len) << endbit; break; + case FIELD_RX: rx |= in.read(len) << endbit; break; + case FIELD_GW: gw |= in.read(len) << endbit; break; + case FIELD_GX: gx |= in.read(len) << endbit; break; + case FIELD_BW: bw |= in.read(len) << endbit; break; + case FIELD_BX: bx |= in.read(len) << endbit; break; - case FIELD_D: - case FIELD_RY: - case FIELD_RZ: - case FIELD_GY: - case FIELD_GZ: - case FIELD_BY: - case FIELD_BZ: - default: nvAssume(0); - } - } + case FIELD_D: + case FIELD_RY: + case FIELD_RZ: + case FIELD_GY: + case FIELD_GZ: + case FIELD_BY: + case FIELD_BZ: + default: nvAssume(0); + } + } - nvDebugCheck (in.getptr() == 128 - 63); + nvDebugCheck (in.getptr() == 128 - 63); - endpts[0].A[0] = rw; endpts[0].B[0] = rx; - endpts[0].A[1] = gw; endpts[0].B[1] = gx; - endpts[0].A[2] = bw; endpts[0].B[2] = bx; + endpts[0].A[0] = rw; endpts[0].B[0] = rx; + endpts[0].A[1] = gw; endpts[0].B[1] = gx; + endpts[0].A[2] = bw; endpts[0].B[2] = bx; } // compress index 0 static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) { - for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) - { - int x = POS_TO_X(pos); - int y = POS_TO_Y(pos); + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); - out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0)); - } + out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0)); + } } static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) { - Bits out(block, ZOH::BITSIZE); + Bits out(block, ZOH::BITSIZE); - write_header(endpts, p, out); + write_header(endpts, p, out); - write_indices(indices, shapeindex, out); + write_indices(indices, shapeindex, out); - nvDebugCheck(out.getptr() == ZOH::BITSIZE); + nvDebugCheck(out.getptr() == ZOH::BITSIZE); } static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES]) { - // scale endpoints - int a, b; // really need a IntVector3... + // scale endpoints + int a, b; // really need a IntVector3... - a = Utils::unquantize(endpts.A[0], prec); - b = Utils::unquantize(endpts.B[0], prec); + a = Utils::unquantize(endpts.A[0], prec); + b = Utils::unquantize(endpts.B[0], prec); - // interpolate - for (int i = 0; i < NINDICES; ++i) - palette[i].x = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].x = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); - a = Utils::unquantize(endpts.A[1], prec); - b = Utils::unquantize(endpts.B[1], prec); + a = Utils::unquantize(endpts.A[1], prec); + b = Utils::unquantize(endpts.B[1], prec); - // interpolate - for (int i = 0; i < NINDICES; ++i) - palette[i].y = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].y = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); - a = Utils::unquantize(endpts.A[2], prec); - b = Utils::unquantize(endpts.B[2], prec); + a = Utils::unquantize(endpts.A[2], prec); + b = Utils::unquantize(endpts.B[2], prec); - // interpolate - for (int i = 0; i < NINDICES; ++i) - palette[i].z = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].z = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); } // position 0 was compressed static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) { - for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) - { - int x = POS_TO_X(pos); - int y = POS_TO_Y(pos); + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); - indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0)); - } + indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0)); + } } void ZOH::decompressone(const char *block, Tile &t) { - Bits in(block, ZOH::BITSIZE); + Bits in(block, ZOH::BITSIZE); - Pattern p; - IntEndpts endpts[NREGIONS_ONE]; - ComprEndpts compr_endpts[NREGIONS_ONE]; + Pattern p; + IntEndpts endpts[NREGIONS_ONE]; + ComprEndpts compr_endpts[NREGIONS_ONE]; - read_header(in, compr_endpts, p); - int shapeindex = 0; // only one shape - - decompress_endpts(compr_endpts, endpts, p); + read_header(in, compr_endpts, p); + int shapeindex = 0; // only one shape - Vector3 palette[NREGIONS_ONE][NINDICES]; - for (int r = 0; r < NREGIONS_ONE; ++r) - generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); + decompress_endpts(compr_endpts, endpts, p); - // read indices - int indices[Tile::TILE_H][Tile::TILE_W]; + Vector3 palette[NREGIONS_ONE][NINDICES]; + for (int r = 0; r < NREGIONS_ONE; ++r) + generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); - read_indices(in, shapeindex, indices); + // read indices + int indices[Tile::TILE_H][Tile::TILE_W]; - nvDebugCheck(in.getptr() == ZOH::BITSIZE); + read_indices(in, shapeindex, indices); - // lookup - for (int y = 0; y < Tile::TILE_H; y++) + nvDebugCheck(in.getptr() == ZOH::BITSIZE); + + // lookup + for (int y = 0; y < Tile::TILE_H; y++) for (int x = 0; x < Tile::TILE_W; x++) - t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; + t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr static double map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec) { - Vector3 palette[NINDICES]; - double toterr = 0; - Vector3 err; + Vector3 palette[NINDICES]; + double toterr = 0; + Vector3 err; - generate_palette_quantized(endpts, prec, palette); + generate_palette_quantized(endpts, prec, palette); - for (int i = 0; i < np; ++i) - { - double err, besterr; + for (int i = 0; i < np; ++i) + { + double err, besterr; - besterr = Utils::norm(colors[i], palette[0]) * importance[i]; + besterr = Utils::norm(colors[i], palette[0]) * importance[i]; - for (int j = 1; j < NINDICES && besterr > 0; ++j) - { - err = Utils::norm(colors[i], palette[j]) * importance[i]; + for (int j = 1; j < NINDICES && besterr > 0; ++j) + { + err = Utils::norm(colors[i], palette[j]) * importance[i]; - if (err > besterr) // error increased, so we're done searching - break; - if (err < besterr) - besterr = err; - } - toterr += besterr; - } - return toterr; + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; } // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_ONE]) + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_ONE]) { - // build list of possibles - Vector3 palette[NREGIONS_ONE][NINDICES]; + // build list of possibles + Vector3 palette[NREGIONS_ONE][NINDICES]; - for (int region = 0; region < NREGIONS_ONE; ++region) - { - generate_palette_quantized(endpts[region], prec, &palette[region][0]); - toterr[region] = 0; - } + for (int region = 0; region < NREGIONS_ONE; ++region) + { + generate_palette_quantized(endpts[region], prec, &palette[region][0]); + toterr[region] = 0; + } - Vector3 err; + Vector3 err; - for (int y = 0; y < tile.size_y; y++) + for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { - int region = REGION(x,y,shapeindex); - double err, besterr; + int region = REGION(x,y,shapeindex); + double err, besterr; - besterr = Utils::norm(tile.data[y][x], palette[region][0]); - indices[y][x] = 0; + besterr = Utils::norm(tile.data[y][x], palette[region][0]); + indices[y][x] = 0; - for (int i = 1; i < NINDICES && besterr > 0; ++i) - { - err = Utils::norm(tile.data[y][x], palette[region][i]); + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]); - if (err > besterr) // error increased, so we're done searching - break; - if (err < besterr) - { - besterr = err; - indices[y][x] = i; - } - } - toterr[region] += besterr; - } + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } } static double perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, - double old_err, int do_b) + double old_err, int do_b) { - // we have the old endpoints: old_endpts - // we have the perturbed endpoints: new_endpts - // we have the temporary endpoints: temp_endpts + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts - IntEndpts temp_endpts; - float min_err = old_err; // start with the best current error - int beststep; + IntEndpts temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; - // copy real endpoints so we can perturb them - for (int i=0; i>= 1) - { - bool improved = false; - for (int sign = -1; sign <= 1; sign += 2) - { - if (do_b == 0) - { - temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; - if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) - continue; - } - else - { - temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; - if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) - continue; - } + // do a logarithmic search for the best error for this endpoint (which) + for (int step = 1 << (prec-1); step; step >>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } - float err = map_colors(colors, importance, np, temp_endpts, prec); + float err = map_colors(colors, importance, np, temp_endpts, prec); - if (err < min_err) - { - improved = true; - min_err = err; - beststep = sign * step; - } - } - // if this was an improvement, move the endpoint and continue search from there - if (improved) - { - if (do_b == 0) - new_endpts.A[ch] += beststep; - else - new_endpts.B[ch] += beststep; - } - } - return min_err; + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + } + } + // if this was an improvement, move the endpoint and continue search from there + if (improved) + { + if (do_b == 0) + new_endpts.A[ch] += beststep; + else + new_endpts.B[ch] += beststep; + } + } + return min_err; } static void optimize_one(const Vector3 colors[], const float importance[], int np, double orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) { - double opt_err = orig_err; - for (int ch = 0; ch < NCHANNELS; ++ch) - { - opt_endpts.A[ch] = orig_endpts.A[ch]; - opt_endpts.B[ch] = orig_endpts.B[ch]; - } - /* - err0 = perturb(rgb0, delta0) - err1 = perturb(rgb1, delta1) - if (err0 < err1) - if (err0 >= initial_error) break - rgb0 += delta0 - next = 1 - else - if (err1 >= initial_error) break - rgb1 += delta1 - next = 0 - initial_err = map() - for (;;) - err = perturb(next ? rgb1:rgb0, delta) - if (err >= initial_err) break - next? rgb1 : rgb0 += delta - initial_err = err + double opt_err = orig_err; + for (int ch = 0; ch < NCHANNELS; ++ch) + { + opt_endpts.A[ch] = orig_endpts.A[ch]; + opt_endpts.B[ch] = orig_endpts.B[ch]; + } + /* + err0 = perturb(rgb0, delta0) + err1 = perturb(rgb1, delta1) + if (err0 < err1) + if (err0 >= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err */ - IntEndpts new_a, new_b; - IntEndpts new_endpt; - int do_b; + IntEndpts new_a, new_b; + IntEndpts new_endpt; + int do_b; - // now optimize each channel separately - for (int ch = 0; ch < NCHANNELS; ++ch) - { - // figure out which endpoint when perturbed gives the most improvement and start there - // if we just alternate, we can easily end up in a local minima - float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A - float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B - if (err0 < err1) - { - if (err0 >= opt_err) - continue; + if (err0 < err1) + { + if (err0 >= opt_err) + continue; - opt_endpts.A[ch] = new_a.A[ch]; - opt_err = err0; - do_b = 1; // do B next - } - else - { - if (err1 >= opt_err) - continue; - opt_endpts.B[ch] = new_b.B[ch]; - opt_err = err1; - do_b = 0; // do A next - } - - // now alternate endpoints and keep trying until there is no improvement - for (;;) - { - float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); - if (err >= opt_err) - break; - if (do_b == 0) - opt_endpts.A[ch] = new_endpt.A[ch]; - else - opt_endpts.B[ch] = new_endpt.B[ch]; - opt_err = err; - do_b = 1 - do_b; // now move the other endpoint - } - } + opt_endpts.A[ch] = new_a.A[ch]; + opt_err = err0; + do_b = 1; // do B next + } + else + { + if (err1 >= opt_err) + continue; + opt_endpts.B[ch] = new_b.B[ch]; + opt_err = err1; + do_b = 0; // do A next + } + + // now alternate endpoints and keep trying until there is no improvement + for (;;) + { + float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); + if (err >= opt_err) + break; + if (do_b == 0) + opt_endpts.A[ch] = new_endpt.A[ch]; + else + opt_endpts.B[ch] = new_endpt.B[ch]; + opt_err = err; + do_b = 1 - do_b; // now move the other endpoint + } + } } static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS_ONE], - const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE]) + const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE]) { - Vector3 pixels[Tile::TILE_TOTAL]; - float importance[Tile::TILE_TOTAL]; - double err = 0; + Vector3 pixels[Tile::TILE_TOTAL]; + float importance[Tile::TILE_TOTAL]; + double err = 0; - for (int region=0; region 0; ++i) - { - err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; - if (err > besterr) // error increased, so we're done searching - break; - if (err < besterr) - besterr = err; - } - toterr += besterr; - } - return toterr; + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; } double ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE]) { - for (int region=0; region maxp) maxp = dp; - } + // project each pixel value along the principal direction + float minp = FLT_MAX, maxp = -FLT_MAX; + for (int i = 0; i < np; i++) + { + float dp = dot(colors[i]-mean, direction); + if (dp < minp) minp = dp; + if (dp > maxp) maxp = dp; + } - // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; - // clamp endpoints - // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best - // shape based on endpoints being clamped - Utils::clamp(endpts[region].A); - Utils::clamp(endpts[region].B); - } + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + Utils::clamp(endpts[region].A); + Utils::clamp(endpts[region].B); + } - return map_colors(tile, shapeindex, endpts); + return map_colors(tile, shapeindex, endpts); } double ZOH::compressone(const Tile &t, char *block) { - int shapeindex_best = 0; - FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE]; - double msebest = DBL_MAX; + int shapeindex_best = 0; + FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE]; + double msebest = DBL_MAX; - /* + /* collect the mse values that are within 5% of the best values optimize each one and choose the best */ - // hack for now -- just use the best value WORK - for (int i=0; i0.0; ++i) - { - double mse = roughone(t, i, tempendpts); - if (mse < msebest) - { - msebest = mse; - shapeindex_best = i; - memcpy(endptsbest, tempendpts, sizeof(endptsbest)); - } + // hack for now -- just use the best value WORK + for (int i=0; i0.0; ++i) + { + double mse = roughone(t, i, tempendpts); + if (mse < msebest) + { + msebest = mse; + shapeindex_best = i; + memcpy(endptsbest, tempendpts, sizeof(endptsbest)); + } - } - return refineone(t, shapeindex_best, endptsbest, block); + } + return refineone(t, shapeindex_best, endptsbest, block); } diff --git a/src/nvtt/bc6h/zohtwo.cpp b/src/nvtt/bc6h/zohtwo.cpp index ebda9cc..5f97b32 100644 --- a/src/nvtt/bc6h/zohtwo.cpp +++ b/src/nvtt/bc6h/zohtwo.cpp @@ -72,16 +72,16 @@ using namespace nv; struct Chanpat { - int prec[NDELTA]; // precision pattern for one channel + int prec[NDELTA]; // precision pattern for one channel }; struct Pattern { - Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel - int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed - int mode; // associated mode value - int modebits; // number of mode bits - const char *encoding; // verilog description of encoding for this mode + Chanpat chan[NCHANNELS]; // allow different bit patterns per channel -- but we still want constant precision per channel + int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed + int mode; // associated mode value + int modebits; // number of mode bits + const char *encoding; // verilog description of encoding for this mode }; #define MAXMODEBITS 5 @@ -91,39 +91,39 @@ struct Pattern static Pattern patterns[NPATTERNS] = { - 11,5,5,5, 11,4,4,4, 11,4,4,4, 1, 0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", - 11,4,4,4, 11,5,5,5, 11,4,4,4, 1, 0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", - 11,4,4,4, 11,4,4,4, 11,5,5,5, 1, 0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", - 10,5,5,5, 10,5,5,5, 10,5,5,5, 1, 0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]", - 9,5,5,5, 9,5,5,5, 9,5,5,5, 1, 0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]", - 8,6,6,6, 8,5,5,5, 8,5,5,5, 1, 0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]", - 8,5,5,5, 8,6,6,6, 8,5,5,5, 1, 0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]", - 8,5,5,5, 8,5,5,5, 8,6,6,6, 1, 0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]", - 7,6,6,6, 7,6,6,6, 7,6,6,6, 1, 0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]", - 6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]", + 11,5,5,5, 11,4,4,4, 11,4,4,4, 1, 0x02, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],rw[10],rx[4:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 11,4,4,4, 11,5,5,5, 11,4,4,4, 1, 0x06, 5, "d[4:0],bz[3],gy[4],rz[3:0],bz[2],bz[0],ry[3:0],by[3:0],bz[1],bw[10],bx[3:0],gz[3:0],gw[10],gx[4:0],gy[3:0],gz[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 11,4,4,4, 11,4,4,4, 11,5,5,5, 1, 0x0a, 5, "d[4:0],bz[3],bz[4],rz[3:0],bz[2:1],ry[3:0],by[3:0],bw[10],bx[4:0],gz[3:0],bz[0],gw[10],gx[3:0],gy[3:0],by[4],rw[10],rx[3:0],bw[9:0],gw[9:0],rw[9:0],m[4:0]", + 10,5,5,5, 10,5,5,5, 10,5,5,5, 1, 0x00, 2, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bw[9:0],gw[9:0],rw[9:0],bz[4],by[4],gy[4],m[1:0]", + 9,5,5,5, 9,5,5,5, 9,5,5,5, 1, 0x0e, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bw[8:0],gy[4],gw[8:0],by[4],rw[8:0],m[4:0]", + 8,6,6,6, 8,5,5,5, 8,5,5,5, 1, 0x12, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bz[1],bx[4:0],gz[3:0],bz[0],gx[4:0],gy[3:0],rx[5:0],bz[4:3],bw[7:0],gy[4],bz[2],gw[7:0],by[4],gz[4],rw[7:0],m[4:0]", + 8,5,5,5, 8,6,6,6, 8,5,5,5, 1, 0x16, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bz[1],bx[4:0],gz[3:0],gx[5:0],gy[3:0],gz[4],rx[4:0],bz[4],gz[5],bw[7:0],gy[4],gy[5],gw[7:0],by[4],bz[0],rw[7:0],m[4:0]", + 8,5,5,5, 8,5,5,5, 8,6,6,6, 1, 0x1a, 5, "d[4:0],bz[3],rz[4:0],bz[2],ry[4:0],by[3:0],bx[5:0],gz[3:0],bz[0],gx[4:0],gy[3:0],gz[4],rx[4:0],bz[4],bz[5],bw[7:0],gy[4],by[5],gw[7:0],by[4],bz[1],rw[7:0],m[4:0]", + 7,6,6,6, 7,6,6,6, 7,6,6,6, 1, 0x01, 2, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],bw[6:0],gy[4],bz[2],by[5],gw[6:0],by[4],bz[1:0],rw[6:0],gz[5:4],gy[5],m[1:0]", + 6,6,6,6, 6,6,6,6, 6,6,6,6, 0, 0x1e, 5, "d[4:0],rz[5:0],ry[5:0],by[3:0],bx[5:0],gz[3:0],gx[5:0],gy[3:0],rx[5:0],bz[4],bz[5],bz[3],gz[5],bw[5:0],gy[4],bz[2],by[5],gy[5],gw[5:0],by[4],bz[1:0],gz[4],rw[5:0],m[4:0]", }; // mapping of mode to the corresponding index in pattern // UNUSED ZOH MODES are 0x13, 0x17, 0x1b, 0x1f -- return -2 for these static int mode_to_pat[MAXMODES] = { - 3, // 0x00 - 8, // 0x01 - 0, // 0x02 - -1,-1,-1, - 1, // 0x06 - -1,-1,-1, - 2, // 0x0a - -1,-1,-1, - 4, // 0x0e - -1,-1,-1, - 5, // 0x12 - -2,-1,-1, - 6, // 0x16 - -2,-1,-1, - 7, // 0x1a - -2,-1,-1, - 9, // 0x1e - -2 + 3, // 0x00 + 8, // 0x01 + 0, // 0x02 + -1,-1,-1, + 1, // 0x06 + -1,-1,-1, + 2, // 0x0a + -1,-1,-1, + 4, // 0x0e + -1,-1,-1, + 5, // 0x12 + -2,-1,-1, + 6, // 0x16 + -2,-1,-1, + 7, // 0x1a + -2,-1,-1, + 9, // 0x1e + -2 }; #define R_0(ep) (ep)[0].A[i] @@ -135,742 +135,744 @@ static int mode_to_pat[MAXMODES] = { // compress endpoints static void compress_endpts(const IntEndpts in[NREGIONS_TWO], ComprEndpts out[NREGIONS_TWO], const Pattern &p) { - if (p.transformed) - { - for (int i=0; i> endbit, len); break; - case FIELD_D: out.write( d >> endbit, len); break; - case FIELD_RW: out.write(rw >> endbit, len); break; - case FIELD_RX: out.write(rx >> endbit, len); break; - case FIELD_RY: out.write(ry >> endbit, len); break; - case FIELD_RZ: out.write(rz >> endbit, len); break; - case FIELD_GW: out.write(gw >> endbit, len); break; - case FIELD_GX: out.write(gx >> endbit, len); break; - case FIELD_GY: out.write(gy >> endbit, len); break; - case FIELD_GZ: out.write(gz >> endbit, len); break; - case FIELD_BW: out.write(bw >> endbit, len); break; - case FIELD_BX: out.write(bx >> endbit, len); break; - case FIELD_BY: out.write(by >> endbit, len); break; - case FIELD_BZ: out.write(bz >> endbit, len); break; - default: nvAssume(0); - } - } + Utils::parse(p.encoding, ptr, field, endbit, len); + switch(field) + { + case FIELD_M: out.write( m >> endbit, len); break; + case FIELD_D: out.write( d >> endbit, len); break; + case FIELD_RW: out.write(rw >> endbit, len); break; + case FIELD_RX: out.write(rx >> endbit, len); break; + case FIELD_RY: out.write(ry >> endbit, len); break; + case FIELD_RZ: out.write(rz >> endbit, len); break; + case FIELD_GW: out.write(gw >> endbit, len); break; + case FIELD_GX: out.write(gx >> endbit, len); break; + case FIELD_GY: out.write(gy >> endbit, len); break; + case FIELD_GZ: out.write(gz >> endbit, len); break; + case FIELD_BW: out.write(bw >> endbit, len); break; + case FIELD_BX: out.write(bx >> endbit, len); break; + case FIELD_BY: out.write(by >> endbit, len); break; + case FIELD_BZ: out.write(bz >> endbit, len); break; + default: nvAssume(0); + } + } } static bool read_header(Bits &in, ComprEndpts endpts[NREGIONS_TWO], int &shapeindex, Pattern &p) { - // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode - int mode = in.read(2); - if (mode != 0x00 && mode != 0x01) - mode = (in.read(3) << 2) | mode; + // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode + int mode = in.read(2); + if (mode != 0x00 && mode != 0x01) + mode = (in.read(3) << 2) | mode; - int pat_index = mode_to_pat[mode]; + int pat_index = mode_to_pat[mode]; - if (pat_index == -2) - return false; // reserved mode found + if (pat_index == -2) + return false; // reserved mode found - nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); - nvDebugCheck (in.getptr() == patterns[pat_index].modebits); + nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); + nvDebugCheck (in.getptr() == patterns[pat_index].modebits); - p = patterns[pat_index]; + p = patterns[pat_index]; - int d; - int rw, rx, ry, rz; - int gw, gx, gy, gz; - int bw, bx, by, bz; + int d; + int rw, rx, ry, rz; + int gw, gx, gy, gz; + int bw, bx, by, bz; - d = 0; - rw = rx = ry = rz = 0; - gw = gx = gy = gz = 0; - bw = bx = by = bz = 0; + d = 0; + rw = rx = ry = rz = 0; + gw = gx = gy = gz = 0; + bw = bx = by = bz = 0; - int ptr = strlen(p.encoding); + int ptr = strlen(p.encoding); - while (ptr) - { - Field field; - int endbit, len; + while (ptr) + { + Field field; + int endbit, len; - Utils::parse(p.encoding, ptr, field, endbit, len); + Utils::parse(p.encoding, ptr, field, endbit, len); - switch(field) - { - case FIELD_M: break; // already processed so ignore - case FIELD_D: d |= in.read(len) << endbit; break; - case FIELD_RW: rw |= in.read(len) << endbit; break; - case FIELD_RX: rx |= in.read(len) << endbit; break; - case FIELD_RY: ry |= in.read(len) << endbit; break; - case FIELD_RZ: rz |= in.read(len) << endbit; break; - case FIELD_GW: gw |= in.read(len) << endbit; break; - case FIELD_GX: gx |= in.read(len) << endbit; break; - case FIELD_GY: gy |= in.read(len) << endbit; break; - case FIELD_GZ: gz |= in.read(len) << endbit; break; - case FIELD_BW: bw |= in.read(len) << endbit; break; - case FIELD_BX: bx |= in.read(len) << endbit; break; - case FIELD_BY: by |= in.read(len) << endbit; break; - case FIELD_BZ: bz |= in.read(len) << endbit; break; - default: nvAssume(0); - } - } + switch(field) + { + case FIELD_M: break; // already processed so ignore + case FIELD_D: d |= in.read(len) << endbit; break; + case FIELD_RW: rw |= in.read(len) << endbit; break; + case FIELD_RX: rx |= in.read(len) << endbit; break; + case FIELD_RY: ry |= in.read(len) << endbit; break; + case FIELD_RZ: rz |= in.read(len) << endbit; break; + case FIELD_GW: gw |= in.read(len) << endbit; break; + case FIELD_GX: gx |= in.read(len) << endbit; break; + case FIELD_GY: gy |= in.read(len) << endbit; break; + case FIELD_GZ: gz |= in.read(len) << endbit; break; + case FIELD_BW: bw |= in.read(len) << endbit; break; + case FIELD_BX: bx |= in.read(len) << endbit; break; + case FIELD_BY: by |= in.read(len) << endbit; break; + case FIELD_BZ: bz |= in.read(len) << endbit; break; + default: nvAssume(0); + } + } - nvDebugCheck (in.getptr() == 128 - 46); + nvDebugCheck (in.getptr() == 128 - 46); - shapeindex = d; - endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz; - endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz; - endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz; + shapeindex = d; + endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[1].A[0] = ry; endpts[1].B[0] = rz; + endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[1].A[1] = gy; endpts[1].B[1] = gz; + endpts[0].A[2] = bw; endpts[0].B[2] = bx; endpts[1].A[2] = by; endpts[1].B[2] = bz; - return true; + return true; } static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) { - int positions[NREGIONS_TWO]; + int positions[NREGIONS_TWO]; - for (int r = 0; r < NREGIONS_TWO; ++r) - positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r); + for (int r = 0; r < NREGIONS_TWO; ++r) + positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r); - for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) - { - int x = POS_TO_X(pos); - int y = POS_TO_Y(pos); + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); - bool match = false; + bool match = false; - for (int r = 0; r < NREGIONS_TWO; ++r) - if (positions[r] == pos) { match = true; break; } + for (int r = 0; r < NREGIONS_TWO; ++r) + if (positions[r] == pos) { match = true; break; } - out.write(indices[y][x], INDEXBITS - (match ? 1 : 0)); - } + out.write(indices[y][x], INDEXBITS - (match ? 1 : 0)); + } } static void emit_block(const ComprEndpts compr_endpts[NREGIONS_TWO], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) { - Bits out(block, ZOH::BITSIZE); + Bits out(block, ZOH::BITSIZE); - write_header(compr_endpts, shapeindex, p, out); + write_header(compr_endpts, shapeindex, p, out); - write_indices(indices, shapeindex, out); + write_indices(indices, shapeindex, out); - nvDebugCheck(out.getptr() == ZOH::BITSIZE); + nvDebugCheck(out.getptr() == ZOH::BITSIZE); } static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES]) { - // scale endpoints - int a, b; // really need a IntVector3... + // scale endpoints + int a, b; // really need a IntVector3... - a = Utils::unquantize(endpts.A[0], prec); - b = Utils::unquantize(endpts.B[0], prec); + a = Utils::unquantize(endpts.A[0], prec); + b = Utils::unquantize(endpts.B[0], prec); - // interpolate - for (int i = 0; i < NINDICES; ++i) - palette[i].x = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].x = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); - a = Utils::unquantize(endpts.A[1], prec); - b = Utils::unquantize(endpts.B[1], prec); + a = Utils::unquantize(endpts.A[1], prec); + b = Utils::unquantize(endpts.B[1], prec); - // interpolate - for (int i = 0; i < NINDICES; ++i) - palette[i].y = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].y = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); - a = Utils::unquantize(endpts.A[2], prec); - b = Utils::unquantize(endpts.B[2], prec); + a = Utils::unquantize(endpts.A[2], prec); + b = Utils::unquantize(endpts.B[2], prec); - // interpolate - for (int i = 0; i < NINDICES; ++i) - palette[i].z = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); + // interpolate + for (int i = 0; i < NINDICES; ++i) + palette[i].z = Utils::finish_unquantize(PALETTE_LERP(a, b, i, DENOM), prec); } static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) { - int positions[NREGIONS_TWO]; + int positions[NREGIONS_TWO]; - for (int r = 0; r < NREGIONS_TWO; ++r) - positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r); + for (int r = 0; r < NREGIONS_TWO; ++r) + positions[r] = SHAPEINDEX_TO_COMPRESSED_INDICES(shapeindex,r); - for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) - { - int x = POS_TO_X(pos); - int y = POS_TO_Y(pos); + for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) + { + int x = POS_TO_X(pos); + int y = POS_TO_Y(pos); - bool match = false; + bool match = false; - for (int r = 0; r < NREGIONS_TWO; ++r) - if (positions[r] == pos) { match = true; break; } + for (int r = 0; r < NREGIONS_TWO; ++r) + if (positions[r] == pos) { match = true; break; } - indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0)); - } + indices[y][x]= in.read(INDEXBITS - (match ? 1 : 0)); + } } void ZOH::decompresstwo(const char *block, Tile &t) { - Bits in(block, ZOH::BITSIZE); + Bits in(block, ZOH::BITSIZE); - Pattern p; - IntEndpts endpts[NREGIONS_TWO]; - ComprEndpts compr_endpts[NREGIONS_TWO]; - int shapeindex; + Pattern p; + IntEndpts endpts[NREGIONS_TWO]; + ComprEndpts compr_endpts[NREGIONS_TWO]; + int shapeindex; - if (!read_header(in, compr_endpts, shapeindex, p)) - { - // reserved mode, return all zeroes - for (int y = 0; y < Tile::TILE_H; y++) - for (int x = 0; x < Tile::TILE_W; x++) - t.data[y][x] = Vector3 (zero); + if (!read_header(in, compr_endpts, shapeindex, p)) + { + // reserved mode, return all zeroes + for (int y = 0; y < Tile::TILE_H; y++) + for (int x = 0; x < Tile::TILE_W; x++) + t.data[y][x] = Vector3 (zero); - return; - } - - decompress_endpts(compr_endpts, endpts, p); + return; + } - Vector3 palette[NREGIONS_TWO][NINDICES]; - for (int r = 0; r < NREGIONS_TWO; ++r) - generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); + decompress_endpts(compr_endpts, endpts, p); - int indices[Tile::TILE_H][Tile::TILE_W]; + Vector3 palette[NREGIONS_TWO][NINDICES]; + for (int r = 0; r < NREGIONS_TWO; ++r) + generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); - read_indices(in, shapeindex, indices); + int indices[Tile::TILE_H][Tile::TILE_W]; - nvDebugCheck(in.getptr() == ZOH::BITSIZE); + read_indices(in, shapeindex, indices); - // lookup - for (int y = 0; y < Tile::TILE_H; y++) + nvDebugCheck(in.getptr() == ZOH::BITSIZE); + + // lookup + for (int y = 0; y < Tile::TILE_H; y++) for (int x = 0; x < Tile::TILE_W; x++) - t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; + t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr static double map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec) { - Vector3 palette[NINDICES]; - double toterr = 0; - Vector3 err; + Vector3 palette[NINDICES]; + double toterr = 0; + Vector3 err; - generate_palette_quantized(endpts, prec, palette); + generate_palette_quantized(endpts, prec, palette); - for (int i = 0; i < np; ++i) - { - double err, besterr; + for (int i = 0; i < np; ++i) + { + double err, besterr; - besterr = Utils::norm(colors[i], palette[0]) * importance[i]; + besterr = Utils::norm(colors[i], palette[0]) * importance[i]; - for (int j = 1; j < NINDICES && besterr > 0; ++j) - { - err = Utils::norm(colors[i], palette[j]) * importance[i]; + for (int j = 1; j < NINDICES && besterr > 0; ++j) + { + err = Utils::norm(colors[i], palette[j]) * importance[i]; - if (err > besterr) // error increased, so we're done searching - break; - if (err < besterr) - besterr = err; - } - toterr += besterr; - } - return toterr; + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; } // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_TWO], int prec, - int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_TWO]) + int indices[Tile::TILE_H][Tile::TILE_W], double toterr[NREGIONS_TWO]) { - // build list of possibles - Vector3 palette[NREGIONS_TWO][NINDICES]; + // build list of possibles + Vector3 palette[NREGIONS_TWO][NINDICES]; - for (int region = 0; region < NREGIONS_TWO; ++region) - { - generate_palette_quantized(endpts[region], prec, &palette[region][0]); - toterr[region] = 0; - } + for (int region = 0; region < NREGIONS_TWO; ++region) + { + generate_palette_quantized(endpts[region], prec, &palette[region][0]); + toterr[region] = 0; + } - Vector3 err; + Vector3 err; - for (int y = 0; y < tile.size_y; y++) + for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { - int region = REGION(x,y,shapeindex); - double err, besterr; + int region = REGION(x,y,shapeindex); + double err, besterr; - besterr = Utils::norm(tile.data[y][x], palette[region][0]); - indices[y][x] = 0; + besterr = Utils::norm(tile.data[y][x], palette[region][0]); + indices[y][x] = 0; - for (int i = 1; i < NINDICES && besterr > 0; ++i) - { - err = Utils::norm(tile.data[y][x], palette[region][i]); + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]); - if (err > besterr) // error increased, so we're done searching - break; - if (err < besterr) - { - besterr = err; - indices[y][x] = i; - } - } - toterr[region] += besterr; - } + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + { + besterr = err; + indices[y][x] = i; + } + } + toterr[region] += besterr; + } } static double perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, - double old_err, int do_b) + double old_err, int do_b) { - // we have the old endpoints: old_endpts - // we have the perturbed endpoints: new_endpts - // we have the temporary endpoints: temp_endpts + // we have the old endpoints: old_endpts + // we have the perturbed endpoints: new_endpts + // we have the temporary endpoints: temp_endpts - IntEndpts temp_endpts; - float min_err = old_err; // start with the best current error - int beststep; + IntEndpts temp_endpts; + float min_err = old_err; // start with the best current error + int beststep; - // copy real endpoints so we can perturb them - for (int i=0; i>= 1) - { - bool improved = false; - for (int sign = -1; sign <= 1; sign += 2) - { - if (do_b == 0) - { - temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; - if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) - continue; - } - else - { - temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; - if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) - continue; - } + // do a logarithmic search for the best error for this endpoint (which) + for (int step = 1 << (prec-1); step; step >>= 1) + { + bool improved = false; + for (int sign = -1; sign <= 1; sign += 2) + { + if (do_b == 0) + { + temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; + if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) + continue; + } + else + { + temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; + if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) + continue; + } - float err = map_colors(colors, importance, np, temp_endpts, prec); + float err = map_colors(colors, importance, np, temp_endpts, prec); - if (err < min_err) - { - improved = true; - min_err = err; - beststep = sign * step; - } - } - // if this was an improvement, move the endpoint and continue search from there - if (improved) - { - if (do_b == 0) - new_endpts.A[ch] += beststep; - else - new_endpts.B[ch] += beststep; - } - } - return min_err; + if (err < min_err) + { + improved = true; + min_err = err; + beststep = sign * step; + } + } + // if this was an improvement, move the endpoint and continue search from there + if (improved) + { + if (do_b == 0) + new_endpts.A[ch] += beststep; + else + new_endpts.B[ch] += beststep; + } + } + return min_err; } static void optimize_one(const Vector3 colors[], const float importance[], int np, double orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) { - double opt_err = orig_err; - for (int ch = 0; ch < NCHANNELS; ++ch) - { - opt_endpts.A[ch] = orig_endpts.A[ch]; - opt_endpts.B[ch] = orig_endpts.B[ch]; - } - /* - err0 = perturb(rgb0, delta0) - err1 = perturb(rgb1, delta1) - if (err0 < err1) - if (err0 >= initial_error) break - rgb0 += delta0 - next = 1 - else - if (err1 >= initial_error) break - rgb1 += delta1 - next = 0 - initial_err = map() - for (;;) - err = perturb(next ? rgb1:rgb0, delta) - if (err >= initial_err) break - next? rgb1 : rgb0 += delta - initial_err = err - */ - IntEndpts new_a, new_b; - IntEndpts new_endpt; - int do_b; + double opt_err = orig_err; + for (int ch = 0; ch < NCHANNELS; ++ch) + { + opt_endpts.A[ch] = orig_endpts.A[ch]; + opt_endpts.B[ch] = orig_endpts.B[ch]; + } + /* + err0 = perturb(rgb0, delta0) + err1 = perturb(rgb1, delta1) + if (err0 < err1) + if (err0 >= initial_error) break + rgb0 += delta0 + next = 1 + else + if (err1 >= initial_error) break + rgb1 += delta1 + next = 0 + initial_err = map() + for (;;) + err = perturb(next ? rgb1:rgb0, delta) + if (err >= initial_err) break + next? rgb1 : rgb0 += delta + initial_err = err + */ + IntEndpts new_a, new_b; + IntEndpts new_endpt; + int do_b; - // now optimize each channel separately - for (int ch = 0; ch < NCHANNELS; ++ch) - { - // figure out which endpoint when perturbed gives the most improvement and start there - // if we just alternate, we can easily end up in a local minima - float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A - float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B + // now optimize each channel separately + for (int ch = 0; ch < NCHANNELS; ++ch) + { + // figure out which endpoint when perturbed gives the most improvement and start there + // if we just alternate, we can easily end up in a local minima + float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A + float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B - if (err0 < err1) - { - if (err0 >= opt_err) - continue; + if (err0 < err1) + { + if (err0 >= opt_err) + continue; - opt_endpts.A[ch] = new_a.A[ch]; - opt_err = err0; - do_b = 1; // do B next - } - else - { - if (err1 >= opt_err) - continue; - opt_endpts.B[ch] = new_b.B[ch]; - opt_err = err1; - do_b = 0; // do A next - } - - // now alternate endpoints and keep trying until there is no improvement - for (;;) - { - float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); - if (err >= opt_err) - break; - if (do_b == 0) - opt_endpts.A[ch] = new_endpt.A[ch]; - else - opt_endpts.B[ch] = new_endpt.B[ch]; - opt_err = err; - do_b = 1 - do_b; // now move the other endpoint - } - } + opt_endpts.A[ch] = new_a.A[ch]; + opt_err = err0; + do_b = 1; // do B next + } + else + { + if (err1 >= opt_err) + continue; + opt_endpts.B[ch] = new_b.B[ch]; + opt_err = err1; + do_b = 0; // do A next + } + + // now alternate endpoints and keep trying until there is no improvement + for (;;) + { + float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); + if (err >= opt_err) + break; + if (do_b == 0) + opt_endpts.A[ch] = new_endpt.A[ch]; + else + opt_endpts.B[ch] = new_endpt.B[ch]; + opt_err = err; + do_b = 1 - do_b; // now move the other endpoint + } + } } static void optimize_endpts(const Tile &tile, int shapeindex, const double orig_err[NREGIONS_TWO], - const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO]) + const IntEndpts orig_endpts[NREGIONS_TWO], int prec, IntEndpts opt_endpts[NREGIONS_TWO]) { - Vector3 pixels[Tile::TILE_TOTAL]; - float importance[Tile::TILE_TOTAL]; - double err = 0; + Vector3 pixels[Tile::TILE_TOTAL]; + float importance[Tile::TILE_TOTAL]; + double err = 0; - for (int region=0; region 0; ++i) - { - err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; + for (int i = 1; i < NINDICES && besterr > 0; ++i) + { + err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; - if (err > besterr) // error increased, so we're done searching - break; - if (err < besterr) - besterr = err; - } - toterr += besterr; - } - return toterr; + if (err > besterr) // error increased, so we're done searching + break; + if (err < besterr) + besterr = err; + } + toterr += besterr; + } + return toterr; } double ZOH::roughtwo(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_TWO]) { - for (int region=0; region maxp) maxp = dp; - } + // project each pixel value along the principal direction + float minp = FLT_MAX, maxp = -FLT_MAX; + for (int i = 0; i < np; i++) + { + float dp = dot(colors[i]-mean, direction); + if (dp < minp) minp = dp; + if (dp > maxp) maxp = dp; + } - // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values - endpts[region].A = mean + minp*direction; - endpts[region].B = mean + maxp*direction; + // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values + endpts[region].A = mean + minp*direction; + endpts[region].B = mean + maxp*direction; - // clamp endpoints - // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best - // shape based on endpoints being clamped - Utils::clamp(endpts[region].A); - Utils::clamp(endpts[region].B); - } + // clamp endpoints + // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best + // shape based on endpoints being clamped + Utils::clamp(endpts[region].A); + Utils::clamp(endpts[region].B); + } - return map_colors(tile, shapeindex, endpts); + return map_colors(tile, shapeindex, endpts); } double ZOH::compresstwo(const Tile &t, char *block) { - int shapeindex_best = 0; - FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO]; - double msebest = DBL_MAX; + int shapeindex_best = 0; + FltEndpts endptsbest[NREGIONS_TWO], tempendpts[NREGIONS_TWO]; + double msebest = DBL_MAX; - /* - collect the mse values that are within 5% of the best values - optimize each one and choose the best - */ - // hack for now -- just use the best value WORK - for (int i=0; i0.0; ++i) - { - double mse = roughtwo(t, i, tempendpts); - if (mse < msebest) - { - msebest = mse; - shapeindex_best = i; - memcpy(endptsbest, tempendpts, sizeof(endptsbest)); - } + /* + collect the mse values that are within 5% of the best values + optimize each one and choose the best + */ + // hack for now -- just use the best value WORK + for (int i=0; i0.0; ++i) + { + double mse = roughtwo(t, i, tempendpts); + if (mse < msebest) + { + msebest = mse; + shapeindex_best = i; + memcpy(endptsbest, tempendpts, sizeof(endptsbest)); + } - } - return refinetwo(t, shapeindex_best, endptsbest, block); + } + return refinetwo(t, shapeindex_best, endptsbest, block); }