/* Copyright 2007 nVidia, Inc. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ // one region zoh compress/decompress code // Thanks to Jacob Munkberg (jacob@cs.lth.se) for the shortcut of using SVD to do the equivalent of principal components analysis #include "bits.h" #include "tile.h" #include "zoh.h" #include "zoh_utils.h" #include "nvmath/Vector.inl" #include "nvmath/Fitting.h" #include // strlen #include // FLT_MAX using namespace nv; using namespace ZOH; #define NINDICES 16 #define INDEXBITS 4 #define HIGH_INDEXBIT (1<<(INDEXBITS-1)) #define DENOM (NINDICES-1) #define NSHAPES 1 static const int shapes[NSHAPES] = { 0x0000 }; // only 1 shape #define REGION(x,y,shapeindex) ((shapes[shapeindex]&(1<<(15-(x)-4*(y))))!=0) #define POS_TO_X(pos) ((pos)&3) #define POS_TO_Y(pos) (((pos)>>2)&3) #define NDELTA 2 struct Chanpat { int prec[NDELTA]; // precision pattern for one channel }; struct Pattern { Chanpat chan[NCHANNELS];// allow different bit patterns per channel -- but we still want constant precision per channel int transformed; // if 0, deltas are unsigned and no transform; otherwise, signed and transformed int mode; // associated mode value int modebits; // number of mode bits const char *encoding; // verilog description of encoding for this mode }; #define MAXMODEBITS 5 #define MAXMODES (1<> 2) & 3 and x = index & 3 static void swap_indices(IntEndpts endpts[NREGIONS_ONE], int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex) { int index_positions[NREGIONS_ONE]; index_positions[0] = 0; // since WLOG we have the high bit of the shapes at 0 for (int region = 0; region < NREGIONS_ONE; ++region) { int x = index_positions[region] & 3; int y = (index_positions[region] >> 2) & 3; nvDebugCheck(REGION(x,y,shapeindex) == region); // double check the table if (indices[y][x] & HIGH_INDEXBIT) { // high bit is set, swap the endpts and indices for this region int t; for (int i=0; i> endbit, len); break; case FIELD_RW: out.write(rw >> endbit, len); break; case FIELD_RX: out.write(rx >> endbit, len); break; case FIELD_GW: out.write(gw >> endbit, len); break; case FIELD_GX: out.write(gx >> endbit, len); break; case FIELD_BW: out.write(bw >> endbit, len); break; case FIELD_BX: out.write(bx >> endbit, len); break; case FIELD_D: case FIELD_RY: case FIELD_RZ: case FIELD_GY: case FIELD_GZ: case FIELD_BY: case FIELD_BZ: default: nvUnreachable(); } } } static void read_header(Bits &in, ComprEndpts endpts[NREGIONS_ONE], Pattern &p) { // reading isn't quite symmetric with writing -- we don't know the encoding until we decode the mode int mode = in.read(2); if (mode != 0x00 && mode != 0x01) mode = (in.read(3) << 2) | mode; int pat_index = mode_to_pat[mode]; nvDebugCheck (pat_index >= 0 && pat_index < NPATTERNS); nvDebugCheck (in.getptr() == patterns[pat_index].modebits); p = patterns[pat_index]; int d; int rw, rx; int gw, gx; int bw, bx; d = 0; rw = rx = 0; gw = gx = 0; bw = bx = 0; int ptr = int(strlen(p.encoding)); while (ptr) { Field field; int endbit, len; // !!!UNDONE: get rid of string parsing!!! Utils::parse(p.encoding, ptr, field, endbit, len); switch(field) { case FIELD_M: break; // already processed so ignore case FIELD_RW: rw |= in.read(len) << endbit; break; case FIELD_RX: rx |= in.read(len) << endbit; break; case FIELD_GW: gw |= in.read(len) << endbit; break; case FIELD_GX: gx |= in.read(len) << endbit; break; case FIELD_BW: bw |= in.read(len) << endbit; break; case FIELD_BX: bx |= in.read(len) << endbit; break; case FIELD_D: case FIELD_RY: case FIELD_RZ: case FIELD_GY: case FIELD_GZ: case FIELD_BY: case FIELD_BZ: default: nvUnreachable(); } } nvDebugCheck (in.getptr() == 128 - 63); endpts[0].A[0] = rw; endpts[0].B[0] = rx; endpts[0].A[1] = gw; endpts[0].B[1] = gx; endpts[0].A[2] = bw; endpts[0].B[2] = bx; } // compress index 0 static void write_indices(const int indices[Tile::TILE_H][Tile::TILE_W], int shapeindex, Bits &out) { for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) { int x = POS_TO_X(pos); int y = POS_TO_Y(pos); out.write(indices[y][x], INDEXBITS - ((pos == 0) ? 1 : 0)); } } static void emit_block(const ComprEndpts endpts[NREGIONS_ONE], int shapeindex, const Pattern &p, const int indices[Tile::TILE_H][Tile::TILE_W], char *block) { Bits out(block, ZOH::BITSIZE); write_header(endpts, p, out); write_indices(indices, shapeindex, out); nvDebugCheck(out.getptr() == ZOH::BITSIZE); } static void generate_palette_quantized(const IntEndpts &endpts, int prec, Vector3 palette[NINDICES]) { // scale endpoints int a, b; // really need a IntVector3... a = Utils::unquantize(endpts.A[0], prec); b = Utils::unquantize(endpts.B[0], prec); // interpolate for (int i = 0; i < NINDICES; ++i) palette[i].x = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); a = Utils::unquantize(endpts.A[1], prec); b = Utils::unquantize(endpts.B[1], prec); // interpolate for (int i = 0; i < NINDICES; ++i) palette[i].y = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); a = Utils::unquantize(endpts.A[2], prec); b = Utils::unquantize(endpts.B[2], prec); // interpolate for (int i = 0; i < NINDICES; ++i) palette[i].z = float(Utils::finish_unquantize(Utils::lerp(a, b, i, DENOM), prec)); } // position 0 was compressed static void read_indices(Bits &in, int shapeindex, int indices[Tile::TILE_H][Tile::TILE_W]) { for (int pos = 0; pos < Tile::TILE_TOTAL; ++pos) { int x = POS_TO_X(pos); int y = POS_TO_Y(pos); indices[y][x]= in.read(INDEXBITS - ((pos == 0) ? 1 : 0)); } } void ZOH::decompressone(const char *block, Tile &t) { Bits in(block, ZOH::BITSIZE); Pattern p; IntEndpts endpts[NREGIONS_ONE]; ComprEndpts compr_endpts[NREGIONS_ONE]; read_header(in, compr_endpts, p); int shapeindex = 0; // only one shape decompress_endpts(compr_endpts, endpts, p); Vector3 palette[NREGIONS_ONE][NINDICES]; for (int r = 0; r < NREGIONS_ONE; ++r) generate_palette_quantized(endpts[r], p.chan[0].prec[0], &palette[r][0]); // read indices int indices[Tile::TILE_H][Tile::TILE_W]; read_indices(in, shapeindex, indices); nvDebugCheck(in.getptr() == ZOH::BITSIZE); // lookup for (int y = 0; y < Tile::TILE_H; y++) for (int x = 0; x < Tile::TILE_W; x++) t.data[y][x] = palette[REGION(x,y,shapeindex)][indices[y][x]]; } // given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr static float map_colors(const Vector3 colors[], const float importance[], int np, const IntEndpts &endpts, int prec) { Vector3 palette[NINDICES]; float toterr = 0; Vector3 err; generate_palette_quantized(endpts, prec, palette); for (int i = 0; i < np; ++i) { float err, besterr; besterr = Utils::norm(colors[i], palette[0]) * importance[i]; for (int j = 1; j < NINDICES && besterr > 0; ++j) { err = Utils::norm(colors[i], palette[j]) * importance[i]; if (err > besterr) // error increased, so we're done searching break; if (err < besterr) besterr = err; } toterr += besterr; } return toterr; } // assign indices given a tile, shape, and quantized endpoints, return toterr for each region static void assign_indices(const Tile &tile, int shapeindex, IntEndpts endpts[NREGIONS_ONE], int prec, int indices[Tile::TILE_H][Tile::TILE_W], float toterr[NREGIONS_ONE]) { // build list of possibles Vector3 palette[NREGIONS_ONE][NINDICES]; for (int region = 0; region < NREGIONS_ONE; ++region) { generate_palette_quantized(endpts[region], prec, &palette[region][0]); toterr[region] = 0; } Vector3 err; for (int y = 0; y < tile.size_y; y++) for (int x = 0; x < tile.size_x; x++) { int region = REGION(x,y,shapeindex); float err, besterr; besterr = Utils::norm(tile.data[y][x], palette[region][0]); indices[y][x] = 0; for (int i = 1; i < NINDICES && besterr > 0; ++i) { err = Utils::norm(tile.data[y][x], palette[region][i]); if (err > besterr) // error increased, so we're done searching break; if (err < besterr) { besterr = err; indices[y][x] = i; } } toterr[region] += besterr; } } static float perturb_one(const Vector3 colors[], const float importance[], int np, int ch, int prec, const IntEndpts &old_endpts, IntEndpts &new_endpts, float old_err, int do_b) { // we have the old endpoints: old_endpts // we have the perturbed endpoints: new_endpts // we have the temporary endpoints: temp_endpts IntEndpts temp_endpts; float min_err = old_err; // start with the best current error int beststep; // copy real endpoints so we can perturb them for (int i=0; i>= 1) { bool improved = false; for (int sign = -1; sign <= 1; sign += 2) { if (do_b == 0) { temp_endpts.A[ch] = new_endpts.A[ch] + sign * step; if (temp_endpts.A[ch] < 0 || temp_endpts.A[ch] >= (1 << prec)) continue; } else { temp_endpts.B[ch] = new_endpts.B[ch] + sign * step; if (temp_endpts.B[ch] < 0 || temp_endpts.B[ch] >= (1 << prec)) continue; } float err = map_colors(colors, importance, np, temp_endpts, prec); if (err < min_err) { improved = true; min_err = err; beststep = sign * step; } } // if this was an improvement, move the endpoint and continue search from there if (improved) { if (do_b == 0) new_endpts.A[ch] += beststep; else new_endpts.B[ch] += beststep; } } return min_err; } static void optimize_one(const Vector3 colors[], const float importance[], int np, float orig_err, const IntEndpts &orig_endpts, int prec, IntEndpts &opt_endpts) { float opt_err = orig_err; for (int ch = 0; ch < NCHANNELS; ++ch) { opt_endpts.A[ch] = orig_endpts.A[ch]; opt_endpts.B[ch] = orig_endpts.B[ch]; } /* err0 = perturb(rgb0, delta0) err1 = perturb(rgb1, delta1) if (err0 < err1) if (err0 >= initial_error) break rgb0 += delta0 next = 1 else if (err1 >= initial_error) break rgb1 += delta1 next = 0 initial_err = map() for (;;) err = perturb(next ? rgb1:rgb0, delta) if (err >= initial_err) break next? rgb1 : rgb0 += delta initial_err = err */ IntEndpts new_a, new_b; IntEndpts new_endpt; int do_b; // now optimize each channel separately for (int ch = 0; ch < NCHANNELS; ++ch) { // figure out which endpoint when perturbed gives the most improvement and start there // if we just alternate, we can easily end up in a local minima float err0 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_a, opt_err, 0); // perturb endpt A float err1 = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_b, opt_err, 1); // perturb endpt B if (err0 < err1) { if (err0 >= opt_err) continue; opt_endpts.A[ch] = new_a.A[ch]; opt_err = err0; do_b = 1; // do B next } else { if (err1 >= opt_err) continue; opt_endpts.B[ch] = new_b.B[ch]; opt_err = err1; do_b = 0; // do A next } // now alternate endpoints and keep trying until there is no improvement for (;;) { float err = perturb_one(colors, importance, np, ch, prec, opt_endpts, new_endpt, opt_err, do_b); if (err >= opt_err) break; if (do_b == 0) opt_endpts.A[ch] = new_endpt.A[ch]; else opt_endpts.B[ch] = new_endpt.B[ch]; opt_err = err; do_b = 1 - do_b; // now move the other endpoint } } } static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_err[NREGIONS_ONE], const IntEndpts orig_endpts[NREGIONS_ONE], int prec, IntEndpts opt_endpts[NREGIONS_ONE]) { Vector3 pixels[Tile::TILE_TOTAL]; float importance[Tile::TILE_TOTAL]; float err = 0; for (int region=0; region 0; ++i) { err = Utils::norm(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x]; if (err > besterr) // error increased, so we're done searching break; if (err < besterr) besterr = err; } toterr += besterr; } return toterr; } float ZOH::roughone(const Tile &tile, int shapeindex, FltEndpts endpts[NREGIONS_ONE]) { for (int region=0; region maxp) maxp = dp; } // choose as endpoints 2 points along the principal direction that span the projections of all of the pixel values endpts[region].A = mean + minp*direction; endpts[region].B = mean + maxp*direction; // clamp endpoints // the argument for clamping is that the actual endpoints need to be clamped and thus we need to choose the best // shape based on endpoints being clamped Utils::clamp(endpts[region].A); Utils::clamp(endpts[region].B); } return map_colors(tile, shapeindex, endpts); } float ZOH::compressone(const Tile &t, char *block) { int shapeindex_best = 0; FltEndpts endptsbest[NREGIONS_ONE], tempendpts[NREGIONS_ONE]; float msebest = FLT_MAX; /* collect the mse values that are within 5% of the best values optimize each one and choose the best */ // hack for now -- just use the best value WORK for (int i=0; i0.0; ++i) { float mse = roughone(t, i, tempendpts); if (mse < msebest) { msebest = mse; shapeindex_best = i; memcpy(endptsbest, tempendpts, sizeof(endptsbest)); } } return refineone(t, shapeindex_best, endptsbest, block); }