From 8acaf1ed96f3341cc16595db44a36ea01206133e Mon Sep 17 00:00:00 2001 From: drewcassidy Date: Mon, 22 Feb 2021 02:05:14 -0800 Subject: [PATCH] Implement first chunk of refinement code --- src/BC1/BC1Encoder.cpp | 100 +++++++++++++++++++++++++++++++---------- src/BC1/BC1Encoder.h | 14 +++++- src/BlockView.h | 5 ++- src/Color.h | 2 +- src/Vector4Int.h | 5 +++ 5 files changed, 98 insertions(+), 28 deletions(-) diff --git a/src/BC1/BC1Encoder.cpp b/src/BC1/BC1Encoder.cpp index 429a2fd..1945d6c 100644 --- a/src/BC1/BC1Encoder.cpp +++ b/src/BC1/BC1Encoder.cpp @@ -132,10 +132,28 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { EncodeResults orig; FindEndpoints(pixels, _flags, metrics, orig.low, orig.high); FindSelectors4(pixels, orig); - if (orig.low == orig.high) { + EncodeResults best = orig; + + const uint32_t total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1; + for (unsigned pass = 0; pass < total_ls_passes; pass++) { + EncodeResults trial = best; + Vector4 low, high; + + bool multicolor = ComputeEndpointsLS(pixels, trial, low, high, metrics); + if (multicolor) { + trial.low = Color::PreciseRound565(low); + trial.high = Color::PreciseRound565(high); + } + + if (trial.low == best.low && trial.high == best.high) break; + FindSelectors4(pixels, trial); + best = trial; + } + + if (best.low == best.high) { EncodeBlockSingleColor(metrics.avg, dest); } else { - EncodeBlock4Color(orig, dest); + EncodeBlock4Color(best, dest); } } } @@ -205,6 +223,7 @@ void BC1Encoder::EncodeBlockSingleColor(Color color, BC1Block *dest) const { } void BC1Encoder::EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const { + const std::array lut = {0, 2, 3, 1}; if (block.low == block.high) { EncodeBlockSingleColor(block.low.ScaleFrom565() /* Color(255, 0, 255)*/, dest); return; @@ -218,25 +237,21 @@ void BC1Encoder::EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const { mask = 0x55; } + BC1Block::UnpackedSelectors selectors; + + for (unsigned i = 0; i < 16; i++) { + unsigned x = i % 4; + unsigned y = i / 4; + selectors[y][x] = lut[block.selectors[i]]; + } + assert(low > high); dest->SetLowColor(low); dest->SetHighColor(high); - dest->PackSelectors(block.selectors, mask); + dest->PackSelectors(selectors, mask); } -void encode_bc1_pick_initial(const Color *pSrc_pixels, uint32_t flags, bool grayscale_flag, int min_r, int min_g, int min_b, int max_r, int max_g, int max_b, - int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, int &lr, int &lg, int &lb, int &hr, int &hg, int &hb); - void BC1Encoder::FindEndpoints(Color4x4 pixels, BC1Encoder::Flags flags, const BC1Encoder::BlockMetrics metrics, Color &low, Color &high) const { - int lr, lg, lb, hr, hg, hb; - auto colors = pixels.Flatten(); - encode_bc1_pick_initial(&colors[0], (uint32_t)_flags, metrics.is_greyscale, metrics.min.r, metrics.min.g, metrics.min.b, metrics.max.r, metrics.max.g, - metrics.max.b, metrics.avg.r, metrics.avg.g, metrics.avg.b, metrics.sums[0], metrics.sums[1], metrics.sums[2], lr, lg, lb, hr, hg, - hb); - low = Color(lr, lg, lb); - high = Color(hr, hg, hb); - return; - if (metrics.is_greyscale) { // specialized greyscale case const unsigned fr = pixels.Get(0).r; @@ -276,8 +291,8 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, BC1Encoder::Flags flags, const B for (unsigned c = 0; c < 3; c++) { sums_xy[c] += val[chan0] * val[c]; } } - auto &sum_x = sums[chan0]; - auto &sum_xx = sums_xy[chan0]; + const auto &sum_x = sums[chan0]; + const auto &sum_xx = sums_xy[chan0]; float denominator = (float)(16 * sum_xx) - (float)(sum_x * sum_x); @@ -436,10 +451,6 @@ unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults & // colors in selector order, 0, 1, 2, 3 // 0 = low color, 1 = high color, 2/3 = interpolated std::array colors = _interpolator->InterpolateBC1(block.low, block.high, false); - // std::array colorVectors; - // for (unsigned i = 0; i < 4; i++) { colorVectors[i] = (Vector4Int)colors[i]; } - - const std::array selectors = {1, 3, 2, 0}; std::array colorVectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]}; if (!use_err) { @@ -455,14 +466,57 @@ unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults & for (unsigned y = 0; y < 4; y++) { int dot = a.Dot((Vector4Int)pixels.Get(x, y)); unsigned level = (dot <= t0) + (dot < t1) + (dot < t2); - unsigned selector = selectors[level]; + unsigned selector = 3 - level; assert(level < 4); assert(selector < 4); - block.selectors[y][x] = selector; + block.selectors[x + (4 * y)] = selector; } } return 0; } return 0; } + +bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color, + bool use_black) const { + Vector4 q00 = {0, 0, 0}; + unsigned weight_accum = 0; + for (unsigned i = 0; i < 16; i++) { + const Color color = pixels.Get(i); + const int sel = (int)block.selectors[i]; + + if (use_black && color.IsBlack()) continue; + if (is_3color && sel == 3) continue; // NOTE: selectors for 3-color are in linear order here, but not in original + assert(sel <= 3); + + const Vector4Int color_vector = Vector4Int::FromColorRGB(color); + q00 += color_vector * sel; + weight_accum += g_weight_vals4[sel]; + } + + int denominator = is_3color ? 2 : 3; + Vector4 q10 = (metrics.sums * denominator) - q00; + + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + // invert matrix + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) return false; + + det = ((float)denominator / 255.0f) / det; + + float iz00, iz01, iz10, iz11; + iz00 = z11 * det; + iz01 = -z01 * det; + iz10 = -z10 * det; + iz11 = z00 * det; + + low = (q00 * iz00) + (q10 * iz01); + high = (q00 * iz10) + (q10 * iz11); + + return true; +} } // namespace rgbcx \ No newline at end of file diff --git a/src/BC1/BC1Encoder.h b/src/BC1/BC1Encoder.h index 73c9a17..322d5f3 100644 --- a/src/BC1/BC1Encoder.h +++ b/src/BC1/BC1Encoder.h @@ -117,12 +117,12 @@ class BC1Encoder : public BlockEncoder { unsigned _orderings3; // Unpacked BC1 block with metadata - using UnpackedSelectors = std::array, 4>; struct EncodeResults { Color low; Color high; - UnpackedSelectors selectors; + std::array selectors; bool is_3_color; + bool is_1_color; }; void EncodeBlockSingleColor(Color color, BC1Block *dest) const; @@ -131,6 +131,9 @@ class BC1Encoder : public BlockEncoder { void FindEndpoints(Color4x4 pixels, Flags flags, BlockMetrics const metrics, Color &low, Color &high) const; unsigned FindSelectors4(Color4x4 pixels, EncodeResults &block, unsigned cur_err = 0, bool use_err = false) const; + bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color = false, + bool use_black = false) const; + // match tables used for single-color blocks // Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible, // with an included error value @@ -154,5 +157,12 @@ class BC1Encoder : public BlockEncoder { static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3]; static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3]; + + // This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) + // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. + static constexpr uint32_t g_weight_vals4[4] = {0x000009, 0x010204, 0x040201, 0x090000}; + + // multiplier is 4 for 3-color + static constexpr uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101}; }; } // namespace rgbcx diff --git a/src/BlockView.h b/src/BlockView.h index 3d0154d..d064f89 100644 --- a/src/BlockView.h +++ b/src/BlockView.h @@ -19,14 +19,15 @@ #pragma once -#include #include +#include #include #include #include #include #include "Color.h" +#include "Vector4Int.h" #include "ndebug.h" namespace rgbcx { @@ -109,7 +110,7 @@ template class ColorBlockView : public BlockView sums; + Vector4Int sums; }; ColorBlockView(Color *start, int row_stride = N, int pixel_stride = 1) : Base(start, row_stride, pixel_stride) {} diff --git a/src/Color.h b/src/Color.h index 3976390..5183b0a 100644 --- a/src/Color.h +++ b/src/Color.h @@ -27,7 +27,6 @@ namespace rgbcx { class Vector4; class Vector4Int; - #pragma pack(push, 1) class Color { public: @@ -81,6 +80,7 @@ class Color { size_t MaxChannelRGB(); bool IsGrayscale() const { return ((r == g) && (r == b)); } + bool IsBlack() const { return (r | g | b) < 4; } int GetLuma() const { return (13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U; } // REC709 weightings diff --git a/src/Vector4Int.h b/src/Vector4Int.h index f636594..9cda53d 100644 --- a/src/Vector4Int.h +++ b/src/Vector4Int.h @@ -23,6 +23,7 @@ #include #include "Color.h" +#include "Vector4.h" namespace rgbcx { @@ -63,6 +64,10 @@ class Vector4Int { return _c[index]; } + operator Vector4() const { + return Vector4(_c[0], _c[1], _c[2], _c[3]); + } + friend Vector4Int operator+(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::plus()); } friend Vector4Int operator-(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::minus()); } friend Vector4Int operator*(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::multiplies()); }