Improved first-pass refinement

This commit is contained in:
Andrew Cassidy 2021-02-23 19:44:36 -08:00
parent 8acaf1ed96
commit 2e5cf991b0
5 changed files with 130 additions and 52 deletions

View File

@ -102,7 +102,7 @@ template <size_t S> void PrepSingleColorTable(MatchList &match_table, MatchList
// Constructs an encoder around the given interpolator.
//
// Fills the single-color match tables (the regular and "_half" variants) by calling
// PrepSingleColorTable<5> and PrepSingleColorTable<6> with the stored interpolator, then
// selects the default flag set.
// NOTE(review): 5 and 6 presumably correspond to the channel bit widths of RGB565 - confirm.
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
    PrepSingleColorTable<5>(*_single_match5, *_single_match5_half, *_interpolator);
    PrepSingleColorTable<6>(*_single_match6, *_single_match6_half, *_interpolator);

    // Default flags. _flags is assigned exactly once so there is no dead store to mislead readers
    // about which defaults are actually in effect.
    _flags = Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses;
}
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
@ -124,37 +124,44 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
needs_block_error |= (_search_rounds > 0);
needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None);
unsigned cur_err = UINT_MAX;
unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
unsigned total_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;
if (!needs_block_error || true) {
// assert((_flags & Flags::TryAllInitialEndponts) == Flags::None);
EncodeResults result;
for (unsigned round = 0; round < total_rounds; round++) {
Flags modified_flags = _flags;
if (round == 1) {
modified_flags &= ~(Flags::Use2DLS | Flags::BoundingBoxInt);
modified_flags |= Flags::BoundingBox;
}
EncodeResults orig;
FindEndpoints(pixels, _flags, metrics, orig.low, orig.high);
FindSelectors4(pixels, orig);
EncodeResults best = orig;
EncodeResults round_result;
FindEndpoints(pixels, modified_flags, metrics, round_result.low, round_result.high);
FindSelectors4(pixels, round_result, needs_block_error);
const uint32_t total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
for (unsigned pass = 0; pass < total_ls_passes; pass++) {
EncodeResults trial = best;
EncodeResults trial_result = round_result;
Vector4 low, high;
bool multicolor = ComputeEndpointsLS(pixels, trial, low, high, metrics);
if (multicolor) {
trial.low = Color::PreciseRound565(low);
trial.high = Color::PreciseRound565(high);
bool multicolor = ComputeEndpointsLS(pixels, trial_result, metrics, false, false);
if (trial_result.low == round_result.low && trial_result.high == round_result.high) break;
FindSelectors4(pixels, trial_result, needs_block_error);
if (!needs_block_error || trial_result.error < round_result.error) {
round_result = trial_result;
} else {
break;
}
if (trial.low == best.low && trial.high == best.high) break;
FindSelectors4(pixels, trial);
best = trial;
}
if (!needs_block_error || round_result.error < result.error) { result = round_result; }
}
if (best.low == best.high) {
EncodeBlockSingleColor(metrics.avg, dest);
} else {
EncodeBlock4Color(best, dest);
}
if (result.low == result.high) {
EncodeBlockSingleColor(metrics.avg, dest);
} else {
EncodeBlock4Color(result, dest);
}
}
@ -447,38 +454,96 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, BC1Encoder::Flags flags, const B
}
}
unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, unsigned int cur_err, bool use_err) const {
unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const {
// colors in selector order, 0, 1, 2, 3
// 0 = low color, 1 = high color, 2/3 = interpolated
std::array<Color, 4> colors = _interpolator->InterpolateBC1(block.low, block.high, false);
std::array<Vector4Int, 4> colorVectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]};
std::array<Vector4Int, 4> color_vectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]};
unsigned total_error = 0;
if (!use_err) {
Vector4Int a = colorVectors[3] - colorVectors[0];
Color high = block.high.ScaleFrom565();
Color low = block.low.ScaleFrom565();
if (!use_err || (_flags & Flags::UseFasterMSEEval) != Flags::None) {
Vector4Int axis = color_vectors[3] - color_vectors[0];
std::array<int, 4> dots;
for (unsigned i = 0; i < 4; i++) { dots[i] = a.Dot(colorVectors[i]); }
for (unsigned i = 0; i < 4; i++) { dots[i] = axis.Dot(color_vectors[i]); }
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
a *= 2;
axis *= 2;
for (unsigned x = 0; x < 4; x++) {
for (unsigned y = 0; y < 4; y++) {
int dot = a.Dot((Vector4Int)pixels.Get(x, y));
unsigned level = (dot <= t0) + (dot < t1) + (dot < t2);
unsigned selector = 3 - level;
assert(level < 4);
assert(selector < 4);
block.selectors[x + (4 * y)] = selector;
for (unsigned i = 0; i < 16; i++) {
Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
int dot = axis.Dot(pixel_vector);
uint8_t level = (dot <= t0) + (dot < t1) + (dot < t2);
uint8_t selector = 3 - level;
assert(level < 4);
assert(selector < 4);
if ((_flags & Flags::UseFasterMSEEval) != Flags::None) {
// LLVM is just going to unswitch this anyway, so it's not an issue
auto diff = pixel_vector - color_vectors[selector];
total_error += diff.SqrMag();
if (i % 4 != 0 && total_error >= block.error) break; // check only once per row if we're generating too much error
}
block.selectors[i] = selector;
}
} else if ((_flags & Flags::UseFullMSEEval) != Flags::None) {
for (unsigned i = 0; i < 16; i++) {
unsigned best_error = UINT_MAX;
uint8_t best_sel = 0;
Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
// exhaustively check every pixel's distance from each color, and calculate the error
for (uint8_t j = 0; j < 4; j++) {
auto diff = color_vectors[j] - pixel_vector;
unsigned err = diff.SqrMag();
if (err < best_error || ((err == best_error) && (j == 3))) {
best_error = err;
best_sel = j;
}
}
total_error += best_error;
if (total_error >= block.error) break;
block.selectors[i] = best_sel;
}
} else {
Vector4Int axis = color_vectors[3] - color_vectors[0];
const float f = 4.0f / ((float)axis.SqrMag() + .00000125f);
for (unsigned i = 0; i < 16; i++) {
Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
auto diff = pixel_vector - color_vectors[0];
float sel_f = (float)diff.Dot(axis) * f + 0.5f;
uint8_t sel = (uint8_t)clampi((int)sel_f, 1, 3);
unsigned err0 = (color_vectors[sel - 1] - pixel_vector).SqrMag();
unsigned err1 = (color_vectors[sel] - pixel_vector).SqrMag();
uint8_t best_sel = sel;
unsigned best_err = err1;
if (err0 == err1) {
// prefer non-interpolation
if ((best_sel) == 1) best_sel = 0;
} else if (err0 < best_err) {
best_sel = sel - 1;
best_err = err0;
}
total_error += best_err;
if (total_error >= block.error) break;
block.selectors[i] = best_sel;
}
return 0;
}
return 0;
block.is_3_color = false;
block.is_1_color = false;
block.error = total_error;
return total_error;
}
bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color,
bool use_black) const {
bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const {
Vector4 low, high;
Vector4 q00 = {0, 0, 0};
unsigned weight_accum = 0;
for (unsigned i = 0; i < 16; i++) {
@ -486,7 +551,7 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
const int sel = (int)block.selectors[i];
if (use_black && color.IsBlack()) continue;
if (is_3color && sel == 3) continue; // NOTE: selectors for 3-color are in linear order here, but not in original
if (is_3color && sel == 3) continue; // NOTE: selectors for 3-color are in linear order here, but not in original
assert(sel <= 3);
const Vector4Int color_vector = Vector4Int::FromColorRGB(color);
@ -504,7 +569,10 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
// invert matrix
float det = z00 * z11 - z01 * z10;
if (fabs(det) < 1e-8f) return false;
if (fabs(det) < 1e-8f) {
block.is_1_color = true;
return false;
}
det = ((float)denominator / 255.0f) / det;
@ -517,6 +585,9 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
low = (q00 * iz00) + (q10 * iz01);
high = (q00 * iz10) + (q10 * iz11);
block.is_1_color = false;
block.low = Color::PreciseRound565(low);
block.high = Color::PreciseRound565(high);
return true;
}
} // namespace rgbcx

View File

@ -94,7 +94,7 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
Exhaustive = 8192,
// Try 2 different ways of choosing the initial endpoints.
TryAllInitialEndponts = 16384,
TryAllInitialEndpoints = 16384,
// Same as BoundingBox, but implemented using integer math (faster, slightly less quality)
BoundingBoxInt = 32768,
@ -123,16 +123,16 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
std::array<uint8_t, 16> selectors;
bool is_3_color;
bool is_1_color;
unsigned error = UINT_MAX;
};
void EncodeBlockSingleColor(Color color, BC1Block *dest) const;
void EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const;
void FindEndpoints(Color4x4 pixels, Flags flags, BlockMetrics const metrics, Color &low, Color &high) const;
unsigned FindSelectors4(Color4x4 pixels, EncodeResults &block, unsigned cur_err = 0, bool use_err = false) const;
unsigned FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const;
bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color = false,
bool use_black = false) const;
bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const;
// match tables used for single-color blocks
// Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible,

View File

@ -108,6 +108,13 @@ Color Color::Max(const Color &a, const Color &b) { return Color(std::max(a[0], b
// Conversion to Vector4: the r, g, b and a members are passed through, in that order.
Color::operator Vector4() const {
    return Vector4(r, g, b, a);
}
// Conversion to Vector4Int: the r, g, b and a members are passed through, in that order.
Color::operator Vector4Int() const {
    return Vector4Int(r, g, b, a);
}
// Component-wise difference of two colors (lhs - rhs) over all four components, returned as a
// Vector4Int. The left operand is widened to int before subtracting, so a component of the
// result can be negative when rhs is larger than lhs.
Vector4Int operator-(const Color &lhs, const Color &rhs) {
    Vector4Int delta;
    for (unsigned channel = 0; channel < 4; ++channel) {
        const int minuend = static_cast<int>(lhs[channel]);
        delta[channel] = minuend - rhs[channel];
    }
    return delta;
}
// Packs this color by forwarding its r, g and b members to the three-argument Pack565 overload.
// The a member is not passed along.
uint16_t Color::Pack565() const {
    return Pack565(r, g, b);
}
// Packs this color by forwarding its r, g and b members to the three-argument Pack565Unscaled
// overload. The a member is not passed along.
uint16_t Color::Pack565Unscaled() const {
    return Pack565Unscaled(r, g, b);
}

View File

@ -63,6 +63,7 @@ class Color {
operator Vector4() const;
operator Vector4Int() const;
friend Vector4Int operator-(const Color &lhs, const Color &rhs);
void SetRGBA(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
void SetRGBA(const Color &other) { SetRGBA(other.r, other.g, other.b, other.a); }

View File

@ -64,9 +64,7 @@ class Vector4Int {
return _c[index];
}
// Converts to a Vector4 built from the four stored components, in index order.
// Defined exactly once: a second, identical definition of this conversion operator in the same
// class would be a redefinition error.
operator Vector4() const { return Vector4(_c[0], _c[1], _c[2], _c[3]); }
friend Vector4Int operator+(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::plus()); }
friend Vector4Int operator-(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::minus()); }
@ -99,6 +97,7 @@ class Vector4Int {
}
return max;
}
// Squared magnitude: the dot product of this vector with itself, converted to unsigned.
unsigned int SqrMag() {
    const auto self_dot = Dot(*this, *this);
    return static_cast<unsigned int>(self_dot);
}
private:
template <typename Op> friend Vector4Int DoOp(const Vector4Int &lhs, const Vector4Int &rhs, Op f) {