mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
Improved first-pass refinement
This commit is contained in:
parent
8acaf1ed96
commit
2e5cf991b0
@ -102,7 +102,7 @@ template <size_t S> void PrepSingleColorTable(MatchList &match_table, MatchList
|
||||
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
|
||||
PrepSingleColorTable<5>(*_single_match5, *_single_match5_half, *_interpolator);
|
||||
PrepSingleColorTable<6>(*_single_match6, *_single_match6_half, *_interpolator);
|
||||
_flags = Flags::BoundingBoxInt;
|
||||
_flags = Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses;
|
||||
}
|
||||
|
||||
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
|
||||
@ -124,37 +124,44 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
|
||||
needs_block_error |= (_search_rounds > 0);
|
||||
needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None);
|
||||
|
||||
unsigned cur_err = UINT_MAX;
|
||||
unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
|
||||
unsigned total_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;
|
||||
|
||||
if (!needs_block_error || true) {
|
||||
// assert((_flags & Flags::TryAllInitialEndponts) == Flags::None);
|
||||
EncodeResults result;
|
||||
for (unsigned round = 0; round < total_rounds; round++) {
|
||||
Flags modified_flags = _flags;
|
||||
if (round == 1) {
|
||||
modified_flags &= ~(Flags::Use2DLS | Flags::BoundingBoxInt);
|
||||
modified_flags |= Flags::BoundingBox;
|
||||
}
|
||||
|
||||
EncodeResults orig;
|
||||
FindEndpoints(pixels, _flags, metrics, orig.low, orig.high);
|
||||
FindSelectors4(pixels, orig);
|
||||
EncodeResults best = orig;
|
||||
EncodeResults round_result;
|
||||
FindEndpoints(pixels, modified_flags, metrics, round_result.low, round_result.high);
|
||||
FindSelectors4(pixels, round_result, needs_block_error);
|
||||
|
||||
const uint32_t total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
|
||||
for (unsigned pass = 0; pass < total_ls_passes; pass++) {
|
||||
EncodeResults trial = best;
|
||||
EncodeResults trial_result = round_result;
|
||||
Vector4 low, high;
|
||||
|
||||
bool multicolor = ComputeEndpointsLS(pixels, trial, low, high, metrics);
|
||||
if (multicolor) {
|
||||
trial.low = Color::PreciseRound565(low);
|
||||
trial.high = Color::PreciseRound565(high);
|
||||
bool multicolor = ComputeEndpointsLS(pixels, trial_result, metrics, false, false);
|
||||
|
||||
if (trial_result.low == round_result.low && trial_result.high == round_result.high) break;
|
||||
|
||||
FindSelectors4(pixels, trial_result, needs_block_error);
|
||||
|
||||
if (!needs_block_error || trial_result.error < round_result.error) {
|
||||
round_result = trial_result;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
|
||||
if (trial.low == best.low && trial.high == best.high) break;
|
||||
FindSelectors4(pixels, trial);
|
||||
best = trial;
|
||||
}
|
||||
if (!needs_block_error || round_result.error < result.error) { result = round_result; }
|
||||
}
|
||||
|
||||
if (best.low == best.high) {
|
||||
EncodeBlockSingleColor(metrics.avg, dest);
|
||||
} else {
|
||||
EncodeBlock4Color(best, dest);
|
||||
}
|
||||
if (result.low == result.high) {
|
||||
EncodeBlockSingleColor(metrics.avg, dest);
|
||||
} else {
|
||||
EncodeBlock4Color(result, dest);
|
||||
}
|
||||
}
|
||||
|
||||
@ -447,38 +454,96 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, BC1Encoder::Flags flags, const B
|
||||
}
|
||||
}
|
||||
|
||||
unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, unsigned int cur_err, bool use_err) const {
|
||||
unsigned BC1Encoder::FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const {
|
||||
// colors in selector order, 0, 1, 2, 3
|
||||
// 0 = low color, 1 = high color, 2/3 = interpolated
|
||||
std::array<Color, 4> colors = _interpolator->InterpolateBC1(block.low, block.high, false);
|
||||
std::array<Vector4Int, 4> colorVectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]};
|
||||
std::array<Vector4Int, 4> color_vectors = {(Vector4Int)colors[0], (Vector4Int)colors[2], (Vector4Int)colors[3], (Vector4Int)colors[1]};
|
||||
unsigned total_error = 0;
|
||||
|
||||
if (!use_err) {
|
||||
Vector4Int a = colorVectors[3] - colorVectors[0];
|
||||
Color high = block.high.ScaleFrom565();
|
||||
Color low = block.low.ScaleFrom565();
|
||||
if (!use_err || (_flags & Flags::UseFasterMSEEval) != Flags::None) {
|
||||
Vector4Int axis = color_vectors[3] - color_vectors[0];
|
||||
std::array<int, 4> dots;
|
||||
for (unsigned i = 0; i < 4; i++) { dots[i] = a.Dot(colorVectors[i]); }
|
||||
for (unsigned i = 0; i < 4; i++) { dots[i] = axis.Dot(color_vectors[i]); }
|
||||
int t0 = dots[0] + dots[1], t1 = dots[1] + dots[2], t2 = dots[2] + dots[3];
|
||||
a *= 2;
|
||||
axis *= 2;
|
||||
|
||||
for (unsigned x = 0; x < 4; x++) {
|
||||
for (unsigned y = 0; y < 4; y++) {
|
||||
int dot = a.Dot((Vector4Int)pixels.Get(x, y));
|
||||
unsigned level = (dot <= t0) + (dot < t1) + (dot < t2);
|
||||
unsigned selector = 3 - level;
|
||||
assert(level < 4);
|
||||
assert(selector < 4);
|
||||
block.selectors[x + (4 * y)] = selector;
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
|
||||
int dot = axis.Dot(pixel_vector);
|
||||
uint8_t level = (dot <= t0) + (dot < t1) + (dot < t2);
|
||||
uint8_t selector = 3 - level;
|
||||
assert(level < 4);
|
||||
assert(selector < 4);
|
||||
|
||||
if ((_flags & Flags::UseFasterMSEEval) != Flags::None) {
|
||||
// LLVM is just going to unswitch this anyway, so it's not an issue
|
||||
auto diff = pixel_vector - color_vectors[selector];
|
||||
total_error += diff.SqrMag();
|
||||
if (i % 4 != 0 && total_error >= block.error) break; // check only once per row if we're generating too much error
|
||||
}
|
||||
|
||||
block.selectors[i] = selector;
|
||||
}
|
||||
} else if ((_flags & Flags::UseFullMSEEval) != Flags::None) {
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
unsigned best_error = UINT_MAX;
|
||||
uint8_t best_sel = 0;
|
||||
Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
|
||||
|
||||
// exhaustively check every pixel's distance from each color, and calculate the error
|
||||
for (uint8_t j = 0; j < 4; j++) {
|
||||
auto diff = color_vectors[j] - pixel_vector;
|
||||
unsigned err = diff.SqrMag();
|
||||
if (err < best_error || ((err == best_error) && (j == 3))) {
|
||||
best_error = err;
|
||||
best_sel = j;
|
||||
}
|
||||
}
|
||||
|
||||
total_error += best_error;
|
||||
if (total_error >= block.error) break;
|
||||
|
||||
block.selectors[i] = best_sel;
|
||||
}
|
||||
} else {
|
||||
Vector4Int axis = color_vectors[3] - color_vectors[0];
|
||||
const float f = 4.0f / ((float)axis.SqrMag() + .00000125f);
|
||||
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
Vector4Int pixel_vector = (Vector4Int)pixels.Get(i);
|
||||
auto diff = pixel_vector - color_vectors[0];
|
||||
float sel_f = (float)diff.Dot(axis) * f + 0.5f;
|
||||
uint8_t sel = (uint8_t)clampi((int)sel_f, 1, 3);
|
||||
|
||||
unsigned err0 = (color_vectors[sel - 1] - pixel_vector).SqrMag();
|
||||
unsigned err1 = (color_vectors[sel] - pixel_vector).SqrMag();
|
||||
|
||||
uint8_t best_sel = sel;
|
||||
unsigned best_err = err1;
|
||||
if (err0 == err1) {
|
||||
// prefer non-interpolation
|
||||
if ((best_sel) == 1) best_sel = 0;
|
||||
} else if (err0 < best_err) {
|
||||
best_sel = sel - 1;
|
||||
best_err = err0;
|
||||
}
|
||||
|
||||
total_error += best_err;
|
||||
|
||||
if (total_error >= block.error) break;
|
||||
|
||||
block.selectors[i] = best_sel;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
return 0;
|
||||
block.is_3_color = false;
|
||||
block.is_1_color = false;
|
||||
block.error = total_error;
|
||||
return total_error;
|
||||
}
|
||||
|
||||
bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color,
|
||||
bool use_black) const {
|
||||
bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const {
|
||||
Vector4 low, high;
|
||||
Vector4 q00 = {0, 0, 0};
|
||||
unsigned weight_accum = 0;
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
@ -486,7 +551,7 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
|
||||
const int sel = (int)block.selectors[i];
|
||||
|
||||
if (use_black && color.IsBlack()) continue;
|
||||
if (is_3color && sel == 3) continue; // NOTE: selectors for 3-color are in linear order here, but not in original
|
||||
if (is_3color && sel == 3) continue; // NOTE: selectors for 3-color are in linear order here, but not in original
|
||||
assert(sel <= 3);
|
||||
|
||||
const Vector4Int color_vector = Vector4Int::FromColorRGB(color);
|
||||
@ -504,7 +569,10 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
|
||||
|
||||
// invert matrix
|
||||
float det = z00 * z11 - z01 * z10;
|
||||
if (fabs(det) < 1e-8f) return false;
|
||||
if (fabs(det) < 1e-8f) {
|
||||
block.is_1_color = true;
|
||||
return false;
|
||||
}
|
||||
|
||||
det = ((float)denominator / 255.0f) / det;
|
||||
|
||||
@ -517,6 +585,9 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vecto
|
||||
low = (q00 * iz00) + (q10 * iz01);
|
||||
high = (q00 * iz10) + (q10 * iz11);
|
||||
|
||||
block.is_1_color = false;
|
||||
block.low = Color::PreciseRound565(low);
|
||||
block.high = Color::PreciseRound565(high);
|
||||
return true;
|
||||
}
|
||||
} // namespace rgbcx
|
@ -94,7 +94,7 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
|
||||
Exhaustive = 8192,
|
||||
|
||||
// Try 2 different ways of choosing the initial endpoints.
|
||||
TryAllInitialEndponts = 16384,
|
||||
TryAllInitialEndpoints = 16384,
|
||||
|
||||
// Same as BoundingBox, but implemented using integer math (faster, slightly less quality)
|
||||
BoundingBoxInt = 32768,
|
||||
@ -123,16 +123,16 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
|
||||
std::array<uint8_t, 16> selectors;
|
||||
bool is_3_color;
|
||||
bool is_1_color;
|
||||
unsigned error = UINT_MAX;
|
||||
};
|
||||
|
||||
void EncodeBlockSingleColor(Color color, BC1Block *dest) const;
|
||||
void EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const;
|
||||
|
||||
void FindEndpoints(Color4x4 pixels, Flags flags, BlockMetrics const metrics, Color &low, Color &high) const;
|
||||
unsigned FindSelectors4(Color4x4 pixels, EncodeResults &block, unsigned cur_err = 0, bool use_err = false) const;
|
||||
unsigned FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const;
|
||||
|
||||
bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Vector4 &low, Vector4 &high, BlockMetrics metrics, bool is_3color = false,
|
||||
bool use_black = false) const;
|
||||
bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const;
|
||||
|
||||
// match tables used for single-color blocks
|
||||
// Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible,
|
||||
|
@ -108,6 +108,13 @@ Color Color::Max(const Color &a, const Color &b) { return Color(std::max(a[0], b
|
||||
|
||||
// Conversion to Vector4: builds the vector from this color's r, g, b and a members, in that order.
Color::operator Vector4() const { return Vector4(r, g, b, a); }
|
||||
// Conversion to Vector4Int: builds the vector from this color's r, g, b and a members, in that order.
Color::operator Vector4Int() const { return Vector4Int(r, g, b, a);}
|
||||
// Difference of two colors as an integer vector.
//
// For each of the four channel indices, the left-hand channel is converted to int
// before the right-hand channel is subtracted from it, and the result is stored in
// the matching component of the returned vector.
//
// lhs: color to subtract from
// rhs: color to subtract
// returns: Vector4Int whose i-th component is lhs[i] - rhs[i]
Vector4Int operator-(const Color &lhs, const Color &rhs) {
    Vector4Int difference;
    unsigned channel = 0;
    while (channel < 4) {
        difference[channel] = static_cast<int>(lhs[channel]) - rhs[channel];
        ++channel;
    }
    return difference;
}
|
||||
|
||||
// Packs this color by forwarding its r, g and b members to the three-argument Pack565 overload.
// The alpha member is not passed along.
uint16_t Color::Pack565() const { return Pack565(r, g, b); }
|
||||
// Packs this color by forwarding its r, g and b members to the three-argument Pack565Unscaled overload.
// The alpha member is not passed along.
uint16_t Color::Pack565Unscaled() const { return Pack565Unscaled(r, g, b); }
|
||||
|
@ -63,6 +63,7 @@ class Color {
|
||||
|
||||
operator Vector4() const;
|
||||
operator Vector4Int() const;
|
||||
friend Vector4Int operator-(const Color &lhs, const Color &rhs);
|
||||
|
||||
void SetRGBA(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
|
||||
// Sets all four channels from another color by forwarding other's r, g, b and a
// to the four-argument SetRGBA overload.
void SetRGBA(const Color &other) { SetRGBA(other.r, other.g, other.b, other.a); }
|
||||
|
@ -64,9 +64,7 @@ class Vector4Int {
|
||||
return _c[index];
|
||||
}
|
||||
|
||||
operator Vector4() const {
|
||||
return Vector4(_c[0], _c[1], _c[2], _c[3]);
|
||||
}
|
||||
operator Vector4() const { return Vector4(_c[0], _c[1], _c[2], _c[3]); }
|
||||
|
||||
// Sum of two vectors, delegated to DoOp with a std::plus functor.
// NOTE(review): presumably applied per component — DoOp's body is outside this view; confirm.
friend Vector4Int operator+(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::plus()); }
|
||||
// Difference of two vectors, delegated to DoOp with a std::minus functor.
// NOTE(review): presumably applied per component — DoOp's body is outside this view; confirm.
friend Vector4Int operator-(const Vector4Int &lhs, const Vector4Int &rhs) { return DoOp(lhs, rhs, std::minus()); }
|
||||
@ -99,6 +97,7 @@ class Vector4Int {
|
||||
}
|
||||
return max;
|
||||
}
|
||||
// Squared magnitude of this vector: the two-argument Dot of the vector with itself,
// converted to unsigned int.
unsigned int SqrMag() {
    const auto self_dot = Dot(*this, *this);
    return static_cast<unsigned int>(self_dot);
}
|
||||
|
||||
private:
|
||||
template <typename Op> friend Vector4Int DoOp(const Vector4Int &lhs, const Vector4Int &rhs, Op f) {
|
||||
|
Loading…
Reference in New Issue
Block a user