Refactor order tables

This commit is contained in:
Andrew Cassidy 2021-03-01 22:05:04 -08:00
parent f65008ccc3
commit d0e66b31f7
12 changed files with 1519 additions and 140 deletions

View File

@ -41,19 +41,11 @@
namespace rgbcx {
using InterpolatorPtr = std::shared_ptr<Interpolator>;
using Hist3 = OrderTable<3>::Histogram;
using Hist4 = OrderTable<4>::Histogram;
using Hash = uint16_t;
using BlockMetrics = Color4x4::BlockMetrics;
using EncodeResults = BC1Encoder::EncodeResults;
using ColorMode = BC1Encoder::ColorMode;
// Static Fields
OrderTable<3> *BC1Encoder::order_table3 = nullptr;
OrderTable<4> *BC1Encoder::order_table4 = nullptr;
std::mutex BC1Encoder::order_table_mutex = std::mutex();
bool BC1Encoder::order_tables_generated = false;
// constructors
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
_flags = Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks;
@ -62,19 +54,11 @@ BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolato
_orderings4 = 16;
_orderings3 = 8;
// generate lookup tables
order_table_mutex.lock();
if (!order_tables_generated) {
assert(order_table3 == nullptr);
assert(order_table4 == nullptr);
OrderTable<3>::Generate();
OrderTable<4>::Generate();
order_table3 = new OrderTable<3>();
order_table4 = new OrderTable<4>();
order_tables_generated = true;
}
assert(order_table3 != nullptr);
assert(order_table4 != nullptr);
order_table_mutex.unlock();
assert(OrderTable<3>::generated);
assert(OrderTable<4>::generated);
}
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
@ -123,8 +107,7 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
// First refinement pass using ordered cluster fit
if (result.error > 0 && (bool)(_flags & Flags::UseLikelyTotalOrderings)) {
for (unsigned iter = 0; iter < total_cf_iters; iter++) { RefineBlockCF<ColorMode::FourColor>(pixels, result, metrics, _error_mode, _orderings4);
}
for (unsigned iter = 0; iter < total_cf_iters; iter++) { RefineBlockCF<ColorMode::FourColor>(pixels, result, metrics, _error_mode, _orderings4); }
}
// try for 3-color block
@ -651,8 +634,9 @@ template <ColorMode M> void BC1Encoder::RefineEndpointsLS(std::array<Vector4, 17
Vector4 q10 = {0, 0, 0};
unsigned level = 0;
Histogram<color_count> h = OrderTable<color_count>::Orders[hash];
for (unsigned i = 0; i < (color_count - 1); i++) {
level += OrderTable<color_count>::GetUniqueOrdering(hash, i);
level += h[i];
q10 += sums[level];
}
@ -698,19 +682,12 @@ void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetr
assert(block.color_mode != ColorMode::Incomplete);
using OrderTable = OrderTable<color_count>;
using Hist = typename OrderTable::Histogram;
using Hist = Histogram<color_count>;
EncodeResults orig = block;
Hist h = Hist(orig.selectors);
Hash start_hash;
if constexpr (color_count == 4) {
start_hash = order_table4->GetHash(h);
}
else {
start_hash = order_table3->GetHash(h);
}
Hash start_hash = OrderTable::GetHash(h);
Vector4 axis = orig.high.ScaleFrom565() - orig.low.ScaleFrom565();
std::array<Vector4, 16> color_vectors;
@ -732,19 +709,11 @@ void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetr
sums[i + 1] = sums[i] + color_vectors[p];
}
const Hash q_total =
((_flags & Flags::Exhaustive) != Flags::None) ? OrderTable::UniqueOrderings : orderings;
const Hash q_total = ((_flags & Flags::Exhaustive) != Flags::None) ? OrderTable::OrderCount : orderings;
for (Hash q = 0; q < q_total; q++) {
Hash trial_hash;
Vector4 trial_matrix;
if (color_count == 4) {
trial_hash = ((_flags & Flags::Exhaustive) != Flags::None) ? q : g_best_total_orderings4[start_hash][q];
trial_matrix = order_table4->GetFactors(trial_hash);
} else {
trial_hash = ((_flags & Flags::Exhaustive) != Flags::None) ? q : g_best_total_orderings3[start_hash][q];
trial_matrix = order_table3->GetFactors(trial_hash);
}
Hash trial_hash = ((_flags & Flags::Exhaustive) != Flags::None) ? q : OrderTable::BestOrders[start_hash][q];
Vector4 trial_matrix = OrderTable::GetFactors(trial_hash);
EncodeResults trial_result = orig;
Vector4 low, high;

View File

@ -32,7 +32,7 @@
#include "BC1Block.h"
#include "OrderTable.h"
#include "SingleColorTable.h"
#include "tables.h"
#include "Tables.h"
namespace rgbcx {
@ -148,11 +148,6 @@ class BC1Encoder final : public BlockEncoder<BC1Block, 4, 4> {
unsigned _orderings4;
unsigned _orderings3;
static OrderTable<4> *order_table4; // order table for 3-color blocks
static OrderTable<3> *order_table3; // order table for 4-color blocks
static std::mutex order_table_mutex; // prevent race condition with multiple BC1Encoders constructed at once
static bool order_tables_generated; // have the order tables been generated by a previous instance?
void WriteBlockSolid(Color color, BC1Block *dest) const;
void WriteBlock(EncodeResults &block, BC1Block *dest) const;

85
src/BC1/Histogram.h Normal file
View File

@ -0,0 +1,85 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <initializer_list>
#include <mutex>
#include <numeric>
#include "../Vector4.h"
#include "../util.h"
#include "OrderTable.h"
#include "Tables.h"
namespace rgbcx {
/**
 * Fixed-size histogram with N bins of 8-bit counters.
 *
 * The selector constructor tallies how many of a block's 16 selectors take each
 * value in [0, N), so the bins of such a histogram always sum to 16.
 *
 * @tparam N number of bins (one per selector value)
 */
template <size_t N> class Histogram {
   public:
    using Hash = uint16_t;

    /// Creates a histogram with every bin set to zero.
    Histogram() { _bins.fill(0); }

    /**
     * Builds a histogram by counting the occurrences of each selector value.
     * @param sels 16 selector values, each of which must be less than N
     */
    Histogram(std::array<uint8_t, 16> sels) {
        _bins.fill(0);
        for (const uint8_t sel : sels) {
            assert(sel < N);
            _bins[sel]++;
        }
    }

    /**
     * Builds a histogram directly from bin counts.
     * @param init up to N bin values, in bin order; bins not listed are zero
     */
    Histogram(std::initializer_list<uint8_t> init) {
        assert(init.size() <= N);
        _bins.fill(0);
        // Bound the copy by N so an over-long list cannot write past _bins when asserts are compiled out.
        std::copy_n(init.begin(), std::min<size_t>(init.size(), N), _bins.begin());
    }

    /// Returns the count stored in bin `index` (index must be less than N).
    uint8_t operator[](size_t index) const {
        assert(index < N);
        return _bins[index];
    }

    /// Returns a mutable reference to bin `index` (index must be less than N).
    uint8_t &operator[](size_t index) {
        assert(index < N);
        return _bins[index];
    }

    /// Returns true if any single bin holds the value 16.
    bool Any16() const {
        return std::any_of(_bins.begin(), _bins.end(), [](uint8_t count) { return count == 16; });
    }

    /**
     * Packs the first N-1 bins into an integer, 4 bits per bin, with bin 0 in the lowest nibble.
     * The last bin is not stored. A bin value of 16 does not fit in 4 bits and spills into the
     * next nibble, so callers that need a unique value should check Any16() first.
     */
    unsigned GetPacked() const {
        unsigned packed = 0;
        for (unsigned i = 0; i < (N - 1); i++) { packed |= static_cast<unsigned>(_bins[i]) << (4 * i); }

        return packed;
    }

   private:
    std::array<uint8_t, N> _bins;
};
} // namespace rgbcx

1319
src/BC1/OrderTable.cpp Normal file

File diff suppressed because it is too large Load Diff

View File

@ -27,119 +27,124 @@
#include "../Vector4.h"
#include "../util.h"
#include "tables.h"
#include "Histogram.h"
#include "Tables.h"
namespace rgbcx {
template <size_t N> class Histogram;
template <size_t N> class OrderTable {
public:
static constexpr unsigned HashCount = 1 << ((N - 1) * 4); // 16**(N-1)
static constexpr unsigned OrderCount = (N == 4) ? 969 : 153; //(16+N-1)C(N-1)
#if RGBCX_USE_SMALLER_TABLES
static constexpr unsigned BestOrderCount = 32;
#else
static constexpr unsigned BestOrderCount = (N == 4) ? 128 : 32;
#endif
using Hash = uint16_t;
using FactorMatrix = std::array<float, 3>;
using OrderArray = std::array<Histogram<N>, OrderCount>;
using BestOrderRow = std::array<Hash, BestOrderCount>;
using BestOrderArray = std::array<BestOrderRow, OrderCount>;
class Histogram {
public:
Histogram() { _bins = {0}; }
static std::atomic<bool> generated;
Histogram(std::array<uint8_t, 16> sels) {
_bins = {0};
for (unsigned i = 0; i < 16; i++) {
assert(sels[i] < N);
_bins[sels[i]]++;
}
}
static const OrderArray Orders;
static const BestOrderArray BestOrders;
static const std::array<Vector4, N> Weights;
static const std::array<Hash, N> SingleColorHashes;
uint8_t operator[](size_t index) const {
assert(index < N);
return _bins[index];
}
uint8_t &operator[](size_t index) {
assert(index < N);
return _bins[index];
}
bool Any16() {
return std::any_of(_bins.begin(), _bins.end(), [](int i) { return i == 16; });
}
unsigned GetPacked() const {
unsigned packed = 0;
for (unsigned i = 0; i < (N - 1); i++) { packed |= (_bins[i] << (4 * i)); }
assert(packed < TotalHashes);
return packed;
}
private:
std::array<uint8_t, N> _bins;
};
static inline constexpr size_t UniqueOrderings = (N == 4) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : NUM_UNIQUE_TOTAL_ORDERINGS3;
static inline constexpr size_t TotalHashes = (N == 4) ? 4096 : 256;
static inline constexpr uint8_t GetUniqueOrdering(Hash hash, unsigned selector) {
if constexpr (N == 4) { return g_unique_total_orders4[hash][selector]; }
return g_unique_total_orders3[hash][selector];
}
OrderTable<N>() {
static bool Generate() {
static_assert(N == 4 || N == 3);
const unsigned *weight_vals = (N == 4) ? g_weight_vals4 : g_weight_vals3;
table_mutex.lock();
if (generated) return false;
hashes = new std::array<Hash, HashCount>();
factors = new std::array<Vector4, OrderCount>();
const float denominator = (N == 4) ? 3.0f : 2.0f;
for (unsigned i = 0; i < UniqueOrderings; i++) {
Histogram h;
for (unsigned j = 0; j < N; j++) { h[j] = GetUniqueOrdering(i, j); }
for (uint16_t i = 0; i < OrderCount; i++) {
Histogram<N> h = Orders[i];
if (!h.Any16()) hashes->at(h.GetPacked()) = i;
if (!h.Any16()) _hashes[h.GetPacked()] = (Hash)i;
Vector4 factor_matrix = 0;
for (unsigned sel = 0; sel < N; sel++) factor_matrix += (Weights[sel] * h[sel]);
unsigned weight_accum = 0;
for (unsigned sel = 0; sel < N; sel++) weight_accum += (weight_vals[sel] * h[sel]);
// todo: use a Vector4 here instead for SIMD readiness
float z00 = (float)((weight_accum >> 16) & 0xFF);
float z10 = (float)((weight_accum >> 8) & 0xFF);
float z11 = (float)(weight_accum & 0xFF);
float z01 = z10;
float det = z00 * z11 - z01 * z10;
float det = factor_matrix.Determinant2x2();
if (fabs(det) < 1e-8f) {
_factors[i][0] = 0;
_factors[i][1] = 0;
_factors[i][2] = 0;
factors->at(i) = Vector4(0);
} else {
det = (denominator / 255.0f) / det;
_factors[i][0] = z11 * det;
_factors[i][1] = -z10 * det;
_factors[i][2] = z00 * det;
factor_matrix *= Vector4(1, -1, -1, 1);
factor_matrix *= (denominator / 255.0f) / det;
factors->at(i) = factor_matrix;
}
}
generated = true;
table_mutex.unlock();
assert(generated);
return true;
}
Hash GetHash(Histogram &hist) const {
static Hash GetHash(Histogram<N> &hist) {
for (unsigned i = 0; i < N; i++) {
if (hist[i] == 16) return GetSingleColorHashes()[i];
if (hist[i] == 16) return SingleColorHashes[i];
}
return _hashes[hist.GetPacked()];
assert(generated);
assert(hashes != nullptr);
auto hash = hashes->at(hist.GetPacked());
assert(hash < OrderCount);
return hash;
}
Vector4 GetFactors(Hash hash) { return Vector4(_factors[hash][0], _factors[hash][1], _factors[hash][1], _factors[hash][2]); }
static Vector4 GetFactors(Hash hash) {
assert(generated);
assert(factors != nullptr);
static inline constexpr std::array<Hash, N> GetSingleColorHashes() {
if constexpr (N == 4) { return {15, 700, 753, 515}; }
return {12, 15, 89};
return factors->at(hash);
}
static inline constexpr bool IsSingleColor(Hash hash) {
auto hashes = GetSingleColorHashes();
return (std::find(hashes.begin(), hashes.end(), hash) != hashes.end());
}
static bool IsSingleColor(Hash hash) { return (std::find(SingleColorHashes.begin(), SingleColorHashes.end(), hash) != SingleColorHashes.end()); }
private:
std::array<Hash, TotalHashes> _hashes;
std::array<FactorMatrix, UniqueOrderings> _factors;
static std::mutex table_mutex;
static std::array<Hash, HashCount> *hashes;
static std::array<Vector4, OrderCount> *factors;
};
// Declarations of the explicit specializations of OrderTable's static data members
// for N = 3 and N = 4. None of them has an initializer, so each line is a declaration
// only; the matching definitions must be supplied by a source file
// (presumably OrderTable.cpp, whose contents are not shown here - confirm).

// Runtime state filled in by Generate().
template <> std::atomic<bool> OrderTable<3>::generated;
template <> std::atomic<bool> OrderTable<4>::generated;
template <> std::mutex OrderTable<3>::table_mutex;
template <> std::mutex OrderTable<4>::table_mutex;
template <> std::array<OrderTable<3>::Hash, OrderTable<3>::HashCount> *OrderTable<3>::hashes;
template <> std::array<OrderTable<4>::Hash, OrderTable<4>::HashCount> *OrderTable<4>::hashes;
template <> std::array<Vector4, OrderTable<3>::OrderCount> *OrderTable<3>::factors;
template <> std::array<Vector4, OrderTable<4>::OrderCount> *OrderTable<4>::factors;
// Constant tables.
template <> const std::array<Vector4, 3> OrderTable<3>::Weights;
template <> const std::array<Vector4, 4> OrderTable<4>::Weights;
template <> const std::array<uint16_t, 3> OrderTable<3>::SingleColorHashes;
template <> const std::array<uint16_t, 4> OrderTable<4>::SingleColorHashes;
template <> const OrderTable<3>::OrderArray OrderTable<3>::Orders;
template <> const OrderTable<4>::OrderArray OrderTable<4>::Orders;
template <> const OrderTable<3>::BestOrderArray OrderTable<3>::BestOrders;
template <> const OrderTable<4>::BestOrderArray OrderTable<4>::BestOrders;
// Explicit instantiation declarations: translation units that include this header
// do not implicitly instantiate these two classes themselves.
extern template class OrderTable<3>;
extern template class OrderTable<4>;
} // namespace rgbcx

View File

@ -1,7 +1,7 @@
// rgbcx.h v1.12
// High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 <richgel99@gmail.com>.
#include "tables.h"
#include "Tables.h"
const float g_midpoint5[32] = {.015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f,
.370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f,
@ -1116,7 +1116,7 @@ const uint16_t g_best_total_orderings4[NUM_UNIQUE_TOTAL_ORDERINGS4][MAX_TOTAL_OR
{9, 39, 0, 166, 68, 101, 28, 364, 30, 158, 562, 35, 175, 65, 333, 154, 49, 404, 706, 124, 21, 252, 274, 168, 190, 289, 100, 570, 16, 1, 310, 346},
{15, 515, 700, 753, 341, 13, 0, 23, 1, 33, 141, 4, 260, 82, 77, 51, 351, 180, 9, 5, 115, 137, 10, 217, 11, 120, 102, 40, 349, 269, 202, 854}
#else
#include "table4.h"
#include "Table4.h"
#endif
};

View File

@ -3,13 +3,14 @@
#pragma once
#include <cstdint>
#include <array>
// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
static constexpr uint32_t g_weight_vals4[4] = {0x000009, 0x010204, 0x040201, 0x090000};
static constexpr std::array<uint32_t, 4> g_weight_vals4 = {0x000009, 0x010204, 0x040201, 0x090000};
// multiplier is 4 for 3-color
static constexpr uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101};
static constexpr std::array<uint32_t, 3> g_weight_vals3 = {0x000004, 0x040000, 0x010101};
const uint32_t MIN_TOTAL_ORDERINGS = 1;
const uint32_t MAX_TOTAL_ORDERINGS3 = 32;

View File

@ -98,7 +98,12 @@ class Vector4 {
return max;
}
unsigned int SqrMag() { return (unsigned)Dot(*this, *this); }
float SqrMag() { return Dot(*this, *this); }
/// Determinant of the vector read as a 2x2 matrix whose entries
/// z00, z01, z10, z11 are stored in components 0, 1, 2 and 3.
float Determinant2x2() {
    const float main_diagonal = _c[0] * _c[3];  // z00 * z11
    const float anti_diagonal = _c[1] * _c[2];  // z01 * z10
    return main_diagonal - anti_diagonal;
}
private:
template <typename Op> static inline Vector4 DoOp(const Vector4 &lhs, const Vector4 &rhs, Op f) {

View File

@ -13,7 +13,7 @@
#include <type_traits>
#include "BC1/BC1Block.h"
#include "BC1/tables.h"
#include "BC1/Tables.h"
#include "Color.h"
#include "util.h"

View File

@ -19,7 +19,7 @@ typedef struct
// m_max_partitions_mode may range from 0 (disables mode 1) to BC7ENC_MAX_PARTITIONS1. The higher this value, the slower the compressor, but the higher the quality.
uint32_t m_max_partitions_mode;
// Relative RGBA or YCbCrA weights.
// Relative RGBA or YCbCrA weights.
uint32_t m_weights[4];
// m_uber_level may range from 0 to BC7ENC_MAX_UBER_LEVEL. The higher this value, the slower the compressor, but the higher the quality.

View File

@ -675,7 +675,7 @@ Jyrki Katajainen, Alistair Moffat, Andrew Turpin, 1995.*/
/*chain node for boundary package merge*/
typedef struct BPMNode
{
int weight; /*the sum of all weights in this chain*/
int weight; /*the sum of all Weights in this chain*/
unsigned index; /*index of this leaf node (called "count" in the paper)*/
struct BPMNode* tail; /*the next nodes in this chain (null if last)*/
int in_use;
@ -769,7 +769,7 @@ static void boundaryPM(BPMLists* lists, BPMNode* leaves, size_t numpresent, int
}
else
{
/*sum of the weights of the head nodes of the previous lookahead chains.*/
/*sum of the weights of the head nodes of the previous lookahead chains.*/
int sum = lists->chains0[c - 1]->weight + lists->chains1[c - 1]->weight;
lists->chains0[c] = lists->chains1[c];
if (lastindex < numpresent && sum > leaves[lastindex].weight)