Add (extremely bad) BC1 encoding support
parent
628ad558d8
commit
c879061e4e
@ -0,0 +1,175 @@
|
|||||||
|
/* Python-rgbcx Texture Compression Library
|
||||||
|
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||||
|
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||||
|
and licenced under the public domain
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "BC1Encoder.h"
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
|
||||||
|
#include "../BlockView.h"
|
||||||
|
#include "../Color.h"
|
||||||
|
#include "../bitwiseEnums.h"
|
||||||
|
|
||||||
|
namespace rgbcx {
|
||||||
|
// 256-entry table of BC1MatchEntry; the code in this file indexes it with an 8-bit channel value.
using MatchList = std::array<BC1MatchEntry, 256>;
|
||||||
|
// Shared-ownership handle to a MatchList.
using MatchListPtr = std::shared_ptr<MatchList>;
|
||||||
|
// Shared-ownership handle to an Interpolator.
using InterpolatorPtr = std::shared_ptr<Interpolator>;
|
||||||
|
|
||||||
|
// region Free Functions/Templates
|
||||||
|
inline void PrepSingleColorTableEntry(unsigned &error, MatchList &match_table, uint8_t v, unsigned i, uint8_t low, uint8_t high, uint8_t low8, uint8_t high8,
|
||||||
|
bool ideal) {
|
||||||
|
unsigned new_error = iabs(v - (int)i);
|
||||||
|
|
||||||
|
// We only need to factor in 3% error in BC1 ideal mode.
|
||||||
|
if (ideal) new_error += (iabs(high8 - (int)low8) * 3) / 100;
|
||||||
|
|
||||||
|
// Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation.
|
||||||
|
if ((new_error < error) || (new_error == error && low == high)) {
|
||||||
|
assert(new_error <= UINT8_MAX);
|
||||||
|
|
||||||
|
match_table[i].low = (uint8_t)low;
|
||||||
|
match_table[i].high = (uint8_t)high;
|
||||||
|
match_table[i].error = (uint8_t)new_error;
|
||||||
|
|
||||||
|
error = new_error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Fills the single-color match tables for one endpoint bit depth.
 *
 * For every 8-bit target value, every (low, high) pair of S-bit endpoints is tried and the
 * pair whose interpolated value lands closest to the target is kept, once for the regular
 * interpolated value (`match_table`) and once for the "half" value (`match_table_half`).
 *
 * @tparam S               endpoint bit depth; must be 5 or 6
 * @param match_table      receives the best pair per target for Interpolate8/5/6
 * @param match_table_half receives the best pair per target for InterpolateHalf8/5/6
 * @param interpolator     supplies the interpolation functions and reports whether it is
 *                         "ideal" and whether it works on 8-bit expanded endpoints
 */
template <size_t S> void PrepSingleColorTable(MatchList &match_table, MatchList &match_table_half, Interpolator &interpolator) {
    static_assert(S == 5 || S == 6, "BC1 endpoints are 5 or 6 bits wide");

    const unsigned size = 1u << S;

    const bool ideal = interpolator.IsIdeal();
    const bool use_8bit = interpolator.CanInterpolate8Bit();

    for (unsigned i = 0; i < 256; i++) {
        // 256 is larger than any error that can be stored, so the first candidate always wins.
        unsigned error = 256;
        unsigned error_half = 256;

        // TODO: Can probably avoid testing for values that definitely wont yield good results,
        // e.g. low8 and high8 both much smaller or larger than index
        for (uint8_t low = 0; low < size; low++) {
            const uint8_t low8 = (S == 5) ? scale5To8(low) : scale6To8(low);

            for (uint8_t high = 0; high < size; high++) {
                // Expand the HIGH endpoint here. The 6-bit path previously expanded `low`
                // by mistake, which made high8 == low8 for every green-channel candidate.
                const uint8_t high8 = (S == 5) ? scale5To8(high) : scale6To8(high);
                uint8_t value, value_half;

                if (use_8bit) {
                    // Interpolator works on the expanded 8-bit endpoints.
                    value = interpolator.Interpolate8(high8, low8);
                    value_half = interpolator.InterpolateHalf8(high8, low8);
                } else {
                    // Interpolator works directly on the raw 5/6-bit endpoints.
                    value = (S == 5) ? interpolator.Interpolate5(high, low) : interpolator.Interpolate6(high, low);
                    value_half = (S == 5) ? interpolator.InterpolateHalf5(high, low) : interpolator.InterpolateHalf6(high, low);
                }

                PrepSingleColorTableEntry(error, match_table, value, i, low, high, low8, high8, ideal);
                PrepSingleColorTableEntry(error_half, match_table_half, value_half, i, low, high, low8, high8, ideal);
            }
        }
    }
}
|
||||||
|
// endregion
|
||||||
|
|
||||||
|
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
|
||||||
|
PrepSingleColorTable<5>(*_single_match5, *_single_match5_half, *_interpolator);
|
||||||
|
PrepSingleColorTable<6>(*_single_match6, *_single_match6_half, *_interpolator);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
 * Encodes one 4x4 block of pixels into `dest`.
 *
 * At present every block is routed through the single-color path using the pixel at (0, 0);
 * the general path below the check only gathers min/max/avg and writes nothing.
 *
 * @param pixels the 4x4 block to encode
 * @param dest   block that receives the encoded result
 */
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
    // Per-channel views of the block; fetched but not consumed yet.
    auto red_channel = pixels.GetChannel(0);
    auto green_channel = pixels.GetChannel(1);
    auto blue_channel = pixels.GetChannel(2);

    // for now assume (wrongly) everything is a single-color block
    const bool treat_as_single_color = pixels.IsSingleColor() || true;

    if (!treat_as_single_color) {
        // General (multi-color) path: currently unreachable and incomplete.
        Color min, max, avg;
        pixels.GetMinMaxAvgRGB(min, max, avg);
        return;
    }

    // single-color pixel block, do it the fast way
    EncodeBlockSingleColor(pixels.Get(0, 0), dest);
}
|
||||||
|
|
||||||
|
/**
 * Encodes a block in which every pixel has the same color.
 *
 * Looks up the best endpoint pair per channel in the precomputed match tables, optionally
 * prefers the "half" tables (3-color block) when the flags allow it and the summed error is
 * lower, then writes both endpoints and one selector byte replicated over all four rows.
 *
 * @param color the color shared by all pixels of the block
 * @param dest  block that receives the endpoints and selectors
 */
void BC1Encoder::EncodeBlockSingleColor(Color color, BC1Block *dest) const {
    // Selector byte written to every row; 0xAA is selector 2 for all four pixels of a row.
    uint8_t row_selectors = 0xAA;  // 2222
    uint16_t min16 = 0;
    uint16_t max16 = 0;

    // Best regular-interpolation matches per channel (5 bits for R/B, 6 bits for G).
    const BC1MatchEntry r4 = (*_single_match5)[color.r];
    const BC1MatchEntry g4 = (*_single_match6)[color.g];
    const BC1MatchEntry b4 = (*_single_match5)[color.b];

    const bool allow_3color = (_flags & (Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels)) != Flags::None;
    bool chose_3color = false;

    if (allow_3color) {
        const BC1MatchEntry r3 = (*_single_match5_half)[color.r];
        const BC1MatchEntry g3 = (*_single_match6_half)[color.g];
        const BC1MatchEntry b3 = (*_single_match5_half)[color.b];

        const unsigned err4 = r4.error + g4.error + b4.error;
        const unsigned err3 = r3.error + g3.error + b3.error;

        // Only switch to the half tables when they are strictly better.
        if (err3 < err4) {
            min16 = Color::Pack565Unscaled(r3.low, g3.low, b3.low);
            max16 = Color::Pack565Unscaled(r3.high, g3.high, b3.high);

            // Order the pair so that max16 <= min16 for the 3-color case.
            if (max16 > min16) { std::swap(min16, max16); }
            chose_3color = true;
        }
    }

    if (!chose_3color) {
        min16 = Color::Pack565Unscaled(r4.low, g4.low, b4.low);
        max16 = Color::Pack565Unscaled(r4.high, g4.high, b4.high);

        if (max16 < min16) {
            std::swap(min16, max16);
            row_selectors = 0xFF;  // invert mask to 3333
        } else if (min16 == max16) {
            // make sure this isnt accidentally a 3-color block
            // so make max16 > min16 (l > h)
            if (min16 == 0) {
                // both endpoints are zero: bump max instead of decrementing min
                max16 = 1;
                row_selectors = 0x55;  // 1111 (min value only, max is ignored)
            } else {
                min16--;
                row_selectors = 0;  // selector 0 everywhere, i.e. the unmodified max16 endpoint
            }
        }

        assert(max16 > min16);
    }

    dest->SetLowColor(max16);
    dest->SetHighColor(min16);
    for (unsigned row = 0; row < 4; row++) { dest->selectors[row] = row_selectors; }
}
|
||||||
|
|
||||||
|
} // namespace rgbcx
|
@ -0,0 +1,145 @@
|
|||||||
|
/* Python-rgbcx Texture Compression Library
|
||||||
|
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||||
|
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||||
|
and licenced under the public domain
|
||||||
|
|
||||||
|
This program is free software: you can redistribute it and/or modify
|
||||||
|
it under the terms of the GNU Lesser General Public License as published by
|
||||||
|
the Free Software Foundation, either version 3 of the License, or
|
||||||
|
(at your option) any later version.
|
||||||
|
|
||||||
|
This program is distributed in the hope that it will be useful,
|
||||||
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
GNU Lesser General Public License for more details.
|
||||||
|
|
||||||
|
You should have received a copy of the GNU Lesser General Public License
|
||||||
|
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <cassert>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <mutex>
|
||||||
|
|
||||||
|
#include "../BlockEncoder.h"
|
||||||
|
#include "../BlockView.h"
|
||||||
|
#include "../Interpolator.h"
|
||||||
|
#include "../bitwiseEnums.h"
|
||||||
|
#include "../ndebug.h"
|
||||||
|
#include "../tables.h"
|
||||||
|
#include "BC1Block.h"
|
||||||
|
|
||||||
|
namespace rgbcx {
|
||||||
|
|
||||||
|
// One slot of a single-color match table: the endpoint pair chosen for a given 8-bit target
// value, together with the error recorded for that choice.
struct BC1MatchEntry {
|
||||||
|
// high endpoint, stored unexpanded (the 5- or 6-bit value, not the 8-bit expansion)
uint8_t high;
|
||||||
|
// low endpoint, stored unexpanded (the 5- or 6-bit value, not the 8-bit expansion)
uint8_t low;
|
||||||
|
// error recorded for this pair by the table builder (checked there to fit in 8 bits)
uint8_t error;
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
 * BC1 block encoder operating on 4x4 pixel blocks.
 *
 * An encoder is bound to one Interpolator at construction time and owns four single-color
 * match tables derived from it. All tuning members start in a defined state (no flags set,
 * zero counts) so that they can be read safely before any configuration API exists.
 */
class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
   public:
    using InterpolatorPtr = std::shared_ptr<Interpolator>;

    // Bit flags controlling the encoder. Values are combined with the bitwise operators
    // for enums; EndpointSearchRoundsShift/Mask describe a packed numeric field, not flags.
    enum class Flags : uint32_t {
        None = 0,

        // Try to improve quality using the most likely total orderings.
        // The total_orderings_to_try parameter will then control the number of total orderings to try for 4 color blocks, and the
        // total_orderings_to_try3 parameter will control the number of total orderings to try for 3 color blocks (if they are enabled).
        UseLikelyTotalOrderings = 2,

        // Use 2 least squares passes, instead of one (same as stb_dxt's HIGHQUAL option).
        // Recommended if you're enabling UseLikelyTotalOrderings.
        TwoLeastSquaresPasses = 4,

        // Use3ColorBlocksForBlackPixels allows the BC1 encoder to use 3-color blocks for blocks containing black or very dark pixels.
        // Your shader/engine MUST ignore the alpha channel on textures encoded with this flag.
        // Average quality goes up substantially for my 100 texture corpus (~.5 dB), so it's worth using if you can.
        // Note the BC1 encoder does not actually support transparency in 3-color mode.
        // Don't set when encoding to BC3.
        Use3ColorBlocksForBlackPixels = 8,

        // If Use3ColorBlocks is set, the encoder can use 3-color mode for a small but noticeable gain in average quality, but lower perf.
        // If you also specify the UseLikelyTotalOrderings flag, set the total_orderings_to_try3 parameter to the number of total orderings to try.
        // Don't set when encoding to BC3.
        Use3ColorBlocks = 16,

        // Iterative will greatly increase encode time, but is very slightly higher quality.
        // Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, unless you just don't care about perf. at all.
        Iterative = 32,

        // BoundingBox enables a fast all-integer PCA approximation on 4-color blocks.
        // At level 0 options (no other flags), this is ~15% faster, and higher *average* quality.
        BoundingBox = 64,

        // Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks.
        UseFasterMSEEval = 128,

        // Examine all colors to compute selectors/MSE (slower than default)
        UseFullMSEEval = 256,

        // Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead of PCA.
        // Around 18% faster, very slightly lower average quality to better (depends on the content).
        Use2DLS = 512,

        // Use 6 power iterations vs. 4 for PCA.
        Use6PowerIters = 2048,

        // Check all total orderings - *very* slow. The encoder is not designed to be used in this way.
        Exhaustive = 8192,

        // Try 2 different ways of choosing the initial endpoints.
        // (Enumerator name is misspelt; kept as-is because it is part of the public interface.)
        TryAllInitialEndponts = 16384,

        // Same as BoundingBox, but implemented using integer math (faster, slightly less quality)
        BoundingBoxInt = 32768,

        // Try refining the final endpoints by examining nearby colors.
        EndpointSearchRoundsShift = 22,
        EndpointSearchRoundsMask = 1023U << EndpointSearchRoundsShift,
    };

    // Binds the encoder to `interpolator` and builds the single-color match tables from it.
    BC1Encoder(InterpolatorPtr interpolator);

    // Encodes one 4x4 pixel block into `dest`.
    void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override;

   private:
    const InterpolatorPtr _interpolator;

    // Default-initialized so that reading them is well-defined: the constructor does not set
    // them, and EncodeBlockSingleColor inspects _flags.
    Flags _flags = Flags::None;
    unsigned _search_rounds = 0;
    unsigned _orderings4 = 0;
    unsigned _orderings3 = 0;

    // Encodes a block whose pixels all share `color`.
    void EncodeBlockSingleColor(Color color, BC1Block *dest) const;

    // match tables used for single-color blocks
    // Each entry holds the high and low endpoint pair that best reproduces the 8-bit index,
    // with an included error value
    // these depend on the interpolator
    using MatchList = std::array<BC1MatchEntry, 256>;
    using MatchListPtr = std::shared_ptr<MatchList>;

    const MatchListPtr _single_match5 = std::make_shared<MatchList>();
    const MatchListPtr _single_match6 = std::make_shared<MatchList>();
    const MatchListPtr _single_match5_half = std::make_shared<MatchList>();
    const MatchListPtr _single_match6_half = std::make_shared<MatchList>();

    // static lookup tables, intended to be generated the first time an encoder is created
    // the mutex is meant to prevent race conditions if two encoders are created on different threads
    // NOTE(review): only the declarations are visible here; confirm where these are defined and filled.
    static std::mutex _luts_mutex;
    static bool _luts_initialized;

    // lookup table for hash values
    static uint16_t g_total_ordering4_hash[4096];
    static uint16_t g_total_ordering3_hash[256];

    static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3];
    static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3];
};
|
||||||
|
} // namespace rgbcx
|
Loading…
Reference in New Issue