Improved bindings and don't use punchthrough in BC3

This commit is contained in:
Andrew Cassidy 2021-03-18 02:34:31 -07:00
parent 70ebb43017
commit 71119b9279
16 changed files with 243 additions and 190 deletions

View File

@ -10,9 +10,9 @@
# add these directories to sys.path here. If the directory is relative to the
# documentation root, use os.path.abspath to make it absolute, like shown here.
#
import os
import sys
sys.path.insert(0, os.path.abspath('../quicktex'))
# import os
# import sys
# sys.path.insert(0, os.path.abspath('..'))
# -- Project information -----------------------------------------------------
@ -27,8 +27,8 @@ author = 'Andrew Cassidy'
# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
# ones.
extensions = [
'myst_parser',
'sphinx_rtd_theme',
# 'myst_parser',
# 'sphinx_rtd_theme',
'sphinx.ext.autodoc'
]
@ -45,7 +45,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes.
#
html_theme = 'sphinx_rtd_theme'
# html_theme = 'sphinx_rtd_theme'
# Add any paths that contain custom static files (such as style sheets) here,
# relative to this directory. They are copied after the builtin static files,

View File

@ -1,11 +1,12 @@
.. py:module:: quicktex.s3tc
s3tc module
===========
this is some text
bc1 module
----------
.. automodule:: quicktex.s3tc.bc1
:members:
:undoc-members:
:special-members:

View File

@ -36,22 +36,10 @@ void InitBC5(py::module_ &s3tc);
// Registers the `_s3tc` submodule on the parent module `m` and populates it
// by calling the per-component Init* functions.
//
// @param m  parent pybind11 module that receives the `_s3tc` submodule
void InitS3TC(py::module_ &m) {
    py::module_ s3tc = m.def_submodule("_s3tc", "s3tc compression library based on rgbcx.h written by Richard Geldreich");

    // NOTE(review): the commented-out enum binding below is presumably superseded by
    // InitInterpolator() — confirm and delete.
    /* using IType = Interpolator::Type;
    auto interpolator_type = py::enum_<IType>(s3tc, "InterpolatorType", R"pbdoc(
    An enum representing various methods for interpolating colors, used by the BC1 and BC3 encoders/decoders.
    Vendor-specific interpolation modes should only be used when the result will only be used on that type of GPU.
    For most applications, :py:attr:`~quicktex.s3tc.InterpolatorType.Ideal` should be used.
    )pbdoc");
    interpolator_type.value("Ideal", IType::Ideal, "The default mode, with no rounding for colors 2 and 3. This matches the D3D10 docs on BC1.");
    interpolator_type.value("IdealRound", IType::IdealRound, "Round colors 2 and 3. Matches the AMD Compressonator tool and the D3D9 docs on DXT1.");
    interpolator_type.value("Nvidia", IType::Nvidia, "Nvidia GPU mode.");
    interpolator_type.value("AMD", IType::AMD, "AMD GPU mode.");*/

    InitInterpolator(s3tc);
    InitBC1(s3tc);

    // The remaining bindings are currently disabled.
    // InitBC3(s3tc);
    // InitBC4(s3tc);
    // InitBC5(s3tc);
}
} // namespace quicktex::bindings

View File

@ -30,10 +30,14 @@
namespace quicktex::s3tc {
// Convenience overload: decodes `block` into `dest` by forwarding to the
// three-argument overload with allow_3color set to true.
void BC1Decoder::DecodeBlock(Color4x4 dest, BC1Block *const block) const noexcept(ndebug) {
DecodeBlock(dest, block, true);
}
void BC1Decoder::DecodeBlock(Color4x4 dest, BC1Block *const block, bool allow_3color) const noexcept(ndebug) {
const auto l = block->GetLowColor();
const auto h = block->GetHighColor();
const auto selectors = block->UnpackSelectors();
const auto colors = _interpolator->InterpolateBC1(l, h);
const auto colors = _interpolator->InterpolateBC1(l, h, allow_3color);
for (unsigned y = 0; y < 4; y++) {
for (unsigned x = 0; x < 4; x++) {

View File

@ -41,8 +41,9 @@ class BC1Decoder final : public BlockDecoderTemplate<BC1Block, 4, 4> {
void DecodeBlock(Color4x4 dest, BC1Block *const block) const noexcept(ndebug) override;
Interpolator::Type GetInterpolatorType() const { return _interpolator->GetType(); }
constexpr bool WritesAlpha() const { return write_alpha; }
void DecodeBlock(Color4x4 dest, BC1Block *const block, bool allow_3color) const noexcept(ndebug);
InterpolatorPtr GetInterpolator() const { return _interpolator; }
bool write_alpha;

View File

@ -25,6 +25,7 @@
#include <cmath>
#include <cstdint>
#include <memory>
#include <stdexcept>
#include <type_traits>
#include "../../BlockView.h"
@ -39,28 +40,51 @@
#include "OrderTable.h"
#include "SingleColorTable.h"
namespace quicktex::s3tc {
namespace quicktex::s3tc {
// constructors
BC1Encoder::BC1Encoder(unsigned int level, bool allow_3color, bool allow_3color_black, InterpolatorPtr interpolator) : _interpolator(interpolator) {
OrderTable<3>::Generate();
BC1Encoder::BC1Encoder(unsigned int level, ColorMode color_mode, InterpolatorPtr interpolator) : _interpolator(interpolator), _color_mode(color_mode) {
if (color_mode != ColorMode::FourColor && color_mode != ColorMode::ThreeColor && color_mode != ColorMode::ThreeColorBlack) {
throw std::invalid_argument("Encoder color mode must be FourColor, ThreeColor, or ThreeColorBlack");
}
OrderTable<4>::Generate();
_single_match5 = SingleColorTable<5, 4>(_interpolator);
_single_match6 = SingleColorTable<6, 4>(_interpolator);
assert(OrderTable<3>::generated);
assert(OrderTable<4>::generated);
if (!OrderTable<4>::generated) throw std::runtime_error("Failed to generate 4-color order tables");
if (!_single_match5) throw std::runtime_error("Failed to generate 5-bit 4-color single color table");
if (!_single_match6) throw std::runtime_error("Failed to generate 6-bit 4-color single color table");
SetLevel(level, allow_3color, allow_3color_black);
if (color_mode != ColorMode::FourColor) {
OrderTable<3>::Generate();
_single_match5_half = SingleColorTable<5, 3>(_interpolator);
_single_match6_half = SingleColorTable<6, 3>(_interpolator);
if (!OrderTable<3>::generated) throw std::runtime_error("Failed to generate 3-color order tables");
if (!_single_match5_half) throw std::runtime_error("Failed to generate 5-bit 3-color single color table");
if (!_single_match6_half) throw std::runtime_error("Failed to generate 6-bit 3-color single color table");
}
SetLevel(level);
}
// Getters and Setters
void BC1Encoder::SetLevel(unsigned level, bool allow_3color, bool allow_3color_black) {
_flags = Flags::None;
void BC1Encoder::SetLevel(unsigned level) {
if (level > 19) throw std::invalid_argument("Level out of range, bust be between 0 and 18 inclusive"); // theres a secret level 19 but shhhhhh
two_ls_passes = false;
two_ep_passes = false;
two_cf_passes = false;
exhaustive = false;
_power_iterations = 4;
_error_mode = ErrorMode::Check2;
_endpoint_mode = EndpointMode::PCA;
_search_rounds = 0;
_orderings3 = 1;
_orderings4 = 1;
_orderings3 = 0;
_orderings4 = 0;
switch (level) {
case 0:
@ -78,118 +102,160 @@ void BC1Encoder::SetLevel(unsigned level, bool allow_3color, bool allow_3color_b
break;
case 3:
// Slightly stronger than stb_dxt HIGHQUAL.
_flags = Flags::TwoLeastSquaresPasses;
two_ls_passes = true;
break;
case 4:
_flags = Flags::TwoLeastSquaresPasses | Flags::Use6PowerIters;
two_ls_passes = true;
_error_mode = ErrorMode::Full;
_power_iterations = 6;
break;
default:
case 5:
// stb_dxt HIGHQUAL + permit 3 color (if it's enabled).
_flags = Flags::TwoLeastSquaresPasses;
two_ls_passes = true;
_error_mode = ErrorMode::Faster;
break;
case 6:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_orderings4 = 1;
_orderings3 = 1;
_error_mode = ErrorMode::Faster;
break;
case 7:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_error_mode = ErrorMode::Faster;
_orderings4 = 4;
_orderings3 = 1;
break;
case 8:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_error_mode = ErrorMode::Faster;
_orderings4 = 8;
_orderings3 = 1;
break;
case 9:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_error_mode = ErrorMode::Check2;
_orderings4 = 11;
_orderings3 = 3;
break;
case 10:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_error_mode = ErrorMode::Check2;
_orderings4 = 20;
_orderings3 = 8;
break;
case 11:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_error_mode = ErrorMode::Check2;
_orderings4 = 28;
_orderings3 = 16;
break;
case 12:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
two_ls_passes = true;
_error_mode = ErrorMode::Check2;
_orderings4 = 32;
_orderings3 = 32;
break;
case 13:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
two_ls_passes = true;
two_ep_passes = true;
_error_mode = ErrorMode::Full;
_orderings4 = 32;
_orderings3 = 32;
_search_rounds = 20;
_power_iterations = 6;
break;
case 14:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
two_ls_passes = true;
two_ep_passes = true;
_error_mode = ErrorMode::Full;
_orderings4 = 32;
_orderings3 = 32;
_search_rounds = 32;
_power_iterations = 6;
break;
case 15:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
two_ls_passes = true;
two_ep_passes = true;
_error_mode = ErrorMode::Full;
_orderings4 = 56;
_orderings3 = 32;
_search_rounds = 32;
_power_iterations = 6;
break;
case 16:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
two_ls_passes = true;
two_ep_passes = true;
_error_mode = ErrorMode::Full;
_orderings4 = 80;
_orderings3 = 32;
_search_rounds = 256;
_power_iterations = 6;
break;
case 17:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
two_ls_passes = true;
two_ep_passes = true;
_error_mode = ErrorMode::Full;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
break;
case 18:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative;
two_ls_passes = true;
two_ep_passes = true;
two_cf_passes = true;
_error_mode = ErrorMode::Full;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
_power_iterations = 6;
break;
case 19:
// This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training.
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative |
Flags::Exhaustive;
two_ls_passes = true;
two_ep_passes = true;
two_cf_passes = true;
exhaustive = true;
_error_mode = ErrorMode::Full;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
_power_iterations = 6;
break;
}
if (level >= 5 && allow_3color) { _flags |= Flags::Use3ColorBlocks; }
if (level >= 5 && allow_3color_black) { _flags |= Flags::Use3ColorBlocksForBlackPixels; }
_orderings4 = clamp(_orderings4, 1U, OrderTable<4>::BestOrderCount);
_orderings3 = clamp(_orderings3, 1U, OrderTable<3>::BestOrderCount);
}
// Sets the number of 4-color total orderings passed to the ordered cluster fit refinement,
// clamped to the range [1, OrderTable<4>::BestOrderCount].
// NOTE(review): the lower clamp bound is 1, so 0 can never be stored, yet EncodeBlock tests
// `_orderings4 > 0` when deciding whether to run ordered cluster fit — confirm whether 0 should be allowed.
void BC1Encoder::SetOrderings4(unsigned orderings4) { _orderings4 = clamp(orderings4, 1U, OrderTable<4>::BestOrderCount); }
// Sets the number of 3-color total orderings passed to the ordered cluster fit refinement,
// clamped to the range [1, OrderTable<3>::BestOrderCount].
// NOTE(review): the lower clamp bound is 1, so 0 can never be stored, yet EncodeBlock tests
// `_orderings3 > 0` when deciding whether to run ordered cluster fit — confirm whether 0 should be allowed.
void BC1Encoder::SetOrderings3(unsigned orderings3) { _orderings3 = clamp(orderings3, 1U, OrderTable<3>::BestOrderCount); }
// Sets both ordering counts at once from a (4-color, 3-color) pair.
// Each value goes through its individual setter, so the same clamping applies.
void BC1Encoder::SetOrderings(OrderingPair orderings) {
    const auto [four_color_count, three_color_count] = orderings;
    SetOrderings4(four_color_count);
    SetOrderings3(three_color_count);
}
// Sets the number of power iterations used by FindEndpoints when stretching the delta vector
// with the covariance matrix. The value is clamped to [min_power_iterations, max_power_iterations].
void BC1Encoder::SetPowerIterations(unsigned int power_iters) { _power_iterations = clamp(power_iters, min_power_iterations, max_power_iterations); }
// Public methods
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
@ -201,22 +267,25 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
auto metrics = pixels.GetMetrics();
bool needs_block_error = (_flags & Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks) != Flags::None;
const bool use_likely_orderings = (exhaustive || _orderings3 > 0 || _orderings4 > 0);
bool needs_block_error = use_likely_orderings;
needs_block_error |= (_color_mode == ColorMode::ThreeColor);
needs_block_error |= (_color_mode == ColorMode::ThreeColorBlack) && metrics.has_black;
needs_block_error |= (_error_mode != ErrorMode::None);
needs_block_error |= (_search_rounds > 0);
needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None);
ErrorMode error_mode = needs_block_error ? _error_mode : ErrorMode::None;
assert(!((_error_mode == ErrorMode::None) && needs_block_error));
const unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
const unsigned total_ep_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;
const unsigned total_cf_iters = (_flags & Flags::Iterative) != Flags::None ? 2 : 1;
const unsigned total_ls_passes = two_ls_passes ? 2 : 1;
const unsigned total_cf_passes = two_cf_passes ? 2 : 1;
const unsigned total_ep_passes = (needs_block_error && two_ep_passes) ? 2 : 1;
// Initial block generation
EncodeResults orig;
EncodeResults result;
for (unsigned round = 0; round < total_ep_rounds; round++) {
for (unsigned round = 0; round < total_ep_passes; round++) {
EndpointMode endpoint_mode = (round == 1) ? EndpointMode::BoundingBox : _endpoint_mode;
EncodeResults trial_orig;
@ -234,20 +303,20 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
}
// First refinement pass using ordered cluster fit
if (result.error > 0 && (bool)(_flags & Flags::UseLikelyTotalOrderings)) {
for (unsigned iter = 0; iter < total_cf_iters; iter++) { RefineBlockCF<ColorMode::FourColor>(pixels, result, metrics, _error_mode, _orderings4); }
if (result.error > 0 && use_likely_orderings) {
for (unsigned iter = 0; iter < total_cf_passes; iter++) { RefineBlockCF<ColorMode::FourColor>(pixels, result, metrics, _error_mode, _orderings4); }
}
// try for 3-color block
if (result.error > 0 && (bool)(_flags & Flags::Use3ColorBlocks)) {
if (result.error > 0 && (bool)(_color_mode & ColorMode::ThreeColor)) {
EncodeResults trial_result = orig;
FindSelectors<ColorMode::ThreeColor>(pixels, trial_result, ErrorMode::Full);
RefineBlockLS<ColorMode::ThreeColor>(pixels, trial_result, metrics, ErrorMode::Full, total_ls_passes);
// First refinement pass using ordered cluster fit
if (trial_result.error > 0 && (bool)(_flags & Flags::UseLikelyTotalOrderings)) {
for (unsigned iter = 0; iter < total_cf_iters; iter++) {
if (trial_result.error > 0 && use_likely_orderings) {
for (unsigned iter = 0; iter < total_cf_passes; iter++) {
RefineBlockCF<ColorMode::ThreeColor>(pixels, trial_result, metrics, ErrorMode::Full, _orderings3);
}
}
@ -256,7 +325,7 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
}
// try for 3-color block with black
if (result.error > 0 && (bool)(_flags & Flags::Use3ColorBlocksForBlackPixels) && metrics.has_black && !metrics.max.IsBlack()) {
if (result.error > 0 && (_color_mode == ColorMode::ThreeColorBlack) && metrics.has_black && !metrics.max.IsBlack()) {
EncodeResults trial_result;
BlockMetrics metrics_no_black = pixels.GetMetrics(true);
@ -288,7 +357,7 @@ void BC1Encoder::WriteBlockSolid(Color color, BC1Block *dest) const {
EncodeResults result;
FindEndpointsSingleColor(result, color, false);
if ((_flags & (Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels)) != Flags::None) {
if ((bool)(_color_mode & ColorMode::ThreeColor)) {
EncodeResults result_3color;
FindEndpointsSingleColor(result_3color, color, true);
@ -298,7 +367,7 @@ void BC1Encoder::WriteBlockSolid(Color color, BC1Block *dest) const {
min16 = result.low.Pack565Unscaled();
max16 = result.high.Pack565Unscaled();
if (result.color_mode == ColorMode::Solid) {
if (result.solid) {
if (min16 == max16) {
// make sure this isn't accidentally a 3-color block
// so make max16 > min16 (l > h)
@ -387,7 +456,7 @@ void BC1Encoder::FindEndpointsSingleColor(EncodeResults &block, Color color, boo
BC1MatchEntry match_g = match6->at(color.g);
BC1MatchEntry match_b = match5->at(color.b);
block.color_mode = is_3color ? ColorMode::ThreeColorSolid : ColorMode::Solid;
block.color_mode = is_3color ? ColorMode::ThreeColor : ColorMode::FourColor;
block.error = match_r.error + match_g.error + match_b.error;
block.low = Color(match_r.low, match_g.low, match_b.low);
block.high = Color(match_r.high, match_g.high, match_b.high);
@ -551,7 +620,6 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, EncodeResults &block, const Bloc
Vector4 axis = {306, 601, 117}; // Luma vector
Matrix4x4 covariance = Matrix4x4::Identity();
const unsigned total_power_iters = (_flags & Flags::Use6PowerIters) != Flags::None ? 6 : 4;
for (unsigned i = 0; i < 16; i++) {
auto val = pixels.Get(i);
@ -578,7 +646,7 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, EncodeResults &block, const Bloc
// using the covariance matrix, stretch the delta vector towards the primary axis of the data using power iteration
// the end result of this may actually be the same as the least squares approach, will have to do more research
for (unsigned power_iter = 0; power_iter < total_power_iters; power_iter++) { delta = covariance * delta; }
for (unsigned power_iter = 0; power_iter < _power_iterations; power_iter++) { delta = covariance * delta; }
// if we found any correlation, then this is our new axis. Otherwise we fall back to the luma vector
float k = delta.MaxAbs(3);
@ -618,7 +686,6 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, EncodeResults &block, const Bloc
template <BC1Encoder::ColorMode M> void BC1Encoder::FindSelectors(Color4x4 &pixels, EncodeResults &block, ErrorMode error_mode) const {
assert(!((error_mode != ErrorMode::Full) && (bool)(M & ColorMode::ThreeColor)));
assert(!(bool)(M & ColorMode::Solid));
const int color_count = (unsigned)M & 0x0F;
@ -722,7 +789,6 @@ template <BC1Encoder::ColorMode M> void BC1Encoder::FindSelectors(Color4x4 &pixe
template <BC1Encoder::ColorMode M> bool BC1Encoder::RefineEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics) const {
const int color_count = (unsigned)M & 0x0F;
static_assert(color_count == 3 || color_count == 4);
static_assert(!(bool)(M & ColorMode::Solid));
assert(block.color_mode != ColorMode::Incomplete);
int denominator = color_count - 1;
@ -769,7 +835,6 @@ template <BC1Encoder::ColorMode M> bool BC1Encoder::RefineEndpointsLS(Color4x4 p
template <BC1Encoder::ColorMode M> void BC1Encoder::RefineEndpointsLS(std::array<Vector4, 17> &sums, EncodeResults &block, Vector4 &matrix, Hash hash) const {
const int color_count = (unsigned)M & 0x0F;
static_assert(color_count == 3 || color_count == 4);
static_assert(!(bool)(M & ColorMode::Solid));
assert(block.color_mode != ColorMode::Incomplete);
int denominator = color_count - 1;
@ -821,7 +886,6 @@ template <BC1Encoder::ColorMode M>
void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetrics &metrics, ErrorMode error_mode, unsigned orderings) const {
const int color_count = (unsigned)M & 0x0F;
static_assert(color_count == 3 || color_count == 4);
static_assert(!(bool)(M & ColorMode::Solid));
assert(block.color_mode != ColorMode::Incomplete);
using OrderTable = OrderTable<color_count>;
@ -852,9 +916,9 @@ void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetr
sums[i + 1] = sums[i] + color_vectors[p];
}
const unsigned q_total = ((_flags & Flags::Exhaustive) != Flags::None) ? OrderTable::OrderCount : orderings;
const unsigned q_total = exhaustive ? OrderTable::OrderCount : orderings;
for (Hash q = 0; q < q_total; q++) {
Hash trial_hash = ((_flags & Flags::Exhaustive) != Flags::None) ? q : OrderTable::BestOrders[start_hash][q];
Hash trial_hash = exhaustive ? q : OrderTable::BestOrders[start_hash][q];
Vector4 trial_matrix = OrderTable::GetFactors(trial_hash);
EncodeResults trial_result = orig;
@ -872,7 +936,7 @@ void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetr
}
void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const {
if ((bool)(block.color_mode & ColorMode::Solid)) return;
if (block.solid) return;
static const std::array<Vector4Int, 16> Voxels = {{
{1, 0, 0, 3}, // 0
@ -938,5 +1002,4 @@ void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const {
if (i - prev_improvement_index > 32) break;
}
}
} // namespace quicktex::s3tc

View File

@ -24,6 +24,7 @@
#include <cstddef>
#include <cstdint>
#include <memory>
#include <tuple>
#include <type_traits>
#include "../../BlockEncoder.h"
@ -42,43 +43,24 @@ namespace quicktex::s3tc {
class BC1Encoder final : public BlockEncoderTemplate<BC1Block, 4, 4> {
public:
using InterpolatorPtr = std::shared_ptr<Interpolator>;
using OrderingPair = std::tuple<unsigned, unsigned>;
enum class Flags {
None = 0,
inline static constexpr unsigned min_power_iterations = 4;
inline static constexpr unsigned max_power_iterations = 10;
// Try to improve quality using the most likely total orderings.
// The total_orderings_to_try parameter will then control the number of total orderings to try for 4 color blocks, and the
// total_orderings_to_try3 parameter will control the number of total orderings to try for 3 color blocks (if they are enabled).
UseLikelyTotalOrderings = 1,
enum class ColorMode {
// An incomplete block with invalid selectors or endpoints
Incomplete = 0x00,
// Use 2 least squares passes instead of one (same as stb_dxt's HIGHQUAL option).
// Recommended if you're enabling UseLikelyTotalOrderings.
TwoLeastSquaresPasses = 2,
// A block where color0 <= color1
ThreeColor = 0x03,
// Use3ColorBlocksForBlackPixels allows the BC1 encoder to use 3-color blocks for blocks containing black or very dark pixels.
// Your shader/engine MUST ignore the alpha channel on textures encoded with this flag.
// Average quality goes up substantially for my 100 texture corpus (~.5 dB), so it's worth using if you can.
// Note the BC1 encoder does not actually support transparency in 3-color mode.
// Don't set when encoding to BC3.
Use3ColorBlocksForBlackPixels = 4,
// A block where color0 > color1
FourColor = 0x04,
// If Use3ColorBlocks is set, the encoder can use 3-color mode for a small but noticeable gain in average quality, but lower perf.
// If you also specify the UseLikelyTotalOrderings flag, set the total_orderings_to_try3 parameter to the number of total orderings to try.
// Don't set when encoding to BC3.
Use3ColorBlocks = 8,
// Iterative will greatly increase encode time, but is very slightly higher quality.
// Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, unless you just don't care about perf. at all.
Iterative = 16,
// Use 6 power iterations vs. 4 for PCA.
Use6PowerIters = 32,
// Check all total orderings - *very* slow. The encoder is not designed to be used in this way.
Exhaustive = 64,
// Try 2 different ways of choosing the initial endpoints.
TryAllInitialEndpoints = 128,
// A 3 color block with black pixels (selector 3)
UseBlack = 0x10,
ThreeColorBlack = ThreeColor | UseBlack,
};
enum class ErrorMode {
@ -111,17 +93,17 @@ class BC1Encoder final : public BlockEncoderTemplate<BC1Block, 4, 4> {
PCA
};
BC1Encoder(unsigned level, bool allow_3color, bool allow_3color_black, InterpolatorPtr interpolator);
bool exhaustive;
bool two_ls_passes;
bool two_ep_passes;
bool two_cf_passes;
BC1Encoder(unsigned int level = 5, bool allow_3color = true, bool allow_3color_black = true)
: BC1Encoder(level, allow_3color, allow_3color_black, std::make_shared<Interpolator>()) {}
BC1Encoder(unsigned level, ColorMode color_mode, InterpolatorPtr interpolator);
Interpolator::Type GetInterpolatorType() const { return _interpolator->GetType(); }
BC1Encoder(unsigned int level = 5, ColorMode color_mode = ColorMode::FourColor) : BC1Encoder(level, color_mode, std::make_shared<Interpolator>()) {}
void SetLevel(unsigned level, bool allow_3color = true, bool allow_3color_black = true);
Flags GetFlags() const { return _flags; }
void SetFlags(Flags flags) { _flags = flags; };
// Getters and Setters
void SetLevel(unsigned level);
ErrorMode GetErrorMode() const { return _error_mode; }
void SetErrorMode(ErrorMode error_mode) { _error_mode = error_mode; };
@ -129,14 +111,25 @@ class BC1Encoder final : public BlockEncoderTemplate<BC1Block, 4, 4> {
EndpointMode GetEndpointMode() const { return _endpoint_mode; }
void SetEndpointMode(EndpointMode endpoint_mode) { _endpoint_mode = endpoint_mode; }
InterpolatorPtr GetInterpolator() const { return _interpolator; }
ColorMode GetColorMode() const { return _color_mode; }
unsigned int GetSearchRounds() const { return _search_rounds; }
void SetSearchRounds(unsigned search_rounds) { _search_rounds = search_rounds; }
unsigned int GetOrderings4() const { return _orderings4; }
unsigned int GetOrderings3() const { return _orderings3; }
unsigned GetOrderings4() const { return _orderings4; }
unsigned GetOrderings3() const { return _orderings3; }
void SetOrderings4(unsigned orderings4);
void SetOrderings3(unsigned orderings3);
OrderingPair GetOrderings() const { return OrderingPair(_orderings4, _orderings3); }
void SetOrderings(OrderingPair orderings);
unsigned GetPowerIterations() const { return _power_iterations; }
void SetPowerIterations(unsigned power_iters);
// Public Methods
void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override;
virtual size_t MTThreshold() const override { return 16; }
@ -145,40 +138,32 @@ class BC1Encoder final : public BlockEncoderTemplate<BC1Block, 4, 4> {
using Hash = uint16_t;
using BlockMetrics = Color4x4::BlockMetrics;
// Describes how a block is (or is being) encoded.
// The low nibble holds the number of colors in the block (the templates extract it with `& 0x0F`);
// the higher bits are modifier flags that are OR-ed onto a color count.
enum class ColorMode {
// An incomplete block with invalid selectors or endpoints
Incomplete = 0x00,
// A 3-color block, where color0 <= color1
ThreeColor = 0x03,
// A 4-color block, where color0 > color1
FourColor = 0x04,
// Flag: a 3-color block that uses black pixels (selector 3)
UseBlack = 0x10,
// Flag: set on results produced by the single-color endpoint search
Solid = 0x20,
// Convenience combinations of a color count and a flag
ThreeColorBlack = ThreeColor | UseBlack,
ThreeColorSolid = ThreeColor | Solid,
FourColorSolid = FourColor | Solid,
};
// Unpacked BC1 block with metadata.
//
// Every scalar member has a default initializer, so a default-constructed
// EncodeResults is in a well-defined "nothing encoded yet" state. Without them,
// `solid` and `color_mode` would hold indeterminate values, while the encoder
// reads them (`if (result.solid)`, `if (block.solid) return;`, and the asserts
// against ColorMode::Incomplete).
struct EncodeResults {
    // Endpoint colors; packed to 565 when the block is written out.
    Color low;
    Color high;

    // One selector per pixel of the 4x4 block. Left uninitialized on construction.
    std::array<uint8_t, 16> selectors;

    // How the block is encoded; Incomplete until an encoding step fills it in.
    ColorMode color_mode = ColorMode::Incomplete;

    // Checked by WriteBlockSolid and EndpointSearch; EndpointSearch returns immediately when set.
    bool solid = false;

    // Error of the current encoding; starts at the maximum so any real result compares lower.
    unsigned error = UINT_MAX;
};
const InterpolatorPtr _interpolator;
const ColorMode _color_mode;
// match tables used for single-color blocks
// Each entry includes the high and low pair that reproduces the 8-bit index as closely as possible,
// with an included error value
// these depend on the interpolator
const MatchListPtr _single_match5 = SingleColorTable<5, 4>(_interpolator);
const MatchListPtr _single_match6 = SingleColorTable<6, 4>(_interpolator);
const MatchListPtr _single_match5_half = SingleColorTable<5, 3>(_interpolator);
const MatchListPtr _single_match6_half = SingleColorTable<6, 3>(_interpolator);
MatchListPtr _single_match5 = SingleColorTable<5, 4>(_interpolator);
MatchListPtr _single_match6 = SingleColorTable<6, 4>(_interpolator);
MatchListPtr _single_match5_half = SingleColorTable<5, 3>(_interpolator);
MatchListPtr _single_match6_half = SingleColorTable<6, 3>(_interpolator);
Flags _flags;
ErrorMode _error_mode;
EndpointMode _endpoint_mode;
unsigned _power_iterations;
unsigned _search_rounds;
unsigned _orderings4;
unsigned _orderings3;

View File

@ -45,37 +45,14 @@ void InitBC1(py::module_ &s3tc) {
auto block_decoder = py::type::of<BlockDecoder>();
// BC1Encoder
py::class_<BC1Encoder> bc1_encoder(bc1, "BC1Encoder", block_encoder);
py::class_<BC1Encoder> bc1_encoder(bc1, "BC1Encoder", block_encoder, "Encodes RGB textures to BC1");
bc1_encoder.def(py::init<unsigned, bool, bool>(), "level"_a = 5, "use_3color"_a = true, "use_3color_black"_a = true);
bc1_encoder.def(py::init<unsigned, bool, bool, InterpolatorPtr>(), "level"_a, "use_3color"_a, "use_3color_black"_a, "interpolator"_a);
bc1_encoder.def(py::init<unsigned, BC1Encoder::ColorMode>(), "level"_a = 5, "color_mode"_a = BC1Encoder::ColorMode::FourColor);
bc1_encoder.def(py::init<unsigned, BC1Encoder::ColorMode, InterpolatorPtr>(), "level"_a, "color_mode"_a, "interpolator"_a);
bc1_encoder.def("set_level", &BC1Encoder::SetLevel);
bc1_encoder.def_property_readonly("interpolator_type", &BC1Encoder::GetInterpolatorType);
bc1_encoder.def_property("flags", &BC1Encoder::GetFlags, &BC1Encoder::SetFlags);
bc1_encoder.def_property("error_mode", &BC1Encoder::GetErrorMode, &BC1Encoder::SetErrorMode);
bc1_encoder.def_property("endpoint_mode", &BC1Encoder::GetEndpointMode, &BC1Encoder::SetEndpointMode);
bc1_encoder.def_property("search_rounds", &BC1Encoder::GetSearchRounds, &BC1Encoder::SetSearchRounds);
bc1_encoder.def_property("orderings_4", &BC1Encoder::GetOrderings4, &BC1Encoder::SetOrderings4);
bc1_encoder.def_property("orderings_3", &BC1Encoder::GetOrderings3, &BC1Encoder::SetOrderings3);
bc1_encoder.def("set_level", &BC1Encoder::SetLevel, "Use a preset quality level, between 0 and 18. For better control, see the advanced API below");
using Flags = BC1Encoder::Flags;
py::enum_<Flags>(bc1_encoder, "Flags", py::arithmetic())
.value("UseLikelyTotalOrderings", Flags::UseLikelyTotalOrderings)
.value("TwoLeastSquaresPasses", Flags::TwoLeastSquaresPasses)
.value("Use3ColorBlocksForBlackPixels", Flags::Use3ColorBlocksForBlackPixels)
.value("Use3ColorBlocks", Flags::Use3ColorBlocks)
.value("Iterative", Flags::Iterative)
.value("Use6PowerIters", Flags::Use6PowerIters)
.value("Exhaustive", Flags::Exhaustive)
.value("TryAllInitialEndpoints", Flags::TryAllInitialEndpoints)
.def("__invert__", [](Flags f1) { return ~unsigned(f1); })
.def("__and__", [](Flags f1, Flags f2) { return unsigned(f1) & unsigned(f2); })
.def("__rand__", [](Flags f1, Flags f2) { return unsigned(f1) & unsigned(f2); })
.def("__or__", [](Flags f1, Flags f2) { return unsigned(f1) | unsigned(f2); })
.def("__ror__", [](Flags f1, Flags f2) { return unsigned(f1) | unsigned(f2); })
.def("__xor__", [](Flags f1, Flags f2) { return unsigned(f1) ^ unsigned(f2); })
.def("__rxor__", [](Flags f1, Flags f2) { return unsigned(f2) ^ unsigned(f1); });
// Advanced API
py::enum_<BC1Encoder::EndpointMode>(bc1_encoder, "EndpointMode")
.value("LeastSquares", BC1Encoder::EndpointMode::LeastSquares)
@ -89,13 +66,53 @@ void InitBC1(py::module_ &s3tc) {
.value("Check2", BC1Encoder::ErrorMode::Check2)
.value("Full", BC1Encoder::ErrorMode::Full);
py::enum_<BC1Encoder::ColorMode>(bc1_encoder, "ColorMode")
.value("FourColor", BC1Encoder::ColorMode::FourColor, "Default color mode. Only 4-color blocks will be output, where color0 > color1")
.value("ThreeColor", BC1Encoder::ColorMode::ThreeColor)
.value("ThreeColorBlack", BC1Encoder::ColorMode::ThreeColorBlack);
bc1_encoder.def_readonly_static("max_power_iterations", &BC1Encoder::max_power_iterations, "Maximum value of :py:attr:`BC1Encoder.power_iterations`.");
bc1_encoder.def_readonly_static("min_power_iterations", &BC1Encoder::min_power_iterations, "Minimum value of :py:attr:`BC1Encoder.power_iterations`.");
bc1_encoder.def_property_readonly("interpolator", &BC1Encoder::GetInterpolator, "The interpolator used by this encoder. This is a readonly property.");
bc1_encoder.def_property_readonly("color_mode", &BC1Encoder::GetColorMode, "The color mode used by this encoder. This is a readonly property.");
bc1_encoder.def_property("error_mode", &BC1Encoder::GetErrorMode, &BC1Encoder::SetErrorMode, "The error mode used by this encoder for finding selectors.");
bc1_encoder.def_property("endpoint_mode", &BC1Encoder::GetEndpointMode, &BC1Encoder::SetEndpointMode, "The endpoint mode used by this encoder.");
bc1_encoder.def_readwrite("two_ls_passes", &BC1Encoder::two_ls_passes,
"Use 2 least squares pass, instead of one (same as stb_dxt's HIGHQUAL option).\n"
"Recommended if you're setting the orderings settings greater than 0.");
bc1_encoder.def_readwrite("two_ep_passes", &BC1Encoder::two_ep_passes, "Try 2 different ways of choosing the initial endpoints.");
bc1_encoder.def_readwrite("two_cf_passes", &BC1Encoder::two_cf_passes,
"Greatly increase encode time, with very slightly higher quality.\n"
"Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, "
"unless you just don't care about performance at all.");
bc1_encoder.def_readwrite("exhaustive", &BC1Encoder::exhaustive,
"Check all total orderings - *very* slow. The encoder is not designed to be used in this way");
bc1_encoder.def_property("search_rounds", &BC1Encoder::GetSearchRounds, &BC1Encoder::SetSearchRounds,
"Setting search rounds > 0 enables refining the final endpoints by examining nearby colors. A higher value has a higher quality "
"at the expense of performance.");
bc1_encoder.def_property("orderings", &BC1Encoder::GetOrderings, &BC1Encoder::SetOrderings,
"setting the orderings > 0 enables ordered cluster fit using a lookup table of similar blocks. Value is a tuple of (4 color "
"orders, 3 color orders), where higher values have a higher quality at the expense of performance.");
bc1_encoder.def_property("power_iterations", &BC1Encoder::GetPowerIterations, &BC1Encoder::SetPowerIterations,
"Number of power iterations used with the PCA endpoint mode. Value should be around 4 to 6. "
"Automatically clamped to between :py:const:`BC1Encoder.min_power_iterations` and :py:const:`BC1Encoder.max_power_iterations`");
// BC1Decoder
py::class_<BC1Decoder> bc1_decoder(bc1, "BC1Decoder", block_decoder);
py::class_<BC1Decoder> bc1_decoder(bc1, "BC1Decoder", block_decoder, "Decodes BC1 textures to RGB");
bc1_decoder.def(py::init<bool>(), "write_alpha"_a = false);
bc1_decoder.def(py::init<bool, InterpolatorPtr>(), "write_alpha"_a, "interpolator"_a);
bc1_decoder.def_property_readonly("interpolator_type", &BC1Decoder::GetInterpolatorType);
bc1_decoder.def_property_readonly("interpolator", &BC1Decoder::GetInterpolator);
bc1_decoder.def_readwrite("write_alpha", &BC1Decoder::write_alpha);
}
} // namespace quicktex::bindings

View File

@ -28,7 +28,7 @@
namespace quicktex::s3tc {
// Decodes one BC3 block into dest: the block's color_block is handed to the BC1 decoder
// and its alpha_block is handed to the BC4 decoder.
// NOTE(review): the two consecutive _bc1_decoder calls below differ only in the trailing
// `false` argument and look like the before/after versions of a single call; confirm
// which one the real file keeps.
void BC3Decoder::DecodeBlock(Color4x4 dest, BC3Block *const block) const noexcept(ndebug) {
_bc1_decoder->DecodeBlock(dest, &(block->color_block));
// presumably `false` disables BC1's 3-color (punch-through) mode for BC3 color data — TODO confirm against BC1Decoder::DecodeBlock
_bc1_decoder->DecodeBlock(dest, &(block->color_block), false);
// presumably 3 is the destination channel index the decoded alpha values are written to — TODO confirm against BC4Decoder::DecodeBlock
_bc4_decoder->DecodeBlock(dest, &(block->alpha_block), 3);
}
} // namespace quicktex::s3tc

View File

@ -37,9 +37,9 @@ class BC3Decoder : public BlockDecoderTemplate<BC3Block, 4, 4> {
using BC4DecoderPtr = std::shared_ptr<BC4Decoder>;
using InterpolatorPtr = std::shared_ptr<Interpolator>;
BC3Decoder(BC1DecoderPtr bc1_decoder) : _bc1_decoder(bc1_decoder), _bc4_decoder(std::make_shared<BC4Decoder>(3)) {}
BC3Decoder(InterpolatorPtr interpolator) : _bc1_decoder(std::make_shared<BC1Decoder>(interpolator)), _bc4_decoder(std::make_shared<BC4Decoder>(3)) {}
BC3Decoder(InterpolatorPtr interpolator = std::make_shared<Interpolator>()) : BC3Decoder(std::make_shared<BC1Decoder>(interpolator)) {}
BC3Decoder() : BC3Decoder(std::make_shared<Interpolator>()) {}
void DecodeBlock(Color4x4 dest, BC3Block *const block) const noexcept(ndebug) override;

View File

@ -36,13 +36,10 @@ class BC3Encoder : public BlockEncoderTemplate<BC3Block, 4, 4> {
using BC4EncoderPtr = std::shared_ptr<BC4Encoder>;
using InterpolatorPtr = std::shared_ptr<Interpolator>;
BC3Encoder(BC1EncoderPtr bc1_encoder) : _bc1_encoder(bc1_encoder), _bc4_encoder(std::make_shared<BC4Encoder>(3)) {}
BC3Encoder(unsigned level, InterpolatorPtr interpolator)
: _bc1_encoder(std::make_shared<BC1Encoder>(level, BC1Encoder::ColorMode::FourColor, interpolator)), _bc4_encoder(std::make_shared<BC4Encoder>(3)) {}
BC3Encoder(unsigned level, bool allow_3color, bool allow_3color_black, InterpolatorPtr interpolator)
: BC3Encoder(std::make_shared<BC1Encoder>(level, allow_3color, allow_3color_black, interpolator)) {}
BC3Encoder(unsigned level = 5, bool allow_3color = true, bool allow_3color_black = true)
: BC3Encoder(std::make_shared<BC1Encoder>(level, allow_3color, allow_3color_black, std::make_shared<Interpolator>())) {}
BC3Encoder(unsigned level = 5) : BC3Encoder(level, std::make_shared<Interpolator>()) {}
void EncodeBlock(Color4x4 pixels, BC3Block *dest) const override;

View File

@ -40,7 +40,6 @@ using InterpolatorPtr = std::shared_ptr<Interpolator>;
using BC1EncoderPtr = std::shared_ptr<BC1Encoder>;
using BC1DecoderPtr = std::shared_ptr<BC1Decoder>;
void InitBC3(py::module_ &s3tc) {
auto bc3 = s3tc.def_submodule("_bc3", "BC3 encoding/decoding module");
auto block_encoder = py::type::of<BlockEncoder>();
@ -49,9 +48,8 @@ void InitBC3(py::module_ &s3tc) {
// BC3Encoder
py::class_<BC3Encoder> bc3_encoder(bc3, "BC3Encoder", block_encoder);
bc3_encoder.def(py::init<BC1EncoderPtr>(), "bc1_encoder"_a);
bc3_encoder.def(py::init<unsigned, bool, bool>(), "level"_a = 5, "use_3color"_a = true, "use_3color_black"_a = true);
bc3_encoder.def(py::init<unsigned, bool, bool, InterpolatorPtr>(), "level"_a, "use_3color"_a, "use_3color_black"_a, "interpolator"_a);
bc3_encoder.def(py::init<unsigned>(), "level"_a = 5);
bc3_encoder.def(py::init<unsigned, InterpolatorPtr>(), "level"_a, "interpolator"_a);
bc3_encoder.def_property_readonly("bc1_encoder", &BC3Encoder::GetBC1Encoder);
bc3_encoder.def_property_readonly("bc4_encoder", &BC3Encoder::GetBC4Encoder);
@ -60,7 +58,6 @@ void InitBC3(py::module_ &s3tc) {
py::class_<BC3Decoder> bc3_decoder(bc3, "BC3Decoder", block_decoder);
bc3_decoder.def(py::init<>());
bc3_decoder.def(py::init<BC1DecoderPtr>(), "bc1_decoder"_a);
bc3_decoder.def(py::init<InterpolatorPtr>(), "interpolator"_a);
bc3_decoder.def_property_readonly("bc1_decoder", &BC3Decoder::GetBC1Decoder);

View File

@ -20,10 +20,7 @@
#include <pybind11/pybind11.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include "../../BlockDecoder.h"
#include "../../BlockEncoder.h"

View File

@ -25,6 +25,7 @@
#include <stdexcept>
#include "../../util.h"
#include "../../Color.h"
namespace quicktex::s3tc {
@ -49,8 +50,9 @@ uint8_t Interpolator::Interpolate6(uint8_t v0, uint8_t v1) const { return Interp
// 5-bit variant of the half interpolation: both inputs are expanded with scale5To8 and the
// result of InterpolateHalf8 on the expanded values is returned.
uint8_t Interpolator::InterpolateHalf5(uint8_t v0, uint8_t v1) const {
    const auto expanded0 = scale5To8(v0);
    const auto expanded1 = scale5To8(v1);
    return InterpolateHalf8(expanded0, expanded1);
}
// 6-bit variant of the half interpolation: both inputs are expanded with scale6To8 and the
// result of InterpolateHalf8 on the expanded values is returned.
uint8_t Interpolator::InterpolateHalf6(uint8_t v0, uint8_t v1) const {
    const auto expanded0 = scale6To8(v0);
    const auto expanded1 = scale6To8(v1);
    return InterpolateHalf8(expanded0, expanded1);
}
std::array<Color, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) const {
return InterpolateBC1(Color::Unpack565Unscaled(low), Color::Unpack565Unscaled(high), (high >= low));
// Produces the four BC1 colors for a block from its two packed 5:6:5 endpoints.
// Both endpoints are unpacked with Color::Unpack565Unscaled and forwarded to the
// Color-based overload. 3-color mode is requested only when the caller allows it
// (allow_3color) and high >= low.
std::array<Color, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high, bool allow_3color) const {
    const auto low_endpoint = Color::Unpack565Unscaled(low);
    const auto high_endpoint = Color::Unpack565Unscaled(high);
    const bool three_color = allow_3color ? (high >= low) : false;
    return InterpolateBC1(low_endpoint, high_endpoint, three_color);
}
std::array<Color, 4> Interpolator::InterpolateBC1(Color low, Color high, bool use_3color) const {

View File

@ -96,7 +96,7 @@ class Interpolator {
* @param high second 5:6:5 color for the block
* @return an array of 4 Color values, with indices matching BC1 selectors
*/
virtual std::array<Color, 4> InterpolateBC1(uint16_t low, uint16_t high) const;
virtual std::array<Color, 4> InterpolateBC1(uint16_t low, uint16_t high, bool allow_3color = true) const;
virtual std::array<Color, 4> InterpolateBC1(Color low, Color high, bool use_3color) const;

View File

@ -20,6 +20,7 @@
#include <pybind11/pybind11.h>
#include <array>
#include <memory>
#include "Interpolator.h"