From 71119b92792350c2f0f8e7c81c9a1128d54b2d46 Mon Sep 17 00:00:00 2001 From: drewcassidy Date: Thu, 18 Mar 2021 02:34:31 -0700 Subject: [PATCH] Improved bindings and dont use punchthrough in BC3 --- docs/conf.py | 12 +- docs/reference/s3tc.rst | 5 +- quicktex/s3tc/_bindings.cpp | 18 +- quicktex/s3tc/bc1/BC1Decoder.cpp | 6 +- quicktex/s3tc/bc1/BC1Decoder.h | 5 +- quicktex/s3tc/bc1/BC1Encoder.cpp | 175 +++++++++++++------- quicktex/s3tc/bc1/BC1Encoder.h | 101 +++++------ quicktex/s3tc/bc1/_bindings.cpp | 77 +++++---- quicktex/s3tc/bc3/BC3Decoder.cpp | 2 +- quicktex/s3tc/bc3/BC3Decoder.h | 4 +- quicktex/s3tc/bc3/BC3Encoder.h | 9 +- quicktex/s3tc/bc3/_bindings.cpp | 7 +- quicktex/s3tc/bc5/_bindings.cpp | 3 - quicktex/s3tc/interpolator/Interpolator.cpp | 6 +- quicktex/s3tc/interpolator/Interpolator.h | 2 +- quicktex/s3tc/interpolator/_bindings.cpp | 1 + 16 files changed, 243 insertions(+), 190 deletions(-) diff --git a/docs/conf.py b/docs/conf.py index 7141662..e27a111 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -10,9 +10,9 @@ # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # -import os -import sys -sys.path.insert(0, os.path.abspath('../quicktex')) +# import os +# import sys +# sys.path.insert(0, os.path.abspath('..')) # -- Project information ----------------------------------------------------- @@ -27,8 +27,8 @@ author = 'Andrew Cassidy' # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'myst_parser', - 'sphinx_rtd_theme', + # 'myst_parser', + # 'sphinx_rtd_theme', 'sphinx.ext.autodoc' ] @@ -45,7 +45,7 @@ exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. 
# -html_theme = 'sphinx_rtd_theme' +# html_theme = 'sphinx_rtd_theme' # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, diff --git a/docs/reference/s3tc.rst b/docs/reference/s3tc.rst index 6d9a8d3..aa8d649 100644 --- a/docs/reference/s3tc.rst +++ b/docs/reference/s3tc.rst @@ -1,11 +1,12 @@ -.. py:module:: quicktex.s3tc - s3tc module =========== +this is some text + bc1 module ---------- .. automodule:: quicktex.s3tc.bc1 :members: :undoc-members: + :special-members: diff --git a/quicktex/s3tc/_bindings.cpp b/quicktex/s3tc/_bindings.cpp index 4a74f24..6c0022c 100644 --- a/quicktex/s3tc/_bindings.cpp +++ b/quicktex/s3tc/_bindings.cpp @@ -36,22 +36,10 @@ void InitBC5(py::module_ &s3tc); void InitS3TC(py::module_ &m) { py::module_ s3tc = m.def_submodule("_s3tc", "s3tc compression library based on rgbcx.h written by Richard Goldreich"); -/* using IType = Interpolator::Type; - auto interpolator_type = py::enum_(s3tc, "InterpolatorType", R"pbdoc( -An enum representing various methods for interpolating colors, used by the BC1 and BC3 encoders/decoders. -Vendor-specific interpolation modes should only be used when the result will only be used on that type of GPU. -For most applications, :py:attr:`~quicktex.s3tc.InterpolatorType.Ideal` should be used. -)pbdoc"); - - interpolator_type.value("Ideal", IType::Ideal, "The default mode, with no rounding for colors 2 and 3. This matches the D3D10 docs on BC1."); - interpolator_type.value("IdealRound", IType::IdealRound, "Round colors 2 and 3. 
Matches the AMD Compressonator tool and the D3D9 docs on DXT1."); - interpolator_type.value("Nvidia", IType::Nvidia, "Nvidia GPU mode."); - interpolator_type.value("AMD", IType::AMD, "AMD GPU mode.");*/ - InitInterpolator(s3tc); InitBC1(s3tc); -// InitBC3(s3tc); -// InitBC4(s3tc); -// InitBC5(s3tc); + // InitBC3(s3tc); + // InitBC4(s3tc); + // InitBC5(s3tc); } } // namespace quicktex::bindings diff --git a/quicktex/s3tc/bc1/BC1Decoder.cpp b/quicktex/s3tc/bc1/BC1Decoder.cpp index 6b64f56..6a39bea 100644 --- a/quicktex/s3tc/bc1/BC1Decoder.cpp +++ b/quicktex/s3tc/bc1/BC1Decoder.cpp @@ -30,10 +30,14 @@ namespace quicktex::s3tc { void BC1Decoder::DecodeBlock(Color4x4 dest, BC1Block *const block) const noexcept(ndebug) { + DecodeBlock(dest, block, true); +} + +void BC1Decoder::DecodeBlock(Color4x4 dest, BC1Block *const block, bool allow_3color) const noexcept(ndebug) { const auto l = block->GetLowColor(); const auto h = block->GetHighColor(); const auto selectors = block->UnpackSelectors(); - const auto colors = _interpolator->InterpolateBC1(l, h); + const auto colors = _interpolator->InterpolateBC1(l, h, allow_3color); for (unsigned y = 0; y < 4; y++) { for (unsigned x = 0; x < 4; x++) { diff --git a/quicktex/s3tc/bc1/BC1Decoder.h b/quicktex/s3tc/bc1/BC1Decoder.h index e873b01..c20b0a1 100644 --- a/quicktex/s3tc/bc1/BC1Decoder.h +++ b/quicktex/s3tc/bc1/BC1Decoder.h @@ -41,8 +41,9 @@ class BC1Decoder final : public BlockDecoderTemplate { void DecodeBlock(Color4x4 dest, BC1Block *const block) const noexcept(ndebug) override; - Interpolator::Type GetInterpolatorType() const { return _interpolator->GetType(); } - constexpr bool WritesAlpha() const { return write_alpha; } + void DecodeBlock(Color4x4 dest, BC1Block *const block, bool allow_3color) const noexcept(ndebug); + + InterpolatorPtr GetInterpolator() const { return _interpolator; } bool write_alpha; diff --git a/quicktex/s3tc/bc1/BC1Encoder.cpp b/quicktex/s3tc/bc1/BC1Encoder.cpp index e8b97c2..ede02a9 100644 --- 
a/quicktex/s3tc/bc1/BC1Encoder.cpp +++ b/quicktex/s3tc/bc1/BC1Encoder.cpp @@ -25,6 +25,7 @@ #include #include #include +#include #include #include "../../BlockView.h" @@ -39,28 +40,51 @@ #include "OrderTable.h" #include "SingleColorTable.h" -namespace quicktex::s3tc { +namespace quicktex::s3tc { // constructors -BC1Encoder::BC1Encoder(unsigned int level, bool allow_3color, bool allow_3color_black, InterpolatorPtr interpolator) : _interpolator(interpolator) { - OrderTable<3>::Generate(); +BC1Encoder::BC1Encoder(unsigned int level, ColorMode color_mode, InterpolatorPtr interpolator) : _interpolator(interpolator), _color_mode(color_mode) { + if (color_mode != ColorMode::FourColor && color_mode != ColorMode::ThreeColor && color_mode != ColorMode::ThreeColorBlack) { + throw std::invalid_argument("Encoder color mode must be FourColor, ThreeColor, or ThreeColorBlack"); + } + OrderTable<4>::Generate(); + _single_match5 = SingleColorTable<5, 4>(_interpolator); + _single_match6 = SingleColorTable<6, 4>(_interpolator); - assert(OrderTable<3>::generated); - assert(OrderTable<4>::generated); + if (!OrderTable<4>::generated) throw std::runtime_error("Failed to generate 4-color order tables"); + if (!_single_match5) throw std::runtime_error("Failed to generate 5-bit 4-color single color table"); + if (!_single_match6) throw std::runtime_error("Failed to generate 6-bit 4-color single color table"); - SetLevel(level, allow_3color, allow_3color_black); + if (color_mode != ColorMode::FourColor) { + OrderTable<3>::Generate(); + _single_match5_half = SingleColorTable<5, 3>(_interpolator); + _single_match6_half = SingleColorTable<6, 3>(_interpolator); + + if (!OrderTable<3>::generated) throw std::runtime_error("Failed to generate 3-color order tables"); + if (!_single_match5_half) throw std::runtime_error("Failed to generate 5-bit 3-color single color table"); + if (!_single_match6_half) throw std::runtime_error("Failed to generate 6-bit 3-color single color table"); + } + + 
SetLevel(level); } // Getters and Setters -void BC1Encoder::SetLevel(unsigned level, bool allow_3color, bool allow_3color_black) { - _flags = Flags::None; +void BC1Encoder::SetLevel(unsigned level) { + if (level > 19) throw std::invalid_argument("Level out of range, must be between 0 and 18 inclusive"); // there's a secret level 19 but shhhhhh + + two_ls_passes = false; + two_ep_passes = false; + two_cf_passes = false; + exhaustive = false; + + _power_iterations = 4; _error_mode = ErrorMode::Check2; _endpoint_mode = EndpointMode::PCA; _search_rounds = 0; - _orderings3 = 1; - _orderings4 = 1; + _orderings3 = 0; + _orderings4 = 0; switch (level) { case 0: @@ -78,118 +102,160 @@ void BC1Encoder::SetLevel(unsigned level, bool allow_3color, bool allow_3color_b break; case 3: // Slightly stronger than stb_dxt HIGHQUAL. - _flags = Flags::TwoLeastSquaresPasses; + two_ls_passes = true; break; case 4: - _flags = Flags::TwoLeastSquaresPasses | Flags::Use6PowerIters; + two_ls_passes = true; + _error_mode = ErrorMode::Full; + _power_iterations = 6; break; default: case 5: // stb_dxt HIGHQUAL + permit 3 color (if it's enabled). 
- _flags = Flags::TwoLeastSquaresPasses; + two_ls_passes = true; + _error_mode = ErrorMode::Faster; break; case 6: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + + _orderings4 = 1; + _orderings3 = 1; _error_mode = ErrorMode::Faster; break; case 7: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + _error_mode = ErrorMode::Faster; _orderings4 = 4; + _orderings3 = 1; break; case 8: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + _error_mode = ErrorMode::Faster; _orderings4 = 8; + _orderings3 = 1; break; case 9: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + _error_mode = ErrorMode::Check2; _orderings4 = 11; _orderings3 = 3; break; case 10: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + _error_mode = ErrorMode::Check2; _orderings4 = 20; _orderings3 = 8; break; case 11: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + _error_mode = ErrorMode::Check2; _orderings4 = 28; _orderings3 = 16; break; case 12: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + two_ls_passes = true; + _error_mode = ErrorMode::Check2; _orderings4 = 32; _orderings3 = 32; break; case 13: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + two_ls_passes = true; + two_ep_passes = true; + _error_mode = ErrorMode::Full; _orderings4 = 32; _orderings3 = 32; _search_rounds = 20; + _power_iterations = 6; break; case 14: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + two_ls_passes = true; + two_ep_passes = true; + _error_mode = ErrorMode::Full; _orderings4 = 32; _orderings3 = 32; _search_rounds = 32; + 
_power_iterations = 6; break; case 15: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + two_ls_passes = true; + two_ep_passes = true; + _error_mode = ErrorMode::Full; _orderings4 = 56; _orderings3 = 32; _search_rounds = 32; + _power_iterations = 6; break; case 16: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + two_ls_passes = true; + two_ep_passes = true; + _error_mode = ErrorMode::Full; _orderings4 = 80; _orderings3 = 32; _search_rounds = 256; + _power_iterations = 6; + break; case 17: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + two_ls_passes = true; + two_ep_passes = true; + _error_mode = ErrorMode::Full; _orderings4 = 128; _orderings3 = 32; _search_rounds = 256; break; case 18: - _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative; + two_ls_passes = true; + two_ep_passes = true; + two_cf_passes = true; + _error_mode = ErrorMode::Full; _orderings4 = 128; _orderings3 = 32; _search_rounds = 256; + _power_iterations = 6; break; + case 19: // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. 
- _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative | - Flags::Exhaustive; + + two_ls_passes = true; + two_ep_passes = true; + two_cf_passes = true; + exhaustive = true; + _error_mode = ErrorMode::Full; _orderings4 = 128; _orderings3 = 32; _search_rounds = 256; + _power_iterations = 6; break; } - if (level >= 5 && allow_3color) { _flags |= Flags::Use3ColorBlocks; } - if (level >= 5 && allow_3color_black) { _flags |= Flags::Use3ColorBlocksForBlackPixels; } - _orderings4 = clamp(_orderings4, 1U, OrderTable<4>::BestOrderCount); _orderings3 = clamp(_orderings3, 1U, OrderTable<3>::BestOrderCount); } void BC1Encoder::SetOrderings4(unsigned orderings4) { _orderings4 = clamp(orderings4, 1U, OrderTable<4>::BestOrderCount); } void BC1Encoder::SetOrderings3(unsigned orderings3) { _orderings3 = clamp(orderings3, 1U, OrderTable<3>::BestOrderCount); } +void BC1Encoder::SetOrderings(OrderingPair orderings) { + SetOrderings4(std::get<0>(orderings)); + SetOrderings3(std::get<1>(orderings)); +} + +void BC1Encoder::SetPowerIterations(unsigned int power_iters) { _power_iterations = clamp(power_iters, min_power_iterations, max_power_iterations); } // Public methods void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { @@ -201,22 +267,25 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { auto metrics = pixels.GetMetrics(); - bool needs_block_error = (_flags & Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks) != Flags::None; + const bool use_likely_orderings = (exhaustive || _orderings3 > 0 || _orderings4 > 0); + + bool needs_block_error = use_likely_orderings; + needs_block_error |= (_color_mode == ColorMode::ThreeColor); + needs_block_error |= (_color_mode == ColorMode::ThreeColorBlack) && metrics.has_black; needs_block_error |= (_error_mode != ErrorMode::None); needs_block_error |= (_search_rounds > 0); - needs_block_error |= metrics.has_black && 
((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None); ErrorMode error_mode = needs_block_error ? _error_mode : ErrorMode::None; assert(!((_error_mode == ErrorMode::None) && needs_block_error)); - const unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1; - const unsigned total_ep_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1; - const unsigned total_cf_iters = (_flags & Flags::Iterative) != Flags::None ? 2 : 1; + const unsigned total_ls_passes = two_ls_passes ? 2 : 1; + const unsigned total_cf_passes = two_cf_passes ? 2 : 1; + const unsigned total_ep_passes = (needs_block_error && two_ep_passes) ? 2 : 1; // Initial block generation EncodeResults orig; EncodeResults result; - for (unsigned round = 0; round < total_ep_rounds; round++) { + for (unsigned round = 0; round < total_ep_passes; round++) { EndpointMode endpoint_mode = (round == 1) ? EndpointMode::BoundingBox : _endpoint_mode; EncodeResults trial_orig; @@ -234,20 +303,20 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { } // First refinement pass using ordered cluster fit - if (result.error > 0 && (bool)(_flags & Flags::UseLikelyTotalOrderings)) { - for (unsigned iter = 0; iter < total_cf_iters; iter++) { RefineBlockCF(pixels, result, metrics, _error_mode, _orderings4); } + if (result.error > 0 && use_likely_orderings) { + for (unsigned iter = 0; iter < total_cf_passes; iter++) { RefineBlockCF(pixels, result, metrics, _error_mode, _orderings4); } } // try for 3-color block - if (result.error > 0 && (bool)(_flags & Flags::Use3ColorBlocks)) { + if (result.error > 0 && (bool)(_color_mode & ColorMode::ThreeColor)) { EncodeResults trial_result = orig; FindSelectors(pixels, trial_result, ErrorMode::Full); RefineBlockLS(pixels, trial_result, metrics, ErrorMode::Full, total_ls_passes); // First refinement pass using ordered cluster fit - if (trial_result.error > 0 && (bool)(_flags & 
Flags::UseLikelyTotalOrderings)) { - for (unsigned iter = 0; iter < total_cf_iters; iter++) { + if (trial_result.error > 0 && use_likely_orderings) { + for (unsigned iter = 0; iter < total_cf_passes; iter++) { RefineBlockCF(pixels, trial_result, metrics, ErrorMode::Full, _orderings3); } } @@ -256,7 +325,7 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { } // try for 3-color block with black - if (result.error > 0 && (bool)(_flags & Flags::Use3ColorBlocksForBlackPixels) && metrics.has_black && !metrics.max.IsBlack()) { + if (result.error > 0 && (_color_mode == ColorMode::ThreeColorBlack) && metrics.has_black && !metrics.max.IsBlack()) { EncodeResults trial_result; BlockMetrics metrics_no_black = pixels.GetMetrics(true); @@ -288,7 +357,7 @@ void BC1Encoder::WriteBlockSolid(Color color, BC1Block *dest) const { EncodeResults result; FindEndpointsSingleColor(result, color, false); - if ((_flags & (Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels)) != Flags::None) { + if ((bool)(_color_mode & ColorMode::ThreeColor)) { EncodeResults result_3color; FindEndpointsSingleColor(result_3color, color, true); @@ -298,7 +367,7 @@ void BC1Encoder::WriteBlockSolid(Color color, BC1Block *dest) const { min16 = result.low.Pack565Unscaled(); max16 = result.high.Pack565Unscaled(); - if (result.color_mode == ColorMode::Solid) { + if (result.solid) { if (min16 == max16) { // make sure this isnt accidentally a 3-color block // so make max16 > min16 (l > h) @@ -387,7 +456,7 @@ void BC1Encoder::FindEndpointsSingleColor(EncodeResults &block, Color color, boo BC1MatchEntry match_g = match6->at(color.g); BC1MatchEntry match_b = match5->at(color.b); - block.color_mode = is_3color ? ColorMode::ThreeColorSolid : ColorMode::Solid; + block.color_mode = is_3color ? 
ColorMode::ThreeColor : ColorMode::FourColor; block.error = match_r.error + match_g.error + match_b.error; block.low = Color(match_r.low, match_g.low, match_b.low); block.high = Color(match_r.high, match_g.high, match_b.high); @@ -551,7 +620,6 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, EncodeResults &block, const Bloc Vector4 axis = {306, 601, 117}; // Luma vector Matrix4x4 covariance = Matrix4x4::Identity(); - const unsigned total_power_iters = (_flags & Flags::Use6PowerIters) != Flags::None ? 6 : 4; for (unsigned i = 0; i < 16; i++) { auto val = pixels.Get(i); @@ -578,7 +646,7 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, EncodeResults &block, const Bloc // using the covariance matrix, stretch the delta vector towards the primary axis of the data using power iteration // the end result of this may actually be the same as the least squares approach, will have to do more research - for (unsigned power_iter = 0; power_iter < total_power_iters; power_iter++) { delta = covariance * delta; } + for (unsigned power_iter = 0; power_iter < _power_iterations; power_iter++) { delta = covariance * delta; } // if we found any correlation, then this is our new axis. 
otherwise we fallback to the luma vector float k = delta.MaxAbs(3); @@ -618,7 +686,6 @@ void BC1Encoder::FindEndpoints(Color4x4 pixels, EncodeResults &block, const Bloc template void BC1Encoder::FindSelectors(Color4x4 &pixels, EncodeResults &block, ErrorMode error_mode) const { assert(!((error_mode != ErrorMode::Full) && (bool)(M & ColorMode::ThreeColor))); - assert(!(bool)(M & ColorMode::Solid)); const int color_count = (unsigned)M & 0x0F; @@ -722,7 +789,6 @@ template void BC1Encoder::FindSelectors(Color4x4 &pixe template bool BC1Encoder::RefineEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics) const { const int color_count = (unsigned)M & 0x0F; static_assert(color_count == 3 || color_count == 4); - static_assert(!(bool)(M & ColorMode::Solid)); assert(block.color_mode != ColorMode::Incomplete); int denominator = color_count - 1; @@ -769,7 +835,6 @@ template bool BC1Encoder::RefineEndpointsLS(Color4x4 p template void BC1Encoder::RefineEndpointsLS(std::array &sums, EncodeResults &block, Vector4 &matrix, Hash hash) const { const int color_count = (unsigned)M & 0x0F; static_assert(color_count == 3 || color_count == 4); - static_assert(!(bool)(M & ColorMode::Solid)); assert(block.color_mode != ColorMode::Incomplete); int denominator = color_count - 1; @@ -821,7 +886,6 @@ template void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetrics &metrics, ErrorMode error_mode, unsigned orderings) const { const int color_count = (unsigned)M & 0x0F; static_assert(color_count == 3 || color_count == 4); - static_assert(!(bool)(M & ColorMode::Solid)); assert(block.color_mode != ColorMode::Incomplete); using OrderTable = OrderTable; @@ -852,9 +916,9 @@ void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetr sums[i + 1] = sums[i] + color_vectors[p]; } - const unsigned q_total = ((_flags & Flags::Exhaustive) != Flags::None) ? OrderTable::OrderCount : orderings; + const unsigned q_total = exhaustive ? 
OrderTable::OrderCount : orderings; for (Hash q = 0; q < q_total; q++) { - Hash trial_hash = ((_flags & Flags::Exhaustive) != Flags::None) ? q : OrderTable::BestOrders[start_hash][q]; + Hash trial_hash = exhaustive ? q : OrderTable::BestOrders[start_hash][q]; Vector4 trial_matrix = OrderTable::GetFactors(trial_hash); EncodeResults trial_result = orig; @@ -872,7 +936,7 @@ void BC1Encoder::RefineBlockCF(Color4x4 &pixels, EncodeResults &block, BlockMetr } void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const { - if ((bool)(block.color_mode & ColorMode::Solid)) return; + if (block.solid) return; static const std::array Voxels = {{ {1, 0, 0, 3}, // 0 @@ -938,5 +1002,4 @@ void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const { if (i - prev_improvement_index > 32) break; } } - } // namespace quicktex::s3tc \ No newline at end of file diff --git a/quicktex/s3tc/bc1/BC1Encoder.h b/quicktex/s3tc/bc1/BC1Encoder.h index 2f4ec97..b58b887 100644 --- a/quicktex/s3tc/bc1/BC1Encoder.h +++ b/quicktex/s3tc/bc1/BC1Encoder.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "../../BlockEncoder.h" @@ -42,43 +43,24 @@ namespace quicktex::s3tc { class BC1Encoder final : public BlockEncoderTemplate { public: using InterpolatorPtr = std::shared_ptr; + using OrderingPair = std::tuple; - enum class Flags { - None = 0, + inline static constexpr unsigned min_power_iterations = 4; + inline static constexpr unsigned max_power_iterations = 10; - // Try to improve quality using the most likely total orderings. - // The total_orderings_to_try parameter will then control the number of total orderings to try for 4 color blocks, and the - // total_orderings_to_try3 parameter will control the number of total orderings to try for 3 color blocks (if they are enabled). 
- UseLikelyTotalOrderings = 1, + enum class ColorMode { + // An incomplete block with invalid selectors or endpoints + Incomplete = 0x00, - // Use 2 least squares pass, instead of one (same as stb_dxt's HIGHQUAL option). - // Recommended if you're enabling UseLikelyTotalOrderings. - TwoLeastSquaresPasses = 2, + // A block where color0 <= color1 + ThreeColor = 0x03, - // Use3ColorBlocksForBlackPixels allows the BC1 encoder to use 3-color blocks for blocks containing black or very dark pixels. - // You shader/engine MUST ignore the alpha channel on textures encoded with this flag. - // Average quality goes up substantially for my 100 texture corpus (~.5 dB), so it's worth using if you can. - // Note the BC1 encoder does not actually support transparency in 3-color mode. - // Don't set when encoding to BC3. - Use3ColorBlocksForBlackPixels = 4, + // A block where color0 > color1 + FourColor = 0x04, - // If Use3ColorBlocks is set, the encoder can use 3-color mode for a small but noticeable gain in average quality, but lower perf. - // If you also specify the UseLikelyTotalOrderings flag, set the total_orderings_to_try3 paramter to the number of total orderings to try. - // Don't set when encoding to BC3. - Use3ColorBlocks = 8, - - // Iterative will greatly increase encode time, but is very slightly higher quality. - // Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, unless you just don't care about perf. at all. - Iterative = 16, - - // Use 6 power iterations vs. 4 for PCA. - Use6PowerIters = 32, - - // Check all total orderings - *very* slow. The encoder is not designed to be used in this way. - Exhaustive = 64, - - // Try 2 different ways of choosing the initial endpoints. 
- TryAllInitialEndpoints = 128, + // A 3 color block with black pixels (selector 3) + UseBlack = 0x10, + ThreeColorBlack = ThreeColor | UseBlack, }; enum class ErrorMode { @@ -111,17 +93,17 @@ class BC1Encoder final : public BlockEncoderTemplate { PCA }; - BC1Encoder(unsigned level, bool allow_3color, bool allow_3color_black, InterpolatorPtr interpolator); + bool exhaustive; + bool two_ls_passes; + bool two_ep_passes; + bool two_cf_passes; - BC1Encoder(unsigned int level = 5, bool allow_3color = true, bool allow_3color_black = true) - : BC1Encoder(level, allow_3color, allow_3color_black, std::make_shared()) {} + BC1Encoder(unsigned level, ColorMode color_mode, InterpolatorPtr interpolator); - Interpolator::Type GetInterpolatorType() const { return _interpolator->GetType(); } + BC1Encoder(unsigned int level = 5, ColorMode color_mode = ColorMode::FourColor) : BC1Encoder(level, color_mode, std::make_shared()) {} - void SetLevel(unsigned level, bool allow_3color = true, bool allow_3color_black = true); - - Flags GetFlags() const { return _flags; } - void SetFlags(Flags flags) { _flags = flags; }; + // Getters and Setters + void SetLevel(unsigned level); ErrorMode GetErrorMode() const { return _error_mode; } void SetErrorMode(ErrorMode error_mode) { _error_mode = error_mode; }; @@ -129,14 +111,25 @@ class BC1Encoder final : public BlockEncoderTemplate { EndpointMode GetEndpointMode() const { return _endpoint_mode; } void SetEndpointMode(EndpointMode endpoint_mode) { _endpoint_mode = endpoint_mode; } + InterpolatorPtr GetInterpolator() const { return _interpolator; } + ColorMode GetColorMode() const { return _color_mode; } + unsigned int GetSearchRounds() const { return _search_rounds; } void SetSearchRounds(unsigned search_rounds) { _search_rounds = search_rounds; } - unsigned int GetOrderings4() const { return _orderings4; } - unsigned int GetOrderings3() const { return _orderings3; } + unsigned GetOrderings4() const { return _orderings4; } + unsigned GetOrderings3() 
const { return _orderings3; } + void SetOrderings4(unsigned orderings4); void SetOrderings3(unsigned orderings3); + OrderingPair GetOrderings() const { return OrderingPair(_orderings4, _orderings3); } + void SetOrderings(OrderingPair orderings); + + unsigned GetPowerIterations() const { return _power_iterations; } + void SetPowerIterations(unsigned power_iters); + + // Public Methods void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override; virtual size_t MTThreshold() const override { return 16; } @@ -145,40 +138,32 @@ class BC1Encoder final : public BlockEncoderTemplate { using Hash = uint16_t; using BlockMetrics = Color4x4::BlockMetrics; - enum class ColorMode { - Incomplete = 0x00, - ThreeColor = 0x03, - FourColor = 0x04, - UseBlack = 0x10, - Solid = 0x20, - ThreeColorBlack = ThreeColor | UseBlack, - ThreeColorSolid = ThreeColor | Solid, - FourColorSolid = FourColor | Solid, - }; - // Unpacked BC1 block with metadata struct EncodeResults { Color low; Color high; std::array selectors; ColorMode color_mode; + bool solid; unsigned error = UINT_MAX; }; const InterpolatorPtr _interpolator; + const ColorMode _color_mode; // match tables used for single-color blocks // Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible, // with an included error value // these depend on the interpolator - const MatchListPtr _single_match5 = SingleColorTable<5, 4>(_interpolator); - const MatchListPtr _single_match6 = SingleColorTable<6, 4>(_interpolator); - const MatchListPtr _single_match5_half = SingleColorTable<5, 3>(_interpolator); - const MatchListPtr _single_match6_half = SingleColorTable<6, 3>(_interpolator); + MatchListPtr _single_match5 = SingleColorTable<5, 4>(_interpolator); + MatchListPtr _single_match6 = SingleColorTable<6, 4>(_interpolator); + MatchListPtr _single_match5_half = SingleColorTable<5, 3>(_interpolator); + MatchListPtr _single_match6_half = SingleColorTable<6, 3>(_interpolator); - Flags _flags; ErrorMode 
_error_mode; EndpointMode _endpoint_mode; + + unsigned _power_iterations; unsigned _search_rounds; unsigned _orderings4; unsigned _orderings3; diff --git a/quicktex/s3tc/bc1/_bindings.cpp b/quicktex/s3tc/bc1/_bindings.cpp index 06fe4c2..02b75b0 100644 --- a/quicktex/s3tc/bc1/_bindings.cpp +++ b/quicktex/s3tc/bc1/_bindings.cpp @@ -45,37 +45,14 @@ void InitBC1(py::module_ &s3tc) { auto block_decoder = py::type::of(); // BC1Encoder - py::class_ bc1_encoder(bc1, "BC1Encoder", block_encoder); + py::class_ bc1_encoder(bc1, "BC1Encoder", block_encoder, "Encodes RGB textures to BC1"); - bc1_encoder.def(py::init(), "level"_a = 5, "use_3color"_a = true, "use_3color_black"_a = true); - bc1_encoder.def(py::init(), "level"_a, "use_3color"_a, "use_3color_black"_a, "interpolator"_a); + bc1_encoder.def(py::init(), "level"_a = 5, "color_mode"_a = BC1Encoder::ColorMode::FourColor); + bc1_encoder.def(py::init(), "level"_a, "color_mode"_a, "interpolator"_a); - bc1_encoder.def("set_level", &BC1Encoder::SetLevel); - bc1_encoder.def_property_readonly("interpolator_type", &BC1Encoder::GetInterpolatorType); - bc1_encoder.def_property("flags", &BC1Encoder::GetFlags, &BC1Encoder::SetFlags); - bc1_encoder.def_property("error_mode", &BC1Encoder::GetErrorMode, &BC1Encoder::SetErrorMode); - bc1_encoder.def_property("endpoint_mode", &BC1Encoder::GetEndpointMode, &BC1Encoder::SetEndpointMode); - bc1_encoder.def_property("search_rounds", &BC1Encoder::GetSearchRounds, &BC1Encoder::SetSearchRounds); - bc1_encoder.def_property("orderings_4", &BC1Encoder::GetOrderings4, &BC1Encoder::SetOrderings4); - bc1_encoder.def_property("orderings_3", &BC1Encoder::GetOrderings3, &BC1Encoder::SetOrderings3); + bc1_encoder.def("set_level", &BC1Encoder::SetLevel, "Use a preset quality level, between 0 and 18. 
For better control, see the advanced API below"); - using Flags = BC1Encoder::Flags; - py::enum_(bc1_encoder, "Flags", py::arithmetic()) - .value("UseLikelyTotalOrderings", Flags::UseLikelyTotalOrderings) - .value("TwoLeastSquaresPasses", Flags::TwoLeastSquaresPasses) - .value("Use3ColorBlocksForBlackPixels", Flags::Use3ColorBlocksForBlackPixels) - .value("Use3ColorBlocks", Flags::Use3ColorBlocks) - .value("Iterative", Flags::Iterative) - .value("Use6PowerIters", Flags::Use6PowerIters) - .value("Exhaustive", Flags::Exhaustive) - .value("TryAllInitialEndpoints", Flags::TryAllInitialEndpoints) - .def("__invert__", [](Flags f1) { return ~unsigned(f1); }) - .def("__and__", [](Flags f1, Flags f2) { return unsigned(f1) & unsigned(f2); }) - .def("__rand__", [](Flags f1, Flags f2) { return unsigned(f1) & unsigned(f2); }) - .def("__or__", [](Flags f1, Flags f2) { return unsigned(f1) | unsigned(f2); }) - .def("__ror__", [](Flags f1, Flags f2) { return unsigned(f1) | unsigned(f2); }) - .def("__xor__", [](Flags f1, Flags f2) { return unsigned(f1) ^ unsigned(f2); }) - .def("__rxor__", [](Flags f1, Flags f2) { return unsigned(f2) ^ unsigned(f1); }); + // Advanced API py::enum_(bc1_encoder, "EndpointMode") .value("LeastSquares", BC1Encoder::EndpointMode::LeastSquares) @@ -89,13 +66,53 @@ void InitBC1(py::module_ &s3tc) { .value("Check2", BC1Encoder::ErrorMode::Check2) .value("Full", BC1Encoder::ErrorMode::Full); + py::enum_(bc1_encoder, "ColorMode") + .value("FourColor", BC1Encoder::ColorMode::FourColor, "Default color mode. 
Only 4-color blocks will be output, where color0 > color1") + .value("ThreeColor", BC1Encoder::ColorMode::ThreeColor) + .value("ThreeColorBlack", BC1Encoder::ColorMode::ThreeColorBlack); + + bc1_encoder.def_readonly_static("max_power_iterations", &BC1Encoder::max_power_iterations, "Maximum value of :py:attr:`BC1Encoder.power_iterations`."); + bc1_encoder.def_readonly_static("min_power_iterations", &BC1Encoder::min_power_iterations, "Minimum value of :py:attr:`BC1Encoder.power_iterations`."); + + bc1_encoder.def_property_readonly("interpolator", &BC1Encoder::GetInterpolator, "The interpolator used by this encoder. This is a readonly property."); + bc1_encoder.def_property_readonly("color_mode", &BC1Encoder::GetColorMode, "The color mode used by this encoder. This is a readonly property."); + + bc1_encoder.def_property("error_mode", &BC1Encoder::GetErrorMode, &BC1Encoder::SetErrorMode, "The error mode used by this encoder for finding selectors."); + bc1_encoder.def_property("endpoint_mode", &BC1Encoder::GetEndpointMode, &BC1Encoder::SetEndpointMode, "The endpoint mode used by this encoder."); + + bc1_encoder.def_readwrite("two_ls_passes", &BC1Encoder::two_ls_passes, + "Use 2 least squares passes instead of one (same as stb_dxt's HIGHQUAL option).\n" + "Recommended if you're setting the orderings greater than 0."); + + bc1_encoder.def_readwrite("two_ep_passes", &BC1Encoder::two_ep_passes, "Try 2 different ways of choosing the initial endpoints."); + + bc1_encoder.def_readwrite("two_cf_passes", &BC1Encoder::two_cf_passes, + "Greatly increase encode time, with very slightly higher quality.\n" + "Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, " + "unless you just don't care about performance at all."); + + bc1_encoder.def_readwrite("exhaustive", &BC1Encoder::exhaustive, + "Check all total orderings - *very* slow. 
The encoder is not designed to be used in this way"); + + bc1_encoder.def_property("search_rounds", &BC1Encoder::GetSearchRounds, &BC1Encoder::SetSearchRounds, + "Setting search rounds > 0 enables refining the final endpoints by examining nearby colors. A higher value has a higher quality " + "at the expense of performance."); + + bc1_encoder.def_property("orderings", &BC1Encoder::GetOrderings, &BC1Encoder::SetOrderings, + "setting the orderings > 0 enables ordered cluster fit using a lookup table of similar blocks. Value is a tuple of (4 color " + "orders, 3 color orders), where higher values have a higher quality at the expense of performance."); + + bc1_encoder.def_property("power_iterations", &BC1Encoder::GetPowerIterations, &BC1Encoder::SetPowerIterations, + "Number of power iterations used with the PCA endpoint mode. Value should be around 4 to 6. " + "Automatically clamped to between :py:const:`BC1Encoder.min_power_iterations` and :py:const:`BC1Encoder.max_power_iterations`"); + // BC1Decoder - py::class_ bc1_decoder(bc1, "BC1Decoder", block_decoder); + py::class_ bc1_decoder(bc1, "BC1Decoder", block_decoder, "Decodes BC1 textures to RGB"); bc1_decoder.def(py::init(), "write_alpha"_a = false); bc1_decoder.def(py::init(), "write_alpha"_a, "interpolator"_a); - bc1_decoder.def_property_readonly("interpolator_type", &BC1Decoder::GetInterpolatorType); + bc1_decoder.def_property_readonly("interpolator", &BC1Decoder::GetInterpolator); bc1_decoder.def_readwrite("write_alpha", &BC1Decoder::write_alpha); } } // namespace quicktex::bindings \ No newline at end of file diff --git a/quicktex/s3tc/bc3/BC3Decoder.cpp b/quicktex/s3tc/bc3/BC3Decoder.cpp index eeaa628..085f253 100644 --- a/quicktex/s3tc/bc3/BC3Decoder.cpp +++ b/quicktex/s3tc/bc3/BC3Decoder.cpp @@ -28,7 +28,7 @@ namespace quicktex::s3tc { void BC3Decoder::DecodeBlock(Color4x4 dest, BC3Block *const block) const noexcept(ndebug) { - _bc1_decoder->DecodeBlock(dest, &(block->color_block)); + 
_bc1_decoder->DecodeBlock(dest, &(block->color_block), false); _bc4_decoder->DecodeBlock(dest, &(block->alpha_block), 3); } } // namespace quicktex::s3tc \ No newline at end of file diff --git a/quicktex/s3tc/bc3/BC3Decoder.h b/quicktex/s3tc/bc3/BC3Decoder.h index 981c8e6..1ab7b0f 100644 --- a/quicktex/s3tc/bc3/BC3Decoder.h +++ b/quicktex/s3tc/bc3/BC3Decoder.h @@ -37,9 +37,9 @@ class BC3Decoder : public BlockDecoderTemplate { using BC4DecoderPtr = std::shared_ptr; using InterpolatorPtr = std::shared_ptr; - BC3Decoder(BC1DecoderPtr bc1_decoder) : _bc1_decoder(bc1_decoder), _bc4_decoder(std::make_shared(3)) {} + BC3Decoder(InterpolatorPtr interpolator) : _bc1_decoder(std::make_shared(interpolator)), _bc4_decoder(std::make_shared(3)) {} - BC3Decoder(InterpolatorPtr interpolator = std::make_shared()) : BC3Decoder(std::make_shared(interpolator)) {} + BC3Decoder() : BC3Decoder(std::make_shared()) {} void DecodeBlock(Color4x4 dest, BC3Block *const block) const noexcept(ndebug) override; diff --git a/quicktex/s3tc/bc3/BC3Encoder.h b/quicktex/s3tc/bc3/BC3Encoder.h index 3a47197..58caea2 100644 --- a/quicktex/s3tc/bc3/BC3Encoder.h +++ b/quicktex/s3tc/bc3/BC3Encoder.h @@ -36,13 +36,10 @@ class BC3Encoder : public BlockEncoderTemplate { using BC4EncoderPtr = std::shared_ptr; using InterpolatorPtr = std::shared_ptr; - BC3Encoder(BC1EncoderPtr bc1_encoder) : _bc1_encoder(bc1_encoder), _bc4_encoder(std::make_shared(3)) {} + BC3Encoder(unsigned level, InterpolatorPtr interpolator) + : _bc1_encoder(std::make_shared(level, BC1Encoder::ColorMode::FourColor, interpolator)), _bc4_encoder(std::make_shared(3)) {} - BC3Encoder(unsigned level, bool allow_3color, bool allow_3color_black, InterpolatorPtr interpolator) - : BC3Encoder(std::make_shared(level, allow_3color, allow_3color_black, interpolator)) {} - - BC3Encoder(unsigned level = 5, bool allow_3color = true, bool allow_3color_black = true) - : BC3Encoder(std::make_shared(level, allow_3color, allow_3color_black, std::make_shared())) 
{} + BC3Encoder(unsigned level = 5) : BC3Encoder(level, std::make_shared()) {} void EncodeBlock(Color4x4 pixels, BC3Block *dest) const override; diff --git a/quicktex/s3tc/bc3/_bindings.cpp b/quicktex/s3tc/bc3/_bindings.cpp index e1a6abf..e3609c1 100644 --- a/quicktex/s3tc/bc3/_bindings.cpp +++ b/quicktex/s3tc/bc3/_bindings.cpp @@ -40,7 +40,6 @@ using InterpolatorPtr = std::shared_ptr; using BC1EncoderPtr = std::shared_ptr; using BC1DecoderPtr = std::shared_ptr; - void InitBC3(py::module_ &s3tc) { auto bc3 = s3tc.def_submodule("_bc3", "BC3 encoding/decoding module"); auto block_encoder = py::type::of(); @@ -49,9 +48,8 @@ void InitBC3(py::module_ &s3tc) { // BC3Encoder py::class_ bc3_encoder(bc3, "BC3Encoder", block_encoder); - bc3_encoder.def(py::init(), "bc1_encoder"_a); - bc3_encoder.def(py::init(), "level"_a = 5, "use_3color"_a = true, "use_3color_black"_a = true); - bc3_encoder.def(py::init(), "level"_a, "use_3color"_a, "use_3color_black"_a, "interpolator"_a); + bc3_encoder.def(py::init(), "level"_a = 5); + bc3_encoder.def(py::init(), "level"_a, "interpolator"_a); bc3_encoder.def_property_readonly("bc1_encoder", &BC3Encoder::GetBC1Encoder); bc3_encoder.def_property_readonly("bc4_encoder", &BC3Encoder::GetBC4Encoder); @@ -60,7 +58,6 @@ void InitBC3(py::module_ &s3tc) { py::class_ bc3_decoder(bc3, "BC3Decoder", block_decoder); bc3_decoder.def(py::init<>()); - bc3_decoder.def(py::init(), "bc1_decoder"_a); bc3_decoder.def(py::init(), "interpolator"_a); bc3_decoder.def_property_readonly("bc1_decoder", &BC3Decoder::GetBC1Decoder); diff --git a/quicktex/s3tc/bc5/_bindings.cpp b/quicktex/s3tc/bc5/_bindings.cpp index 3edcf1b..7d58da5 100644 --- a/quicktex/s3tc/bc5/_bindings.cpp +++ b/quicktex/s3tc/bc5/_bindings.cpp @@ -20,10 +20,7 @@ #include #include -#include #include -#include -#include #include "../../BlockDecoder.h" #include "../../BlockEncoder.h" diff --git a/quicktex/s3tc/interpolator/Interpolator.cpp b/quicktex/s3tc/interpolator/Interpolator.cpp index 
200f71e..6312c10 100644 --- a/quicktex/s3tc/interpolator/Interpolator.cpp +++ b/quicktex/s3tc/interpolator/Interpolator.cpp @@ -25,6 +25,7 @@ #include #include "../../util.h" +#include "../../Color.h" namespace quicktex::s3tc { @@ -49,8 +50,9 @@ uint8_t Interpolator::Interpolate6(uint8_t v0, uint8_t v1) const { return Interp uint8_t Interpolator::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); } uint8_t Interpolator::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); } -std::array Interpolator::InterpolateBC1(uint16_t low, uint16_t high) const { - return InterpolateBC1(Color::Unpack565Unscaled(low), Color::Unpack565Unscaled(high), (high >= low)); +std::array Interpolator::InterpolateBC1(uint16_t low, uint16_t high, bool allow_3color) const { + bool use_3color = allow_3color && (high >= low); + return InterpolateBC1(Color::Unpack565Unscaled(low), Color::Unpack565Unscaled(high), use_3color); } std::array Interpolator::InterpolateBC1(Color low, Color high, bool use_3color) const { diff --git a/quicktex/s3tc/interpolator/Interpolator.h b/quicktex/s3tc/interpolator/Interpolator.h index 59ffe42..b4b91a5 100644 --- a/quicktex/s3tc/interpolator/Interpolator.h +++ b/quicktex/s3tc/interpolator/Interpolator.h @@ -96,7 +96,7 @@ class Interpolator { * @param high second 5:6:5 color for the block * @return and array of 4 Color values, with indices matching BC1 selectors */ - virtual std::array InterpolateBC1(uint16_t low, uint16_t high) const; + virtual std::array InterpolateBC1(uint16_t low, uint16_t high, bool allow_3color = true) const; virtual std::array InterpolateBC1(Color low, Color high, bool use_3color) const; diff --git a/quicktex/s3tc/interpolator/_bindings.cpp b/quicktex/s3tc/interpolator/_bindings.cpp index 7918431..4e2a384 100644 --- a/quicktex/s3tc/interpolator/_bindings.cpp +++ b/quicktex/s3tc/interpolator/_bindings.cpp @@ -20,6 +20,7 @@ #include #include 
+#include #include "Interpolator.h"