From af7860c06ad9c56882f42424dc094b2712b8b7af Mon Sep 17 00:00:00 2001 From: drewcassidy Date: Thu, 4 Mar 2021 01:18:30 -0800 Subject: [PATCH] Make encoder constructable --- src/BC1/BC1Encoder.cpp | 184 ++++++++++++++++++++++++++++++++++++++--- src/BC1/BC1Encoder.h | 113 ++++++++++++++++--------- src/BC1/Histogram.h | 9 +- src/BlockView.h | 2 +- src/test/test.cpp | 2 +- 5 files changed, 256 insertions(+), 54 deletions(-) diff --git a/src/BC1/BC1Encoder.cpp b/src/BC1/BC1Encoder.cpp index e6b3d06..ed9be84 100644 --- a/src/BC1/BC1Encoder.cpp +++ b/src/BC1/BC1Encoder.cpp @@ -44,14 +44,14 @@ using namespace BC1; using ColorMode = BC1Encoder::ColorMode; // constructors + BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) { - _flags = - Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels; - _error_mode = ErrorMode::Full; + _flags = Flags::None; + _error_mode = ErrorMode::Check2; _endpoint_mode = EndpointMode::PCA; - _orderings4 = 128; - _orderings3 = 32; - _search_rounds = 256; + _search_rounds = 0; + _orderings3 = 1; + _orderings4 = 1; OrderTable<3>::Generate(); OrderTable<4>::Generate(); @@ -60,6 +60,165 @@ BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolato assert(OrderTable<4>::generated); } +BC1Encoder::BC1Encoder(unsigned int level, bool allow_3color, bool allow_3color_black) : BC1Encoder(Interpolator::MakeInterpolator()) { + SetLevel(level, allow_3color, allow_3color_black); +} + +BC1Encoder::BC1Encoder(InterpolatorPtr interpolator, unsigned level, bool allow_3color, bool allow_3color_black) : BC1Encoder(interpolator) { + SetLevel(level, allow_3color, allow_3color_black); +} + +BC1Encoder::BC1Encoder(InterpolatorPtr interpolator, Flags flags, ErrorMode error_mode, EndpointMode endpoint_mode, unsigned search_rounds, unsigned orderings4, + unsigned orderings3) + : BC1Encoder(interpolator) { + SetFlags(flags); +
SetErrorMode(error_mode); + SetEndpointMode(endpoint_mode); + SetSearchRounds(search_rounds); + SetOrderings(orderings4, orderings3); +} + +// Getters and Setters +void BC1Encoder::SetLevel(unsigned level, bool allow_3color, bool allow_3color_black) { + _flags = Flags::None; + _error_mode = ErrorMode::Check2; + _endpoint_mode = EndpointMode::PCA; + _search_rounds = 0; + _orderings3 = 1; + _orderings4 = 1; + + switch (level) { + case 0: + // Faster/higher quality than stb_dxt default. + _endpoint_mode = EndpointMode::BoundingBoxInt; + break; + case 1: + // Faster/higher quality than stb_dxt default. a bit higher average quality vs. mode 0. + _endpoint_mode = EndpointMode::LeastSquares; + break; + case 2: + // On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures). + // Slightly stronger than stb_dxt. + // Uses default settings. + break; + case 3: + // Slightly stronger than stb_dxt HIGHQUAL. + _flags = Flags::TwoLeastSquaresPasses; + break; + case 4: + _flags = Flags::TwoLeastSquaresPasses | Flags::Use6PowerIters; + _error_mode = ErrorMode::Full; + break; + default: + case 5: + // stb_dxt HIGHQUAL + permit 3 color (if it's enabled).
+ _flags = Flags::TwoLeastSquaresPasses; + _error_mode = ErrorMode::Faster; + break; + case 6: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Faster; + break; + case 7: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Faster; + _orderings4 = 4; + break; + case 8: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Faster; + _orderings4 = 8; + break; + case 9: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Check2; + _orderings4 = 11; + _orderings3 = 3; + break; + case 10: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Check2; + _orderings4 = 20; + _orderings3 = 8; + break; + case 11: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Check2; + _orderings4 = 28; + _orderings3 = 16; + break; + case 12: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings; + _error_mode = ErrorMode::Check2; + _orderings4 = 32; + _orderings3 = 32; + break; + case 13: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + _error_mode = ErrorMode::Full; + _orderings4 = 32; + _orderings3 = 32; + _search_rounds = 20; + break; + case 14: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + _error_mode = ErrorMode::Full; + _orderings4 = 32; + _orderings3 = 32; + _search_rounds = 32; + break; + case 15: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + _error_mode = ErrorMode::Full; + _orderings4 = 56; + _orderings3 = 32; + _search_rounds = 32; + break; + case 16: + _flags = Flags::TwoLeastSquaresPasses | 
Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + _error_mode = ErrorMode::Full; + _orderings4 = 80; + _orderings3 = 32; + _search_rounds = 256; + break; + case 17: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints; + _error_mode = ErrorMode::Full; + _orderings4 = 128; + _orderings3 = 32; + _search_rounds = 256; + break; + case 18: + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative; + _error_mode = ErrorMode::Full; + _orderings4 = 128; + _orderings3 = 32; + _search_rounds = 256; + break; + case 19: + // This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training. + _flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative | + Flags::Exhaustive; + _error_mode = ErrorMode::Full; + _orderings4 = 128; + _orderings3 = 32; + _search_rounds = 256; + break; + } + + if (level >= 5 && allow_3color) { _flags |= Flags::Use3ColorBlocks; } + if (level >= 5 && allow_3color_black) { _flags |= Flags::Use3ColorBlocksForBlackPixels; } + + _orderings4 = clamp(_orderings4, 1U, OrderTable<4>::BestOrderCount); + _orderings3 = clamp(_orderings3, 1U, OrderTable<3>::BestOrderCount); +} + +void BC1Encoder::SetOrderings(unsigned orderings4, unsigned orderings3) { + _orderings4 = clamp(orderings4, 1U, OrderTable<4>::BestOrderCount); + _orderings3 = clamp(orderings3, 1U, OrderTable<3>::BestOrderCount); +} + +// Public methods void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { if (pixels.IsSingleColor()) { // single-color pixel block, do it the fast way @@ -69,11 +228,14 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { auto metrics = pixels.GetMetrics(); - bool needs_block_error = (_flags & Flags::UseLikelyTotalOrderings | 
Flags::Use3ColorBlocks | Flags::UseFullMSEEval) != Flags::None; + bool needs_block_error = (_flags & (Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks)) != Flags::None; /* mask is parenthesized because '&' binds tighter than '|'; unparenthesized, the test was always true */ + needs_block_error |= (_error_mode != ErrorMode::None); needs_block_error |= (_search_rounds > 0); needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None); ErrorMode error_mode = needs_block_error ? _error_mode : ErrorMode::None; + assert(!((_error_mode == ErrorMode::None) && needs_block_error)); + const unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1; const unsigned total_ep_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1; const unsigned total_cf_iters = (_flags & Flags::Iterative) != Flags::None ? 2 : 1; @@ -133,12 +295,12 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { } // refine endpoints by searching for nearby colors - if (result.error > 0 && _search_rounds > 0) { EndpointSearch(pixels, result); - } + if (result.error > 0 && _search_rounds > 0) { EndpointSearch(pixels, result); } WriteBlock(result, dest); } +// Private methods void BC1Encoder::WriteBlockSolid(Color color, BC1Block *dest) const { uint8_t mask = 0xAA; // 2222 uint16_t min16, max16; @@ -763,7 +925,7 @@ void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const { for (unsigned i = 0; i < _search_rounds; i++) { const unsigned voxel_index = (unsigned)(i & 15); - assert((unsigned)Voxels[(unsigned)Voxels[voxel_index][3]][3] == voxel_index); // make sure voxels are symmetrical + assert((unsigned)Voxels[(unsigned)Voxels[voxel_index][3]][3] == voxel_index); // make sure voxels are symmetrical if ((int)(i & 31) == forbidden_direction) continue; @@ -780,7 +942,7 @@ void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const { trial_result.high.b = (uint8_t)clamp(trial_result.high.b + delta[2], 0, 31); } - switch(block.color_mode) { +
switch (block.color_mode) { default: case ColorMode::FourColor: FindSelectors(pixels, trial_result, _error_mode); diff --git a/src/BC1/BC1Encoder.h b/src/BC1/BC1Encoder.h index 332c406..f1fb2c5 100644 --- a/src/BC1/BC1Encoder.h +++ b/src/BC1/BC1Encoder.h @@ -38,63 +38,42 @@ class BC1Encoder final : public BlockEncoder { public: using InterpolatorPtr = std::shared_ptr; - enum class Flags : uint32_t { + enum class Flags { None = 0, // Try to improve quality using the most likely total orderings. // The total_orderings_to_try parameter will then control the number of total orderings to try for 4 color blocks, and the // total_orderings_to_try3 parameter will control the number of total orderings to try for 3 color blocks (if they are enabled). - UseLikelyTotalOrderings = 2, + UseLikelyTotalOrderings = 1, // Use 2 least squares pass, instead of one (same as stb_dxt's HIGHQUAL option). // Recommended if you're enabling UseLikelyTotalOrderings. - TwoLeastSquaresPasses = 4, + TwoLeastSquaresPasses = 2, // Use3ColorBlocksForBlackPixels allows the BC1 encoder to use 3-color blocks for blocks containing black or very dark pixels. // You shader/engine MUST ignore the alpha channel on textures encoded with this flag. // Average quality goes up substantially for my 100 texture corpus (~.5 dB), so it's worth using if you can. // Note the BC1 encoder does not actually support transparency in 3-color mode. // Don't set when encoding to BC3. - Use3ColorBlocksForBlackPixels = 8, + Use3ColorBlocksForBlackPixels = 4, // If Use3ColorBlocks is set, the encoder can use 3-color mode for a small but noticeable gain in average quality, but lower perf. // If you also specify the UseLikelyTotalOrderings flag, set the total_orderings_to_try3 paramter to the number of total orderings to try. // Don't set when encoding to BC3. - Use3ColorBlocks = 16, + Use3ColorBlocks = 8, // Iterative will greatly increase encode time, but is very slightly higher quality.
// Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, unless you just don't care about perf. at all. - Iterative = 32, - - // BoundingBox enables a fast all-integer PCA approximation on 4-color blocks. - // At level 0 options (no other flags), this is ~15% faster, and higher *average* quality. - BoundingBox = 64, - - // Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks. - UseFasterMSEEval = 128, - - // Examine all colors to compute selectors/MSE (slower than default) - UseFullMSEEval = 256, - - // Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead of PCA. - // Around 18% faster, very slightly lower average quality to better (depends on the content). - Use2DLS = 512, + Iterative = 16, // Use 6 power iterations vs. 4 for PCA. - Use6PowerIters = 2048, + Use6PowerIters = 32, // Check all total orderings - *very* slow. The encoder is not designed to be used in this way. - Exhaustive = 8192, + Exhaustive = 64, // Try 2 different ways of choosing the initial endpoints. - TryAllInitialEndpoints = 16384, - - // Same as BoundingBox, but implemented using integer math (faster, slightly less quality) - BoundingBoxInt = 32768, - - // Try refining the final endpoints by examining nearby colors. - EndpointSearchRoundsShift = 22, - EndpointSearchRoundsMask = 1023U << EndpointSearchRoundsShift, + TryAllInitialEndpoints = 128, }; enum class ColorMode { @@ -108,8 +87,70 @@ class BC1Encoder final : public BlockEncoder { FourColorSolid = FourColor | Solid, }; - enum class ErrorMode { None, Faster, Check2, Full }; - enum class EndpointMode { LeastSquares, BoundingBox, BoundingBoxInt, PCA }; + enum class ErrorMode { + // Perform no error checking at all. + None, + + // Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks. + Faster, + + // Default error mode.
+ Check2, + + // Examine all colors to compute selectors/MSE (slower than default). + Full + }; + + enum class EndpointMode { + // Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead of PCA. + // Around 18% faster, very slightly lower average quality to better (depends on the content). + LeastSquares, + + // BoundingBox enables a fast all-integer PCA approximation on 4-color blocks. + // At level 0 options (no other flags), this is ~15% faster, and higher *average* quality. + BoundingBox, + + // Same as BoundingBox, but implemented using integer math (faster, slightly less quality) + BoundingBoxInt, + + // Full PCA implementation + PCA + }; + + BC1Encoder(InterpolatorPtr interpolator); + + BC1Encoder(unsigned level = 5, bool allow_3color = true, bool allow_3color_black = true); + + BC1Encoder(InterpolatorPtr interpolator, unsigned level, bool allow_3color = true, bool allow_3color_black = true); + + BC1Encoder(InterpolatorPtr interpolator, Flags flags, ErrorMode error_mode = ErrorMode::Full, EndpointMode endpoint_mode = EndpointMode::PCA, + unsigned search_rounds = 16, unsigned orderings4 = 32, unsigned orderings3 = 32); + + const InterpolatorPtr &GetInterpolator() const; + + void SetLevel(unsigned level, bool allow_3color = true, bool allow_3color_black = true); + + Flags GetFlags() const { return _flags; } + void SetFlags(Flags flags) { _flags = flags; } + + ErrorMode GetErrorMode() const { return _error_mode; } + void SetErrorMode(ErrorMode error_mode) { _error_mode = error_mode; } + + EndpointMode GetEndpointMode() const { return _endpoint_mode; } + void SetEndpointMode(EndpointMode endpoint_mode) { _endpoint_mode = endpoint_mode; } + + unsigned int GetSearchRounds() const { return _search_rounds; } + void SetSearchRounds(unsigned search_rounds) { _search_rounds = search_rounds; } + + unsigned int GetOrderings4() const { return _orderings4; } + unsigned int GetOrderings3() const { return _orderings3; } +
void SetOrderings(unsigned orderings4, unsigned orderings3); + + void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override; + + private: + using Hash = uint16_t; + using BlockMetrics = Color4x4::BlockMetrics; // Unpacked BC1 block with metadata struct EncodeResults { @@ -120,14 +161,6 @@ class BC1Encoder final : public BlockEncoder { unsigned error = UINT_MAX; }; - BC1Encoder(InterpolatorPtr interpolator); - - void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override; - - private: - using Hash = uint16_t; - using BlockMetrics = Color4x4::BlockMetrics; - const InterpolatorPtr _interpolator; // match tables used for single-color blocks diff --git a/src/BC1/Histogram.h b/src/BC1/Histogram.h index 4611291..9aa40bf 100644 --- a/src/BC1/Histogram.h +++ b/src/BC1/Histogram.h @@ -69,7 +69,14 @@ template class Histogram { } unsigned GetPacked() const { - return Pack(_bins); + Hash packed = 0; + + for (unsigned i = 0; i < (N-1); i++) { + assert(_bins[i] <= (1U << 4) - 1U); + packed |= static_cast(_bins[i]) << (i * 4U); + } + + return packed; } private: diff --git a/src/BlockView.h b/src/BlockView.h index 4abc360..9721fbe 100644 --- a/src/BlockView.h +++ b/src/BlockView.h @@ -158,7 +158,7 @@ template class ColorBlockView : public BlockView 0) metrics.avg = (metrics.sums + Vector4Int(total / 2)) / total; // half-total added for better rounding + if (total > 0) metrics.avg = (metrics.sums + Vector4Int(total / 2)) / (int)total; // half-total added for better rounding return metrics; } diff --git a/src/test/test.cpp b/src/test/test.cpp index 87d2221..469b596 100644 --- a/src/test/test.cpp +++ b/src/test/test.cpp @@ -673,7 +673,7 @@ int main(int argc, char *argv[]) { for (int i = 0; i < test_count; i++) bc4_encoder.EncodeImage(reinterpret_cast(&packed_image8[0]), src, source_image.width(), source_image.height()); } else if (dxgi_format == DXGI_FORMAT_BC1_UNORM) { - auto bc1_encoder = BC1Encoder(Interpolator::MakeInterpolator()); + auto bc1_encoder = 
BC1Encoder(bc1_quality_level, use_bc1_3color_mode, use_bc1_3color_mode_for_black); Color *src = &source_image.get_pixels()[0]; bc1_encoder.EncodeImage(reinterpret_cast(&packed_image8[0]), src, source_image.width(), source_image.height());