Make encoder constructable

This commit is contained in:
Andrew Cassidy 2021-03-04 01:18:30 -08:00
parent be5a439b08
commit af7860c06a
5 changed files with 256 additions and 54 deletions

View File

@ -44,14 +44,14 @@ using namespace BC1;
using ColorMode = BC1Encoder::ColorMode;
// constructors
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
_flags =
Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels;
_error_mode = ErrorMode::Full;
_flags = Flags::None;
_error_mode = ErrorMode::Check2;
_endpoint_mode = EndpointMode::PCA;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
_search_rounds = 0;
_orderings3 = 1;
_orderings4 = 1;
OrderTable<3>::Generate();
OrderTable<4>::Generate();
@ -60,6 +60,165 @@ BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolato
assert(OrderTable<4>::generated);
}
// Level-based constructor: delegates to the interpolator constructor with an
// interpolator obtained from Interpolator::MakeInterpolator(), then applies
// the requested quality level.
BC1Encoder::BC1Encoder(unsigned int level, bool allow_3color, bool allow_3color_black)
    : BC1Encoder(Interpolator::MakeInterpolator())
{
    // The delegated constructor has finished; overwrite its settings with the preset.
    this->SetLevel(level, allow_3color, allow_3color_black);
}
// Level-based constructor for callers that supply their own interpolator:
// delegates to the interpolator constructor, then applies the quality level.
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator, unsigned level, bool allow_3color, bool allow_3color_black)
    : BC1Encoder(interpolator)
{
    // The delegated constructor has finished; overwrite its settings with the preset.
    this->SetLevel(level, allow_3color, allow_3color_black);
}
// Fully explicit constructor: delegates to the interpolator constructor, then
// installs every setting through its setter so that the same validation
// applies as for later changes (SetOrderings clamps both ordering counts).
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator,
                       Flags flags,
                       ErrorMode error_mode,
                       EndpointMode endpoint_mode,
                       unsigned search_rounds,
                       unsigned orderings4,
                       unsigned orderings3)
    : BC1Encoder(interpolator)
{
    // The setters are independent of one another, so their order does not matter.
    SetOrderings(orderings4, orderings3);
    SetSearchRounds(search_rounds);
    SetEndpointMode(endpoint_mode);
    SetErrorMode(error_mode);
    SetFlags(flags);
}
// Getters and Setters
void BC1Encoder::SetLevel(unsigned level, bool allow_3color, bool allow_3color_black) {
_flags = Flags::None;
_error_mode = ErrorMode::Check2;
_endpoint_mode = EndpointMode::PCA;
_search_rounds = 0;
_orderings3 = 1;
_orderings4 = 1;
switch (level) {
case 0:
// Faster/higher quality than stb_dxt default.
_endpoint_mode = EndpointMode::BoundingBoxInt;
break;
case 1:
// Faster/higher quality than stb_dxt default. a bit higher average quality vs. mode 0.
_endpoint_mode = EndpointMode::LeastSquares;
break;
case 2:
// On average mode 2 is a little weaker than modes 0/1, but it's stronger on outliers (very tough textures).
// Slightly stronger than stb_dxt.
// Uses default settings.
break;
case 3:
// Slightly stronger than stb_dxt HIGHQUAL.
_flags = Flags::TwoLeastSquaresPasses;
break;
case 4:
_flags = Flags::TwoLeastSquaresPasses | Flags::Use6PowerIters;
_error_mode = ErrorMode::Full;
break;
default:
case 5:
// stb_dxt HIGHQUAL + permit 3 color (if it's enabled).
_flags = Flags::TwoLeastSquaresPasses;
_error_mode = ErrorMode::Faster;
break;
case 6:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Faster;
break;
case 7:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Faster;
_orderings4 = 4;
break;
case 8:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Faster;
_orderings4 = 8;
break;
case 9:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Check2;
_orderings4 = 11;
_orderings3 = 3;
break;
case 10:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Check2;
_orderings4 = 20;
_orderings3 = 8;
break;
case 11:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Check2;
_orderings4 = 28;
_orderings3 = 16;
break;
case 12:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings;
_error_mode = ErrorMode::Check2;
_orderings4 = 32;
_orderings3 = 32;
break;
case 13:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
_error_mode = ErrorMode::Full;
_orderings4 = 32;
_orderings3 = 32;
_search_rounds = 20;
break;
case 14:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
_error_mode = ErrorMode::Full;
_orderings4 = 32;
_orderings3 = 32;
_search_rounds = 32;
break;
case 15:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
_error_mode = ErrorMode::Full;
_orderings4 = 56;
_orderings3 = 32;
_search_rounds = 32;
break;
case 16:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
_error_mode = ErrorMode::Full;
_orderings4 = 80;
_orderings3 = 32;
_search_rounds = 256;
break;
case 17:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints;
_error_mode = ErrorMode::Full;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
break;
case 18:
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative;
_error_mode = ErrorMode::Full;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
break;
case 19:
// This hidden mode is *extremely* slow and abuses the encoder. It's just for testing/training.
_flags = Flags::TwoLeastSquaresPasses | Flags::UseLikelyTotalOrderings | Flags::Use6PowerIters | Flags::TryAllInitialEndpoints | Flags::Iterative |
Flags::Exhaustive;
_error_mode = ErrorMode::Full;
_orderings4 = 128;
_orderings3 = 32;
_search_rounds = 256;
break;
}
if (level >= 5 && allow_3color) { _flags |= Flags::Use3ColorBlocks; }
if (level >= 5 && allow_3color_black) { _flags |= Flags::Use3ColorBlocksForBlackPixels; }
_orderings4 = clamp(_orderings4, 1U, OrderTable<4>::BestOrderCount);
_orderings3 = clamp(_orderings3, 1U, OrderTable<3>::BestOrderCount);
}
// Stores the number of total orderings to try for 4-color and 3-color blocks.
// Each count is clamped to [1, OrderTable<N>::BestOrderCount] before it is stored.
void BC1Encoder::SetOrderings(unsigned orderings4, unsigned orderings3) {
    // The two assignments are independent of each other.
    _orderings3 = clamp(orderings3, 1U, OrderTable<3>::BestOrderCount);
    _orderings4 = clamp(orderings4, 1U, OrderTable<4>::BestOrderCount);
}
// Public methods
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
if (pixels.IsSingleColor()) {
// single-color pixel block, do it the fast way
@ -69,11 +228,14 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
auto metrics = pixels.GetMetrics();
bool needs_block_error = (_flags & Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks | Flags::UseFullMSEEval) != Flags::None;
// Block error is needed when either of these flags is set. The flag union must be
// parenthesized: `&` binds tighter than `|`, so the unparenthesized form evaluated
// `(_flags & UseLikelyTotalOrderings) | Use3ColorBlocks`, which is never None.
bool needs_block_error = (_flags & (Flags::UseLikelyTotalOrderings | Flags::Use3ColorBlocks)) != Flags::None;
needs_block_error |= (_error_mode != ErrorMode::None);
needs_block_error |= (_search_rounds > 0);
needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None);
ErrorMode error_mode = needs_block_error ? _error_mode : ErrorMode::None;
assert(!((_error_mode == ErrorMode::None) && needs_block_error));
const unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
const unsigned total_ep_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;
const unsigned total_cf_iters = (_flags & Flags::Iterative) != Flags::None ? 2 : 1;
@ -133,12 +295,12 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
}
// refine endpoints by searching for nearby colors
if (result.error > 0 && _search_rounds > 0) { EndpointSearch(pixels, result);
}
if (result.error > 0 && _search_rounds > 0) { EndpointSearch(pixels, result); }
WriteBlock(result, dest);
}
// Private methods
void BC1Encoder::WriteBlockSolid(Color color, BC1Block *dest) const {
uint8_t mask = 0xAA; // 2222
uint16_t min16, max16;
@ -763,7 +925,7 @@ void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const {
for (unsigned i = 0; i < _search_rounds; i++) {
const unsigned voxel_index = (unsigned)(i & 15);
assert((unsigned)Voxels[(unsigned)Voxels[voxel_index][3]][3] == voxel_index); // make sure voxels are symmetrical
assert((unsigned)Voxels[(unsigned)Voxels[voxel_index][3]][3] == voxel_index); // make sure voxels are symmetrical
if ((int)(i & 31) == forbidden_direction) continue;
@ -780,7 +942,7 @@ void BC1Encoder::EndpointSearch(Color4x4 &pixels, EncodeResults &block) const {
trial_result.high.b = (uint8_t)clamp(trial_result.high.b + delta[2], 0, 31);
}
switch(block.color_mode) {
switch (block.color_mode) {
default:
case ColorMode::FourColor:
FindSelectors<ColorMode::FourColor>(pixels, trial_result, _error_mode);

View File

@ -38,63 +38,42 @@ class BC1Encoder final : public BlockEncoder<BC1Block, 4, 4> {
public:
using InterpolatorPtr = std::shared_ptr<Interpolator>;
enum class Flags : uint32_t {
enum class Flags {
None = 0,
// Try to improve quality using the most likely total orderings.
// The total_orderings_to_try parameter will then control the number of total orderings to try for 4 color blocks, and the
// total_orderings_to_try3 parameter will control the number of total orderings to try for 3 color blocks (if they are enabled).
UseLikelyTotalOrderings = 2,
UseLikelyTotalOrderings = 1,
// Use 2 least squares pass, instead of one (same as stb_dxt's HIGHQUAL option).
// Recommended if you're enabling UseLikelyTotalOrderings.
TwoLeastSquaresPasses = 4,
TwoLeastSquaresPasses = 2,
// Use3ColorBlocksForBlackPixels allows the BC1 encoder to use 3-color blocks for blocks containing black or very dark pixels.
// Your shader/engine MUST ignore the alpha channel on textures encoded with this flag.
// Average quality goes up substantially for my 100 texture corpus (~.5 dB), so it's worth using if you can.
// Note the BC1 encoder does not actually support transparency in 3-color mode.
// Don't set when encoding to BC3.
Use3ColorBlocksForBlackPixels = 8,
Use3ColorBlocksForBlackPixels = 4,
// If Use3ColorBlocks is set, the encoder can use 3-color mode for a small but noticeable gain in average quality, but lower perf.
// If you also specify the UseLikelyTotalOrderings flag, set the total_orderings_to_try3 parameter to the number of total orderings to try.
// Don't set when encoding to BC3.
Use3ColorBlocks = 16,
Use3ColorBlocks = 8,
// Iterative will greatly increase encode time, but is very slightly higher quality.
// Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, unless you just don't care about perf. at all.
Iterative = 32,
// BoundingBox enables a fast all-integer PCA approximation on 4-color blocks.
// At level 0 options (no other flags), this is ~15% faster, and higher *average* quality.
BoundingBox = 64,
// Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks.
UseFasterMSEEval = 128,
// Examine all colors to compute selectors/MSE (slower than default)
UseFullMSEEval = 256,
// Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead of PCA.
// Around 18% faster, very slightly lower average quality to better (depends on the content).
Use2DLS = 512,
Iterative = 16,
// Use 6 power iterations vs. 4 for PCA.
Use6PowerIters = 2048,
Use6PowerIters = 32,
// Check all total orderings - *very* slow. The encoder is not designed to be used in this way.
Exhaustive = 8192,
Exhaustive = 64,
// Try 2 different ways of choosing the initial endpoints.
TryAllInitialEndpoints = 16384,
// Same as BoundingBox, but implemented using integer math (faster, slightly less quality)
BoundingBoxInt = 32768,
// Try refining the final endpoints by examining nearby colors.
EndpointSearchRoundsShift = 22,
EndpointSearchRoundsMask = 1023U << EndpointSearchRoundsShift,
TryAllInitialEndpoints = 128,
};
enum class ColorMode {
@ -108,8 +87,70 @@ class BC1Encoder final : public BlockEncoder<BC1Block, 4, 4> {
FourColorSolid = FourColor | Solid,
};
enum class ErrorMode { None, Faster, Check2, Full };
enum class EndpointMode { LeastSquares, BoundingBox, BoundingBoxInt, PCA };
// How block error (MSE) is evaluated while encoding.
enum class ErrorMode {
    None = 0,    // no error checking at all
    Faster = 1,  // slightly lower quality, but ~30% faster MSE evaluation for 4-color blocks
    Check2 = 2,  // the default error mode
    Full = 3,    // examine all colors to compute selectors/MSE (slower than the default)
};
// How the initial block endpoints are chosen.
enum class EndpointMode {
    // 2D least squares + inset + optimal rounding (the method used in Humus's GPU texture
    // encoding demo) instead of PCA. Around 18% faster; average quality ranges from very
    // slightly lower to better, depending on the content.
    LeastSquares = 0,

    // Fast all-integer PCA approximation on 4-color blocks. At level 0 options (no other
    // flags) this is ~15% faster, with higher *average* quality.
    BoundingBox = 1,

    // Same as BoundingBox, but implemented using integer math (faster, slightly less quality).
    BoundingBoxInt = 2,

    // Full PCA implementation.
    PCA = 3,
};
// Constructs an encoder that uses the given interpolator.
BC1Encoder(InterpolatorPtr interpolator);
// Constructs an encoder from a quality level. The definition obtains its interpolator from
// Interpolator::MakeInterpolator() and then calls SetLevel() with these arguments.
BC1Encoder(unsigned level = 5, bool allow_3color = true, bool allow_3color_black = true);
// Same as the level constructor, but with a caller-supplied interpolator.
BC1Encoder(InterpolatorPtr interpolator, unsigned level, bool allow_3color = true, bool allow_3color_black = true);
// Constructs an encoder with every setting given explicitly. The definition applies each value
// through the corresponding setter, so the ordering counts are clamped by SetOrderings().
BC1Encoder(InterpolatorPtr interpolator, Flags flags, ErrorMode error_mode = ErrorMode::Full, EndpointMode endpoint_mode = EndpointMode::PCA,
unsigned search_rounds = 16, unsigned orderings4 = 32, unsigned orderings3 = 32);
const InterpolatorPtr &GetInterpolator() const;
void SetLevel(unsigned level, bool allow_3color = true, bool allow_3color_black = true);
// Returns the option flags currently stored in the encoder.
Flags GetFlags() const {
    return _flags;
}
// Replaces the stored option flags with the given set (no merging with the old value).
void SetFlags(Flags flags) {
    _flags = flags;
}
// Returns the stored error evaluation mode.
ErrorMode GetErrorMode() const {
    return _error_mode;
}
// Stores the error evaluation mode.
void SetErrorMode(ErrorMode error_mode) {
    _error_mode = error_mode;
}
// Returns the stored endpoint selection mode.
EndpointMode GetEndpointMode() const {
    return _endpoint_mode;
}
// Stores the endpoint selection mode.
void SetEndpointMode(EndpointMode endpoint_mode) {
    _endpoint_mode = endpoint_mode;
}
// Returns the stored number of endpoint search rounds.
unsigned int GetSearchRounds() const {
    return _search_rounds;
}
// Stores the number of endpoint search rounds; the value is stored as given, without clamping.
void SetSearchRounds(unsigned search_rounds) {
    _search_rounds = search_rounds;
}
// Returns the stored ordering count for 4-color blocks.
unsigned int GetOrderings4() const {
    return _orderings4;
}
// Returns the stored ordering count for 3-color blocks.
unsigned int GetOrderings3() const {
    return _orderings3;
}
void SetOrderings(unsigned orderings4, unsigned orderings3);
void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override;
private:
using Hash = uint16_t;
using BlockMetrics = Color4x4::BlockMetrics;
// Unpacked BC1 block with metadata
struct EncodeResults {
@ -120,14 +161,6 @@ class BC1Encoder final : public BlockEncoder<BC1Block, 4, 4> {
unsigned error = UINT_MAX;
};
BC1Encoder(InterpolatorPtr interpolator);
void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override;
private:
using Hash = uint16_t;
using BlockMetrics = Color4x4::BlockMetrics;
const InterpolatorPtr _interpolator;
// match tables used for single-color blocks

View File

@ -69,7 +69,14 @@ template <size_t N> class Histogram {
}
// Packs the first N-1 bin counts into a single value, 4 bits per bin, with bin 0 in the
// lowest nibble.
// NOTE(review): the last bin is deliberately left out of the loop below — presumably it is
// implied by the other bins; confirm against the callers.
unsigned GetPacked() const {
    Hash packed = 0;
    for (unsigned i = 0; i < (N - 1); i++) {
        // Every packed bin has to fit into its 4-bit slot.
        assert(_bins[i] <= (1U << 4) - 1U);
        packed |= static_cast<uint16_t>(_bins[i]) << (i * 4U);
    }
    return packed;
}
private:

View File

@ -158,7 +158,7 @@ template <size_t M, size_t N> class ColorBlockView : public BlockView<Color, M,
total++;
}
if (total > 0) metrics.avg = (metrics.sums + Vector4Int(total / 2)) / total; // half-total added for better rounding
if (total > 0) metrics.avg = (metrics.sums + Vector4Int(total / 2)) / (int)total; // half-total added for better rounding
return metrics;
}

View File

@ -673,7 +673,7 @@ int main(int argc, char *argv[]) {
for (int i = 0; i < test_count; i++)
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
} else if (dxgi_format == DXGI_FORMAT_BC1_UNORM) {
auto bc1_encoder = BC1Encoder(Interpolator::MakeInterpolator());
auto bc1_encoder = BC1Encoder(bc1_quality_level, use_bc1_3color_mode, use_bc1_3color_mode_for_black);
Color *src = &source_image.get_pixels()[0];
bc1_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());