diff --git a/src/BC1/BC1Encoder.cpp b/src/BC1/BC1Encoder.cpp index 80dad2b..d9c302b 100644 --- a/src/BC1/BC1Encoder.cpp +++ b/src/BC1/BC1Encoder.cpp @@ -36,73 +36,39 @@ #include "../Vector4Int.h" #include "../bitwiseEnums.h" #include "../util.h" +#include "OrderTable.h" +#include "SingleColorTable.h" namespace rgbcx { -using MatchList = std::array; -using MatchListPtr = std::shared_ptr; using InterpolatorPtr = std::shared_ptr; +using Hist3 = OrderTable<3>::Histogram; +using Hist4 = OrderTable<4>::Histogram; // region Free Functions/Templates -inline void PrepSingleColorTableEntry(unsigned &error, MatchList &match_table, uint8_t v, unsigned i, uint8_t low, uint8_t high, uint8_t low8, uint8_t high8, - bool ideal) { - unsigned new_error = iabs(v - (int)i); - - // We only need to factor in 3% error in BC1 ideal mode. - if (ideal) new_error += (iabs(high8 - (int)low8) * 3) / 100; - - // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation. - if ((new_error < error) || (new_error == error && low == high)) { - assert(new_error <= UINT8_MAX); - - match_table[i].low = (uint8_t)low; - match_table[i].high = (uint8_t)high; - match_table[i].error = (uint8_t)new_error; - - error = new_error; - } -} - -template void PrepSingleColorTable(MatchList &match_table, MatchList &match_table_half, Interpolator &interpolator) { - unsigned size = 1 << S; - - assert((S == 5 && size == 32) || (S == 6 && size == 64)); - - bool ideal = interpolator.IsIdeal(); - bool use_8bit = interpolator.CanInterpolate8Bit(); - - for (unsigned i = 0; i < 256; i++) { - unsigned error = 256; - unsigned error_half = 256; - - // TODO: Can probably avoid testing for values that definitely wont yield good results, - // e.g. low8 and high8 both much smaller or larger than index - for (uint8_t low = 0; low < size; low++) { - uint8_t low8 = (S == 5) ? scale5To8(low) : scale6To8(low); - - for (uint8_t high = 0; high < size; high++) { - uint8_t high8 = (S == 5) ? 
scale5To8(high) : scale6To8(high); - uint8_t value, value_half; - - if (use_8bit) { - value = interpolator.Interpolate8(high8, low8); - value_half = interpolator.InterpolateHalf8(high8, low8); - } else { - value = (S == 5) ? interpolator.Interpolate5(high, low) : interpolator.Interpolate6(high, low); - value_half = (S == 5) ? interpolator.InterpolateHalf5(high, low) : interpolator.InterpolateHalf6(high, low); - } - - PrepSingleColorTableEntry(error, match_table, value, i, low, high, low8, high8, ideal); - PrepSingleColorTableEntry(error_half, match_table_half, value_half, i, low, high, low8, high8, ideal); - } - } - } -} // endregion +// Static Fields +OrderTable<3> *BC1Encoder::order_table3 = nullptr; +OrderTable<4> *BC1Encoder::order_table4 = nullptr; +std::mutex BC1Encoder::order_table_mutex = std::mutex(); +bool BC1Encoder::order_tables_generated = false; + BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) { - PrepSingleColorTable<5>(*_single_match5, *_single_match5_half, *_interpolator); - PrepSingleColorTable<6>(*_single_match6, *_single_match6_half, *_interpolator); - _flags = Flags::UseFullMSEEval | Flags::TwoLeastSquaresPasses; + _flags = Flags::UseFasterMSEEval | Flags::TwoLeastSquaresPasses; + + // generate lookup tables + order_table_mutex.lock(); + if (!order_tables_generated) { + assert(order_table3 == nullptr); + assert(order_table4 == nullptr); + + order_table3 = new OrderTable<3>(); + order_table4 = new OrderTable<4>(); + order_tables_generated = true; + } + assert(order_table3 != nullptr); + assert(order_table4 != nullptr); + order_table_mutex.unlock(); } void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { @@ -125,10 +91,11 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None); unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 
2 : 1; - unsigned total_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1; + unsigned total_ep_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1; + // Initial block generation EncodeResults result; - for (unsigned round = 0; round < total_rounds; round++) { + for (unsigned round = 0; round < total_ep_rounds; round++) { Flags modified_flags = _flags; if (round == 1) { modified_flags &= ~(Flags::Use2DLS | Flags::BoundingBoxInt); @@ -158,6 +125,54 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const { if (!needs_block_error || round_result.error < result.error) { result = round_result; } } + // First refinement pass using ordered cluster fit + if (result.error > 0 && (_flags & Flags::UseLikelyTotalOrderings) != Flags::None) { + const unsigned total_iters = (_flags & Flags::Iterative) != Flags::None ? 2 : 1; + for (unsigned iter = 0; iter < total_iters; iter++) { + EncodeResults orig = result; + Hist4 h(orig.selectors); + + const Hash order_index = order_table4->GetHash(h); + + Color low = orig.low.ScaleFrom565(); + Color high = orig.high.ScaleFrom565(); + + Vector4Int axis = high - low; + std::array color_vectors; + + std::array dots; + for (unsigned i = 0; i < 16; i++) { + color_vectors[i] = Vector4::FromColorRGB(pixels.Get(i)); + int dot = 0x1000000 + color_vectors[i].Dot(axis); + assert(dot >= 0); + dots[i] = (uint32_t)(dot << 4) | i; + } + + std::sort(dots.begin(), dots.end()); + + // we now have a list of indices and their dot products along the primary axis + std::array sums; + for (unsigned i = 0; i < 16; i++) { + const unsigned p = dots[i] & 0xF; + sums[i + 1] = sums[i] + color_vectors[p]; + } + + const unsigned q_total = ((_flags & Flags::Exhaustive) != Flags::None) ? 
order_table4->UniqueOrderings + : (unsigned)clampi(_orderings4, MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4); + for (unsigned q = 0; q < q_total; q++) { + Hash s = ((_flags & Flags::Exhaustive) != Flags::None) ? q : g_best_total_orderings4[order_index][q]; + + EncodeResults trial = orig; + Vector4 low, high; + if (order_table4->IsSingleColor(order_index)) { + trial.is_1_color = true; + trial.is_3_color = false; + } else { + } + } + } + } + if (result.low == result.high) { EncodeBlockSingleColor(metrics.avg, dest); } else { @@ -172,19 +187,15 @@ void BC1Encoder::EncodeBlockSingleColor(Color color, BC1Block *dest) const { bool using_3color = false; // why is there no subscript operator for shared_ptr - MatchList &match5 = *_single_match5; - MatchList &match6 = *_single_match6; - MatchList &match5_half = *_single_match5_half; - MatchList &match6_half = *_single_match6_half; - BC1MatchEntry match_r = match5[color.r]; - BC1MatchEntry match_g = match6[color.g]; - BC1MatchEntry match_b = match5[color.b]; + auto match_r = _single_match5[color.r]; + auto match_g = _single_match6[color.g]; + auto match_b = _single_match5[color.b]; if ((_flags & (Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels)) != Flags::None) { - BC1MatchEntry match_r_half = match5_half[color.r]; - BC1MatchEntry match_g_half = match6_half[color.g]; - BC1MatchEntry match_b_half = match5_half[color.b]; + auto match_r_half = _single_match5_half[color.r]; + auto match_g_half = _single_match6_half[color.g]; + auto match_b_half = _single_match5_half[color.b]; const unsigned err4 = match_r.error + match_g.error + match_b.error; const unsigned err3 = match_r_half.error + match_g_half.error + match_b_half.error; @@ -590,4 +601,17 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Block block.high = Color::PreciseRound565(high); return true; } +/* +bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, Hash hash, Vector4 &matrix, 
std::array &sums, + bool is_3color, bool use_black) const { + unsigned f1, f2, f3; + int denominator = is_3color ? 2 : 3; + + if (is_3color) { + order_table3->GetUniqueOrderingSums(hash, f1, f2, f3); + } else { + order_table4->GetUniqueOrderingSums(hash, f1, f2, f3); + } +}*/ + } // namespace rgbcx \ No newline at end of file diff --git a/src/BC1/BC1Encoder.h b/src/BC1/BC1Encoder.h index 99db2b4..b301df7 100644 --- a/src/BC1/BC1Encoder.h +++ b/src/BC1/BC1Encoder.h @@ -30,20 +30,15 @@ #include "../bitwiseEnums.h" #include "../ndebug.h" #include "BC1Block.h" +#include "OrderTable.h" +#include "SingleColorTable.h" #include "tables.h" namespace rgbcx { -struct BC1MatchEntry { - uint8_t high; - uint8_t low; - uint8_t error; -}; - class BC1Encoder : public BlockEncoder { public: using InterpolatorPtr = std::shared_ptr; - using BlockMetrics = Color4x4::BlockMetrics; enum class Flags : uint32_t { None = 0, @@ -109,13 +104,6 @@ class BC1Encoder : public BlockEncoder { void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override; private: - const InterpolatorPtr _interpolator; - - Flags _flags; - unsigned _search_rounds; - unsigned _orderings4; - unsigned _orderings3; - // Unpacked BC1 block with metadata struct EncodeResults { Color low; @@ -126,6 +114,30 @@ class BC1Encoder : public BlockEncoder { unsigned error = UINT_MAX; }; + using Hash = uint16_t; + using BlockMetrics = Color4x4::BlockMetrics; + + const InterpolatorPtr _interpolator; + + // match tables used for single-color blocks + // Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible, + // with an included error value + // these depend on the interpolator + const SingleColorTable<5, 4> _single_match5 = SingleColorTable<5, 4>(_interpolator); + const SingleColorTable<6, 4> _single_match6 = SingleColorTable<6, 4>(_interpolator); + const SingleColorTable<5, 3> _single_match5_half = SingleColorTable<5, 3>(_interpolator); + const SingleColorTable<6, 3> _single_match6_half 
= SingleColorTable<6, 3>(_interpolator); + + Flags _flags; + unsigned _search_rounds; + unsigned _orderings4; + unsigned _orderings3; + + static OrderTable<4> *order_table4; // order table for 4-color blocks + static OrderTable<3> *order_table3; // order table for 3-color blocks + static std::mutex order_table_mutex; // prevent race condition with multiple BC1Encoders constructed at once + static bool order_tables_generated; // have the order tables been generated by a previous instance? + void EncodeBlockSingleColor(Color color, BC1Block *dest) const; void EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const; @@ -133,36 +145,7 @@ class BC1Encoder : public BlockEncoder { unsigned FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const; bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const; - - // match tables used for single-color blocks - // Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible, - // with an included error value - // these depend on the interpolator - using MatchList = std::array; - using MatchListPtr = std::shared_ptr; - - const MatchListPtr _single_match5 = std::make_shared(); - const MatchListPtr _single_match6 = std::make_shared(); - const MatchListPtr _single_match5_half = std::make_shared(); - const MatchListPtr _single_match6_half = std::make_shared(); - - // static lookup tables, generated the first time an encoder is created - // the mutex prevents race conditions if two encoders are created on different threads - static std::mutex _luts_mutex; - static bool _luts_initialized; - - // lookup table for hash values - static uint16_t g_total_ordering4_hash[4096]; - static uint16_t g_total_ordering3_hash[256]; - - static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3]; - static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3]; - - // This table is: 9 * (w * w), 9 * ((1.0f - w) 
* w), 9 * ((1.0f - w) * (1.0f - w)) - // where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. - static constexpr uint32_t g_weight_vals4[4] = {0x000009, 0x010204, 0x040201, 0x090000}; - - // multiplier is 4 for 3-color - static constexpr uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101}; +/* bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, Hash hash, Vector4 &matrix, std::array &sums, + bool is_3color, bool use_black) const;*/ }; } // namespace rgbcx diff --git a/src/BC1/ClusterFit.h b/src/BC1/ClusterFit.h new file mode 100644 index 0000000..691bf89 --- /dev/null +++ b/src/BC1/ClusterFit.h @@ -0,0 +1,26 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . 
+ */ + +#pragma once + +namespace rgbcx { + +void ComputeEndpoints();  // declaration only; the terminating ';' was missing, which made the header ill-formed + +} // namespace rgbcx \ No newline at end of file diff --git a/src/BC1/OrderTable.h b/src/BC1/OrderTable.h new file mode 100644 index 0000000..e526dbd --- /dev/null +++ b/src/BC1/OrderTable.h @@ -0,0 +1,151 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . 
+ */ + +#pragma once + +#include +#include +#include +#include +#include + +#include "../Vector4.h" +#include "../util.h" +#include "tables.h" + +namespace rgbcx { +// OrderTable: maps a histogram of 16 selectors over N bins to the index of its unique total ordering, and stores for each ordering the scaled inverse of the symmetric 2x2 matrix accumulated from g_weight_vals3 / g_weight_vals4. +template class OrderTable { + public: + using Hash = uint16_t; + using FactorMatrix = std::array; + + class Histogram { + public: + Histogram() { _bins = {0}; } + + Histogram(std::array sels) { + _bins = {0}; + for (unsigned i = 0; i < 16; i++) { + assert(sels[i] < N); + _bins[sels[i]]++; + } + } + + uint8_t operator[](size_t index) const { + assert(index < N); + return _bins[index]; + } + uint8_t &operator[](size_t index) { + assert(index < N); + return _bins[index]; + } + + bool Any16() const { + return std::any_of(_bins.begin(), _bins.end(), [](int i) { return i == 16; }); + } + + unsigned GetPacked() const { + unsigned packed = 0; + for (unsigned i = 0; i < (N - 1); i++) { packed |= (_bins[i] << (4 * i)); } + + assert(packed < TotalHashes); + + return packed; + } + + private: + std::array _bins; + }; + + static inline constexpr size_t UniqueOrderings = (N == 4) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : NUM_UNIQUE_TOTAL_ORDERINGS3; + static inline constexpr size_t TotalHashes = (N == 4) ? 4096 : 256; + + static inline constexpr uint8_t GetUniqueOrdering(Hash hash, unsigned selector) { + if constexpr (N == 4) { return g_unique_total_orders4[hash][selector]; } + return g_unique_total_orders3[hash][selector]; + } + + static inline constexpr void GetUniqueOrderingSums(Hash hash, unsigned &f1, unsigned &f2, unsigned &f3) { + f1 = GetUniqueOrdering(hash, 0); + f2 = f1 + GetUniqueOrdering(hash, 1); + f3 = f2 + GetUniqueOrdering(hash, 2); + } + + OrderTable() { + static_assert(N == 4 || N == 3); + + const unsigned *weight_vals = (N == 4) ? g_weight_vals4 : g_weight_vals3; + const float denominator = (N == 4) ? 
3.0f : 2.0f; + + for (unsigned i = 0; i < UniqueOrderings; i++) { + Histogram h; + for (unsigned j = 0; j < N; j++) { h[j] = GetUniqueOrdering(i, j); } + + if (!h.Any16()) _hashes[h.GetPacked()] = (Hash)i; + + unsigned weight_accum = 0; + for (unsigned sel = 0; sel < N; sel++) weight_accum += (weight_vals[sel] * h[sel]); + + // todo: use a Vector4 here instead for SIMD readiness + float z00 = (float)((weight_accum >> 16) & 0xFF); + float z10 = (float)((weight_accum >> 8) & 0xFF); + float z11 = (float)(weight_accum & 0xFF); + float z01 = z10; + + float det = z00 * z11 - z01 * z10; + if (fabs(det) < 1e-8f) { + _factors[i][0] = 0; + _factors[i][1] = 0; + _factors[i][2] = 0; + } else { + det = (denominator / 255.0f) / det; + _factors[i][0] = z11 * det; + _factors[i][1] = -z10 * det; + _factors[i][2] = z00 * det; + } + } + } + // Returns the ordering index for a histogram. A bin holding all 16 selectors does not fit GetPacked()'s 4-bit fields, so those histograms are resolved through GetSingleColorHashes() instead of _hashes. + Hash GetHash(const Histogram &hist) const { + for (unsigned i = 0; i < N; i++) { + if (hist[i] == 16) return GetSingleColorHashes()[i]; + } + + return _hashes[hist.GetPacked()]; + } + + Vector4 GetFactors(Hash hash) const { return Vector4(_factors[hash][0], _factors[hash][1], _factors[hash][1], _factors[hash][2]); } + // Ordering indices of the histograms that put all 16 selectors in one bin, indexed by that bin. if constexpr discards the initializer list written for the other N instead of instantiating both. + static inline constexpr std::array GetSingleColorHashes() { + if constexpr (N == 4) { return {15, 700, 753, 515}; } + else { return {12, 15, 89}; } + } + + static inline constexpr bool IsSingleColor(Hash hash) { + for (const Hash candidate : GetSingleColorHashes()) { if (candidate == hash) return true; }  // plain loop: std::find is not constexpr before C++20 + return false; + } + + private: + std::array _hashes; + std::array _factors; +}; + +} // namespace rgbcx \ No newline at end of file diff --git a/src/BC1/SingleColorTable.h b/src/BC1/SingleColorTable.h new file mode 100644 index 0000000..d1d3cb1 --- /dev/null +++ b/src/BC1/SingleColorTable.h @@ -0,0 +1,107 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or 
modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . + */ + +#pragma once + +#include +#include +#include + +#include "../Interpolator.h" +#include "../Util.h" + +namespace rgbcx { + +/** + * Lookup table for single-color blocks + * @tparam B Number of bits (5 or 6) + * @tparam N Number of colors (3 or 4) + */ +template class SingleColorTable { + public: + struct MatchEntry { + uint8_t high; + uint8_t low; + uint8_t error; + }; + + using MatchList = std::array; + using MatchListPtr = std::shared_ptr; + using InterpolatorPtr = std::shared_ptr; + + SingleColorTable(InterpolatorPtr interpolator) { + static_assert((B == 5 && Size == 32) || (B == 6 && Size == 64)); + static_assert(N == 4 || N == 3); + + bool ideal = interpolator->IsIdeal(); + bool use_8bit = interpolator->CanInterpolate8Bit(); + + for (unsigned i = 0; i < 256; i++) { + unsigned error = 256; + + // TODO: Can probably avoid testing for values that definitely wont yield good results, + // e.g. low8 and high8 both much smaller or larger than index + for (uint8_t low = 0; low < Size; low++) { + uint8_t low8 = (B == 5) ? scale5To8(low) : scale6To8(low); + + for (uint8_t high = 0; high < Size; high++) { + uint8_t high8 = (B == 5) ? scale5To8(high) : scale6To8(high); + uint8_t value; + + if (use_8bit) { + value = interpolator->Interpolate8(high8, low8); + } else { + value = (B == 5) ? 
interpolator->Interpolate5(high, low) : interpolator->Interpolate6(high, low); + } + + unsigned new_error = iabs(value - (int)i); + + // We only need to factor in 3% error in BC1 ideal mode. + if (ideal) new_error += (iabs(high8 - (int)low8) * 3) / 100; + + if ((new_error < error) || (new_error == error && low == high)) { + assert(new_error <= UINT8_MAX); + + (*_matches)[i].low = (uint8_t)low; + (*_matches)[i].high = (uint8_t)high; + (*_matches)[i].error = (uint8_t)new_error; + + error = new_error; + } + } + } + } + } + + MatchEntry operator[](size_t index) const { + assert(index <= UINT8_MAX); + return (*_matches)[index]; + } + MatchEntry &operator[](size_t index) { + assert(index <= UINT8_MAX); + return (*_matches)[index]; + } + + private: + static inline constexpr size_t Size = 1 << B; + + MatchListPtr _matches = std::make_shared(); +}; + +} // namespace rgbcx \ No newline at end of file diff --git a/src/BC1/tables.h b/src/BC1/tables.h index f306935..9fc3c04 100644 --- a/src/BC1/tables.h +++ b/src/BC1/tables.h @@ -4,6 +4,13 @@ #pragma once #include +// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w)) +// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier. 
+static constexpr uint32_t g_weight_vals4[4] = {0x000009, 0x010204, 0x040201, 0x090000}; + +// multiplier is 4 for 3-color +static constexpr uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101}; + const uint32_t MIN_TOTAL_ORDERINGS = 1; const uint32_t MAX_TOTAL_ORDERINGS3 = 32; diff --git a/src/BlockEncoder.h b/src/BlockEncoder.h index 0255add..9568a3e 100644 --- a/src/BlockEncoder.h +++ b/src/BlockEncoder.h @@ -36,7 +36,7 @@ template class BlockEncoder { virtual void EncodeBlock(DecodedBlock pixels, EncodedBlock *dest) const = 0; - void EncodeImage(uint8_t *encoded, Color *decoded, unsigned image_width, unsigned image_height) { + virtual void EncodeImage(uint8_t *encoded, Color *decoded, unsigned image_width, unsigned image_height) { assert(image_width % N == 0); assert(image_width % M == 0); diff --git a/src/Vector4.h b/src/Vector4.h index c111da7..183886a 100644 --- a/src/Vector4.h +++ b/src/Vector4.h @@ -20,8 +20,8 @@ #pragma once #include -#include #include +#include #include "Color.h" @@ -29,7 +29,9 @@ namespace rgbcx { class Vector4 { public: - Vector4(float x = 0, float y = 0, float z = 0, float w = 0) { + Vector4() : Vector4(0) {} + + Vector4(float x, float y, float z = 0, float w = 0) { _c[0] = x; _c[1] = y; _c[2] = z; @@ -96,6 +98,9 @@ class Vector4 { return max; } + unsigned int SqrMag() { return (unsigned)Dot(*this, *this); } + + private: template friend Vector4 DoOp(const Vector4 &lhs, const Vector4 &rhs, Op f) { Vector4 r; diff --git a/src/Vector4Int.h b/src/Vector4Int.h index dba6e7c..dfbf783 100644 --- a/src/Vector4Int.h +++ b/src/Vector4Int.h @@ -49,7 +49,7 @@ class Vector4Int { static Vector4Int FromColorRGB(const Color &c) { return Vector4Int(c.r, c.g, c.b); } - static int Dot(Vector4Int &lhs, Vector4Int &rhs) { + static int Dot(const Vector4Int &lhs, const Vector4Int &rhs) { int sum = 0; for (unsigned i = 0; i < 4; i++) { sum += lhs[i] * rhs[i]; } return sum; @@ -86,7 +86,7 @@ class Vector4Int { friend Vector4Int &operator*=(Vector4Int 
&lhs, const int &rhs) { return lhs = lhs * rhs; } friend Vector4Int &operator/=(Vector4Int &lhs, const int &rhs) { return lhs = lhs / rhs; } - int Dot(Vector4Int other) { return Dot(*this, other); } + int Dot(const Vector4Int &other) const { return Dot(*this, other); } int MaxAbs(unsigned channels = 4) { assert(channels < 5); assert(channels > 0); diff --git a/src/util.h b/src/util.h index 4da4af4..b462749 100644 --- a/src/util.h +++ b/src/util.h @@ -39,7 +39,7 @@ template constexpr auto iabs(S i) { /** * Unpacks an unsigned integer into an array of smaller integers. - * @tparam I Input data type. Must be an unsigned integral type large enough to hold C * S bits. + * @tparam I Input data type. Must be an unsigned integral type large enough to hold C * N bits. * @tparam O Output data type. must be an unsigned integral type large enough to hold C bits.. * @tparam S Number of bits in each value. * @tparam C Number of values to unpack. @@ -53,7 +53,7 @@ template constexpr auto Unpack(I pa static_assert(std::numeric_limits::digits >= (C * S), "Packed input type must be big enough to represent the number of bits multiplied by count"); static_assert(std::numeric_limits::digits >= S, "Unpacked output type must be big enough to represent the number of bits"); - constexpr O mask = (1U << S) - 1U; // maximum value representable by S bits + constexpr O mask = (1U << S) - 1U; // maximum value representable by N bits std::array vals; // output values array of size C for (unsigned i = 0; i < C; i++) { @@ -67,7 +67,7 @@ template constexpr auto Unpack(I pa /** * Packs an array of unsigned integers into a single integer. * @tparam I Input data type. Must be an unsigned integral type large enough to hold C bits. - * @tparam O Output data type. must be an unsigned integral type large enough to hold C * S bits. + * @tparam O Output data type. must be an unsigned integral type large enough to hold C * N bits. * @tparam S Number of bits in each value. 
* @tparam C Number of values to unpack. * @param vals Unpacked std::array of type I and size C.