Add ordertable and move single color tables to their own template class

This commit is contained in:
Andrew Cassidy 2021-02-26 22:16:12 -08:00
parent 68896aca1a
commit 5b492fd4b5
10 changed files with 427 additions and 124 deletions

View File

@ -36,73 +36,39 @@
#include "../Vector4Int.h"
#include "../bitwiseEnums.h"
#include "../util.h"
#include "OrderTable.h"
#include "SingleColorTable.h"
namespace rgbcx {
using MatchList = std::array<BC1MatchEntry, 256>;
using MatchListPtr = std::shared_ptr<MatchList>;
using InterpolatorPtr = std::shared_ptr<Interpolator>;
using Hist3 = OrderTable<3>::Histogram;
using Hist4 = OrderTable<4>::Histogram;
// region Free Functions/Templates
/**
 * Offers one (low, high) endpoint pair as a candidate for reproducing the 8-bit value i,
 * and records it in match_table[i] if it beats the best candidate seen so far.
 * @param error running best error for index i; updated when the candidate is accepted
 * @param match_table table whose entry i is overwritten on acceptance
 * @param v value the interpolator produced for this endpoint pair
 * @param i 8-bit value being matched, also the table index
 * @param low low endpoint at its native bit depth
 * @param high high endpoint at its native bit depth
 * @param low8 low endpoint expanded to 8 bits
 * @param high8 high endpoint expanded to 8 bits
 * @param ideal whether to add the endpoint-spread penalty used in BC1 ideal mode
 */
inline void PrepSingleColorTableEntry(unsigned &error, MatchList &match_table, uint8_t v, unsigned i, uint8_t low, uint8_t high, uint8_t low8, uint8_t high8,
                                      bool ideal) {
    unsigned candidate_error = iabs(v - (int)i);

    // We only need to factor in 3% error in BC1 ideal mode.
    if (ideal) { candidate_error += (iabs(high8 - (int)low8) * 3) / 100; }

    // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation.
    const bool strictly_better = candidate_error < error;
    const bool tie_with_equal_endpoints = (candidate_error == error) && (low == high);
    if (!strictly_better && !tie_with_equal_endpoints) { return; }

    assert(candidate_error <= UINT8_MAX);

    auto &entry = match_table[i];
    entry.low = low;
    entry.high = high;
    entry.error = (uint8_t)candidate_error;
    error = candidate_error;
}
/**
 * Builds the single-color match tables for S-bit endpoints by exhaustively trying every
 * (low, high) endpoint pair against every 8-bit target value.
 * @tparam S endpoint bit depth (5 or 6)
 * @param match_table receives the best pair per target using the interpolator's regular interpolation
 * @param match_table_half receives the best pair per target using the interpolator's "half" interpolation
 * @param interpolator interpolator whose results the tables are built from
 */
template <size_t S> void PrepSingleColorTable(MatchList &match_table, MatchList &match_table_half, Interpolator &interpolator) {
    const unsigned endpoint_count = 1 << S;
    assert((S == 5 && endpoint_count == 32) || (S == 6 && endpoint_count == 64));

    const bool ideal = interpolator.IsIdeal();
    const bool use_8bit = interpolator.CanInterpolate8Bit();

    // expands an S-bit endpoint to its 8-bit equivalent
    const auto widen = [](uint8_t endpoint) -> uint8_t { return (S == 5) ? scale5To8(endpoint) : scale6To8(endpoint); };

    for (unsigned target = 0; target < 256; ++target) {
        unsigned best_error = 256;
        unsigned best_error_half = 256;

        // TODO: Can probably avoid testing for values that definitely wont yield good results,
        // e.g. low8 and high8 both much smaller or larger than index
        for (uint8_t low = 0; low < endpoint_count; ++low) {
            const uint8_t low8 = widen(low);

            for (uint8_t high = 0; high < endpoint_count; ++high) {
                const uint8_t high8 = widen(high);

                uint8_t value;
                uint8_t value_half;
                if (!use_8bit) {
                    // interpolate at the native endpoint bit depth
                    if (S == 5) {
                        value = interpolator.Interpolate5(high, low);
                        value_half = interpolator.InterpolateHalf5(high, low);
                    } else {
                        value = interpolator.Interpolate6(high, low);
                        value_half = interpolator.InterpolateHalf6(high, low);
                    }
                } else {
                    value = interpolator.Interpolate8(high8, low8);
                    value_half = interpolator.InterpolateHalf8(high8, low8);
                }

                PrepSingleColorTableEntry(best_error, match_table, value, target, low, high, low8, high8, ideal);
                PrepSingleColorTableEntry(best_error_half, match_table_half, value_half, target, low, high, low8, high8, ideal);
            }
        }
    }
}
// endregion
// Static Fields
// Order tables shared by every encoder; null until the first constructor call allocates them.
OrderTable<3> *BC1Encoder::order_table3{nullptr};
OrderTable<4> *BC1Encoder::order_table4{nullptr};
// Held by the constructor while it checks for and builds the tables above.
std::mutex BC1Encoder::order_table_mutex;
// Set by the constructor once both tables have been allocated.
bool BC1Encoder::order_tables_generated{false};
/**
 * Creates an encoder bound to the given interpolator, fills its single-color match tables
 * and makes sure the order tables shared by all encoders exist.
 * @param interpolator interpolator used to build the single-color match tables
 */
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
    PrepSingleColorTable<5>(*_single_match5, *_single_match5_half, *_interpolator);
    PrepSingleColorTable<6>(*_single_match6, *_single_match6_half, *_interpolator);

    _flags = Flags::UseFasterMSEEval | Flags::TwoLeastSquaresPasses;

    // generate lookup tables
    // The guard releases the mutex on every exit path, so an exception thrown while
    // allocating a table cannot leave it locked for every later encoder.
    const std::lock_guard<std::mutex> guard(order_table_mutex);
    if (!order_tables_generated) {
        assert(order_table3 == nullptr);
        assert(order_table4 == nullptr);

        order_table3 = new OrderTable<3>();
        order_table4 = new OrderTable<4>();
        order_tables_generated = true;
    }

    assert(order_table3 != nullptr);
    assert(order_table4 != nullptr);
}
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
@ -125,10 +91,11 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
needs_block_error |= metrics.has_black && ((_flags & Flags::Use3ColorBlocksForBlackPixels) != Flags::None);
unsigned total_ls_passes = (_flags & Flags::TwoLeastSquaresPasses) != Flags::None ? 2 : 1;
unsigned total_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;
unsigned total_ep_rounds = needs_block_error && ((_flags & Flags::TryAllInitialEndpoints) != Flags::None) ? 2 : 1;
// Initial block generation
EncodeResults result;
for (unsigned round = 0; round < total_rounds; round++) {
for (unsigned round = 0; round < total_ep_rounds; round++) {
Flags modified_flags = _flags;
if (round == 1) {
modified_flags &= ~(Flags::Use2DLS | Flags::BoundingBoxInt);
@ -158,6 +125,54 @@ void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
if (!needs_block_error || round_result.error < result.error) { result = round_result; }
}
// First refinement pass using ordered cluster fit
if (result.error > 0 && (_flags & Flags::UseLikelyTotalOrderings) != Flags::None) {
const unsigned total_iters = (_flags & Flags::Iterative) != Flags::None ? 2 : 1;
for (unsigned iter = 0; iter < total_iters; iter++) {
EncodeResults orig = result;
Hist4 h(orig.selectors);
const Hash order_index = order_table4->GetHash(h);
Color low = orig.low.ScaleFrom565();
Color high = orig.high.ScaleFrom565();
Vector4Int axis = high - low;
std::array<Vector4, 16> color_vectors;
std::array<uint32_t, 16> dots;
for (unsigned i = 0; i < 16; i++) {
color_vectors[i] = Vector4::FromColorRGB(pixels.Get(i));
int dot = 0x1000000 + color_vectors[i].Dot(axis);
assert(dot >= 0);
dots[i] = (uint32_t)(dot << 4) | i;
}
std::sort(dots.begin(), dots.end());
// we now have a list of indices and their dot products along the primary axis
std::array<Vector4, 17> sums;
for (unsigned i = 0; i < 16; i++) {
const unsigned p = dots[i] & 0xF;
sums[i + 1] = sums[i] + color_vectors[p];
}
const unsigned q_total = ((_flags & Flags::Exhaustive) != Flags::None) ? order_table4->UniqueOrderings
: (unsigned)clampi(_orderings4, MIN_TOTAL_ORDERINGS, MAX_TOTAL_ORDERINGS4);
for (unsigned q = 0; q < q_total; q++) {
Hash s = ((_flags & Flags::Exhaustive) != Flags::None) ? q : g_best_total_orderings4[order_index][q];
EncodeResults trial = orig;
Vector4 low, high;
if (order_table4->IsSingleColor(order_index)) {
trial.is_1_color = true;
trial.is_3_color = false;
} else {
}
}
}
}
if (result.low == result.high) {
EncodeBlockSingleColor(metrics.avg, dest);
} else {
@ -172,19 +187,15 @@ void BC1Encoder::EncodeBlockSingleColor(Color color, BC1Block *dest) const {
bool using_3color = false;
// why is there no subscript operator for shared_ptr<array>
MatchList &match5 = *_single_match5;
MatchList &match6 = *_single_match6;
MatchList &match5_half = *_single_match5_half;
MatchList &match6_half = *_single_match6_half;
BC1MatchEntry match_r = match5[color.r];
BC1MatchEntry match_g = match6[color.g];
BC1MatchEntry match_b = match5[color.b];
auto match_r = _single_match5[color.r];
auto match_g = _single_match6[color.g];
auto match_b = _single_match5[color.b];
if ((_flags & (Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels)) != Flags::None) {
BC1MatchEntry match_r_half = match5_half[color.r];
BC1MatchEntry match_g_half = match6_half[color.g];
BC1MatchEntry match_b_half = match5_half[color.b];
auto match_r_half = _single_match5_half[color.r];
auto match_g_half = _single_match6_half[color.g];
auto match_b_half = _single_match5_half[color.b];
const unsigned err4 = match_r.error + match_g.error + match_b.error;
const unsigned err3 = match_r_half.error + match_g_half.error + match_b_half.error;
@ -590,4 +601,17 @@ bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, Block
block.high = Color::PreciseRound565(high);
return true;
}
/*
bool BC1Encoder::ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, Hash hash, Vector4 &matrix, std::array<Vector4, 17> &sums,
bool is_3color, bool use_black) const {
unsigned f1, f2, f3;
int denominator = is_3color ? 2 : 3;
if (is_3color) {
order_table3->GetUniqueOrderingSums(hash, f1, f2, f3);
} else {
order_table4->GetUniqueOrderingSums(hash, f1, f2, f3);
}
}*/
} // namespace rgbcx

View File

@ -30,20 +30,15 @@
#include "../bitwiseEnums.h"
#include "../ndebug.h"
#include "BC1Block.h"
#include "OrderTable.h"
#include "SingleColorTable.h"
#include "tables.h"
namespace rgbcx {
// One entry of a single-color match table: the endpoint pair chosen for an 8-bit
// target value, together with the error recorded for that pair.
struct BC1MatchEntry {
// high endpoint of the chosen pair
uint8_t high;
// low endpoint of the chosen pair
uint8_t low;
// error recorded for the chosen pair
uint8_t error;
};
class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
public:
using InterpolatorPtr = std::shared_ptr<Interpolator>;
using BlockMetrics = Color4x4::BlockMetrics;
enum class Flags : uint32_t {
None = 0,
@ -109,13 +104,6 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override;
private:
const InterpolatorPtr _interpolator;
Flags _flags;
unsigned _search_rounds;
unsigned _orderings4;
unsigned _orderings3;
// Unpacked BC1 block with metadata
struct EncodeResults {
Color low;
@ -126,6 +114,30 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
unsigned error = UINT_MAX;
};
using Hash = uint16_t;
using BlockMetrics = Color4x4::BlockMetrics;
const InterpolatorPtr _interpolator;
// match tables used for single-color blocks
// Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible,
// with an included error value
// these depend on the interpolator
const SingleColorTable<5, 4> _single_match5 = SingleColorTable<5, 4>(_interpolator);
const SingleColorTable<6, 4> _single_match6 = SingleColorTable<6, 4>(_interpolator);
const SingleColorTable<5, 3> _single_match5_half = SingleColorTable<5, 3>(_interpolator);
const SingleColorTable<6, 3> _single_match6_half = SingleColorTable<6, 3>(_interpolator);
Flags _flags;
unsigned _search_rounds;
unsigned _orderings4;
unsigned _orderings3;
static OrderTable<4> *order_table4; // order table for 4-color blocks
static OrderTable<3> *order_table3; // order table for 3-color blocks
static std::mutex order_table_mutex; // prevent race condition with multiple BC1Encoders constructed at once
static bool order_tables_generated; // have the order tables been generated by a previous instance?
void EncodeBlockSingleColor(Color color, BC1Block *dest) const;
void EncodeBlock4Color(EncodeResults &block, BC1Block *dest) const;
@ -133,36 +145,7 @@ class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
unsigned FindSelectors4(Color4x4 pixels, BC1Encoder::EncodeResults &block, bool use_err) const;
bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, bool is_3color, bool use_black) const;
// match tables used for single-color blocks
// Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible,
// with an included error value
// these depend on the interpolator
using MatchList = std::array<BC1MatchEntry, 256>;
using MatchListPtr = std::shared_ptr<MatchList>;
const MatchListPtr _single_match5 = std::make_shared<MatchList>();
const MatchListPtr _single_match6 = std::make_shared<MatchList>();
const MatchListPtr _single_match5_half = std::make_shared<MatchList>();
const MatchListPtr _single_match6_half = std::make_shared<MatchList>();
// static lookup tables, generated the first time an encoder is created
// the mutex prevents race conditions if two encoders are created on different threads
static std::mutex _luts_mutex;
static bool _luts_initialized;
// lookup table for hash values
static uint16_t g_total_ordering4_hash[4096];
static uint16_t g_total_ordering3_hash[256];
static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3];
static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3];
// This table is: 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
// where w is [0,1/3,2/3,1]. 9 is the perfect multiplier.
static constexpr uint32_t g_weight_vals4[4] = {0x000009, 0x010204, 0x040201, 0x090000};
// multiplier is 4 for 3-color
static constexpr uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101};
/* bool ComputeEndpointsLS(Color4x4 pixels, EncodeResults &block, BlockMetrics metrics, Hash hash, Vector4 &matrix, std::array<Vector4, 17> &sums,
bool is_3color, bool use_black) const;*/
};
} // namespace rgbcx

26
src/BC1/ClusterFit.h Normal file
View File

@ -0,0 +1,26 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
namespace rgbcx {

// Declaration only: no parameters, return value or definition exist yet in this header.
// NOTE(review): presumably the future entry point for cluster-fit endpoint computation - confirm.
void ComputeEndpoints();

}  // namespace rgbcx

151
src/BC1/OrderTable.h Normal file
View File

@ -0,0 +1,151 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <cstdint>
#include <mutex>
#include <numeric>
#include "../Vector4.h"
#include "../util.h"
#include "tables.h"
namespace rgbcx {
/**
 * Lookup tables keyed by how a block's 16 selectors are distributed over N selector values.
 *
 * Each row of the g_unique_total_orders tables is one such distribution ("ordering"). This
 * class maps a packed selector histogram to the index of its row, and stores three
 * precomputed factors per row.
 * @tparam N Number of selector values (4 or 3)
 */
template <size_t N> class OrderTable {
   public:
    using Hash = uint16_t;
    using FactorMatrix = std::array<float, 3>;

    /** Counts how many selectors of a block take each of the N possible values. */
    class Histogram {
       public:
        /** Creates a histogram with every bin at zero. */
        Histogram() = default;

        /**
         * Builds the histogram of a block's selectors.
         * @param sels the 16 selectors of a block; each must be less than N
         */
        Histogram(std::array<uint8_t, 16> sels) {
            for (const uint8_t sel : sels) {
                assert(sel < N);
                _bins[sel]++;
            }
        }

        uint8_t operator[](size_t index) const {
            assert(index < N);
            return _bins[index];
        }

        uint8_t &operator[](size_t index) {
            assert(index < N);
            return _bins[index];
        }

        /** @return true if any single bin holds the value 16 */
        bool Any16() const {
            return std::any_of(_bins.begin(), _bins.end(), [](int count) { return count == 16; });
        }

        /**
         * Packs the first N - 1 bins into one integer, 4 bits per bin (the last bin is omitted).
         * A bin holding 16 does not fit in 4 bits, so callers check for that case before
         * using the packed value as an index.
         * @return the packed value, which must be less than TotalHashes
         */
        unsigned GetPacked() const {
            unsigned packed = 0;
            for (unsigned i = 0; i < (N - 1); i++) { packed |= (_bins[i] << (4 * i)); }

            assert(packed < TotalHashes);

            return packed;
        }

       private:
        std::array<uint8_t, N> _bins{};
    };

    static inline constexpr size_t UniqueOrderings = (N == 4) ? NUM_UNIQUE_TOTAL_ORDERINGS4 : NUM_UNIQUE_TOTAL_ORDERINGS3;
    static inline constexpr size_t TotalHashes = (N == 4) ? 4096 : 256;

    /** @return the number of pixels using the given selector in ordering number hash */
    static inline constexpr uint8_t GetUniqueOrdering(Hash hash, unsigned selector) {
        if constexpr (N == 4) {
            return g_unique_total_orders4[hash][selector];
        } else {
            return g_unique_total_orders3[hash][selector];
        }
    }

    /**
     * Computes the running sums of the first three selector counts of an ordering.
     * @param hash ordering index
     * @param f1 receives the count for selector 0
     * @param f2 receives f1 plus the count for selector 1
     * @param f3 receives f2 plus the count for selector 2
     */
    static inline constexpr void GetUniqueOrderingSums(Hash hash, unsigned &f1, unsigned &f2, unsigned &f3) {
        f1 = GetUniqueOrdering(hash, 0);
        f2 = f1 + GetUniqueOrdering(hash, 1);
        f3 = f2 + GetUniqueOrdering(hash, 2);
    }

    /** Builds the histogram-to-ordering lookup and the per-ordering factors. */
    OrderTable() {
        static_assert(N == 4 || N == 3);

        const uint32_t *weight_vals = (N == 4) ? g_weight_vals4 : g_weight_vals3;
        const float denominator = (N == 4) ? 3.0f : 2.0f;

        for (unsigned i = 0; i < UniqueOrderings; i++) {
            Histogram h;
            for (unsigned j = 0; j < N; j++) { h[j] = GetUniqueOrdering(i, j); }

            // orderings with a 16 in one bin cannot be packed; GetHash handles them separately
            if (!h.Any16()) _hashes[h.GetPacked()] = (Hash)i;

            // each weight value carries three byte-sized terms, so one multiply-add per
            // selector accumulates all three sums at once
            unsigned weight_accum = 0;
            for (unsigned sel = 0; sel < N; sel++) weight_accum += (weight_vals[sel] * h[sel]);

            // todo: use a Vector4 here instead for SIMD readiness
            const float z00 = (float)((weight_accum >> 16) & 0xFF);
            const float z10 = (float)((weight_accum >> 8) & 0xFF);
            const float z11 = (float)(weight_accum & 0xFF);
            const float z01 = z10;

            float det = z00 * z11 - z01 * z10;

            if (std::fabs(det) < 1e-8f) {
                // singular matrix: store zeros rather than dividing by (almost) zero
                _factors[i][0] = 0;
                _factors[i][1] = 0;
                _factors[i][2] = 0;
            } else {
                // distinct entries of the inverse of the symmetric matrix [[z00, z01], [z10, z11]],
                // scaled by denominator / 255
                det = (denominator / 255.0f) / det;
                _factors[i][0] = z11 * det;
                _factors[i][1] = -z10 * det;
                _factors[i][2] = z00 * det;
            }
        }
    }

    /**
     * Looks up the ordering index for a selector histogram.
     * @param hist histogram of a block's selectors
     * @return index of the matching ordering
     */
    Hash GetHash(const Histogram &hist) const {
        for (unsigned i = 0; i < N; i++) {
            if (hist[i] == 16) return GetSingleColorHashes()[i];
        }

        return _hashes[hist.GetPacked()];
    }

    /** @return the factors of an ordering as (f0, f1, f1, f2) */
    Vector4 GetFactors(Hash hash) const { return Vector4(_factors[hash][0], _factors[hash][1], _factors[hash][1], _factors[hash][2]); }

    /** @return for each selector value, the ordering index in which that selector is used by all 16 pixels */
    static inline constexpr std::array<Hash, N> GetSingleColorHashes() {
        // if constexpr keeps the initializer of the other size from being instantiated
        if constexpr (N == 4) {
            return {15, 700, 753, 515};
        } else {
            return {12, 15, 89};
        }
    }

    /** @return true if hash is one of the orderings returned by GetSingleColorHashes */
    static inline constexpr bool IsSingleColor(Hash hash) {
        const auto hashes = GetSingleColorHashes();
        for (const Hash single : hashes) {
            if (single == hash) return true;
        }
        return false;
    }

   private:
    // zero-initialized so that slots no ordering maps to hold a defined value
    std::array<Hash, TotalHashes> _hashes{};
    std::array<FactorMatrix, UniqueOrderings> _factors{};
};
} // namespace rgbcx

107
src/BC1/SingleColorTable.h Normal file
View File

@ -0,0 +1,107 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <cstdint>
#include <memory>
#include "../Interpolator.h"
#include "../Util.h"
namespace rgbcx {
/**
* Lookup table for single-color blocks
* @tparam B Number of bits (5 or 6)
* @tparam N Number of colors (3 or 4)
*/
template <size_t B, size_t N> class SingleColorTable {
   public:
    /** Best endpoint pair found for one 8-bit value, with the error recorded for it. */
    struct MatchEntry {
        uint8_t high;
        uint8_t low;
        uint8_t error;
    };

    using MatchList = std::array<MatchEntry, 256>;
    using MatchListPtr = std::shared_ptr<MatchList>;
    using InterpolatorPtr = std::shared_ptr<Interpolator>;

    /**
     * Builds the table by trying every (low, high) endpoint pair against every 8-bit value.
     * @param interpolator interpolator the table is built from
     */
    SingleColorTable(InterpolatorPtr interpolator) {
        static_assert((B == 5 && Size == 32) || (B == 6 && Size == 64));
        static_assert(N == 4 || N == 3);

        const bool ideal = interpolator->IsIdeal();
        const bool use_8bit = interpolator->CanInterpolate8Bit();
        MatchList &matches = *_matches;

        for (unsigned i = 0; i < 256; i++) {
            unsigned error = 256;

            // TODO: Can probably avoid testing for values that definitely wont yield good results,
            // e.g. low8 and high8 both much smaller or larger than index
            for (uint8_t low = 0; low < Size; low++) {
                const uint8_t low8 = (B == 5) ? scale5To8(low) : scale6To8(low);

                for (uint8_t high = 0; high < Size; high++) {
                    const uint8_t high8 = (B == 5) ? scale5To8(high) : scale6To8(high);
                    uint8_t value;

                    // The 3-color table has to be built from the interpolator's "half"
                    // interpolation; using the regular one would just duplicate the 4-color table.
                    if constexpr (N == 4) {
                        if (use_8bit) {
                            value = interpolator->Interpolate8(high8, low8);
                        } else {
                            value = (B == 5) ? interpolator->Interpolate5(high, low) : interpolator->Interpolate6(high, low);
                        }
                    } else {
                        if (use_8bit) {
                            value = interpolator->InterpolateHalf8(high8, low8);
                        } else {
                            value = (B == 5) ? interpolator->InterpolateHalf5(high, low) : interpolator->InterpolateHalf6(high, low);
                        }
                    }

                    unsigned new_error = iabs(value - (int)i);

                    // We only need to factor in 3% error in BC1 ideal mode.
                    if (ideal) new_error += (iabs(high8 - (int)low8) * 3) / 100;

                    // On a tie, prefer the pair whose endpoints are equal.
                    if ((new_error < error) || (new_error == error && low == high)) {
                        assert(new_error <= UINT8_MAX);

                        matches[i].low = (uint8_t)low;
                        matches[i].high = (uint8_t)high;
                        matches[i].error = (uint8_t)new_error;

                        error = new_error;
                    }
                }
            }
        }
    }

    /** @return a copy of the entry for the 8-bit value index */
    MatchEntry operator[](size_t index) const {
        assert(index <= UINT8_MAX);
        return (*_matches)[index];
    }

    /** @return a reference to the entry for the 8-bit value index */
    MatchEntry &operator[](size_t index) {
        assert(index <= UINT8_MAX);
        return (*_matches)[index];
    }

   private:
    // number of distinct endpoint values at B bits
    static inline constexpr size_t Size = 1 << B;

    MatchListPtr _matches = std::make_shared<MatchList>();
};
} // namespace rgbcx

View File

@ -4,6 +4,13 @@
#pragma once
#include <cstdint>
// Each entry packs three terms into one word, one byte per term, from bits 16-23 down to bits 0-7:
// 9 * (w * w), 9 * ((1.0f - w) * w), 9 * ((1.0f - w) * (1.0f - w))
// where w is [0,1/3,2/3,1]. Multiplying by 9 makes every term an integer.
static constexpr uint32_t g_weight_vals4[4] = {0x000009, 0x010204, 0x040201, 0x090000};
// Same byte layout for 3-color, with a multiplier of 4; the entries correspond to w = [0, 1, 1/2].
static constexpr uint32_t g_weight_vals3[3] = {0x000004, 0x040000, 0x010101};
const uint32_t MIN_TOTAL_ORDERINGS = 1;
const uint32_t MAX_TOTAL_ORDERINGS3 = 32;

View File

@ -36,7 +36,7 @@ template <class B, size_t M, size_t N> class BlockEncoder {
virtual void EncodeBlock(DecodedBlock pixels, EncodedBlock *dest) const = 0;
void EncodeImage(uint8_t *encoded, Color *decoded, unsigned image_width, unsigned image_height) {
virtual void EncodeImage(uint8_t *encoded, Color *decoded, unsigned image_width, unsigned image_height) {
assert(image_width % N == 0);
assert(image_width % M == 0);

View File

@ -20,8 +20,8 @@
#pragma once
#include <array>
#include <functional>
#include <cmath>
#include <functional>
#include "Color.h"
@ -29,7 +29,9 @@ namespace rgbcx {
class Vector4 {
public:
Vector4(float x = 0, float y = 0, float z = 0, float w = 0) {
Vector4() : Vector4(0) {}
Vector4(float x, float y, float z = 0, float w = 0) {
_c[0] = x;
_c[1] = y;
_c[2] = z;
@ -96,6 +98,9 @@ class Vector4 {
return max;
}
unsigned int SqrMag() { return (unsigned)Dot(*this, *this); }
private:
template <typename Op> friend Vector4 DoOp(const Vector4 &lhs, const Vector4 &rhs, Op f) {
Vector4 r;

View File

@ -49,7 +49,7 @@ class Vector4Int {
static Vector4Int FromColorRGB(const Color &c) { return Vector4Int(c.r, c.g, c.b); }
static int Dot(Vector4Int &lhs, Vector4Int &rhs) {
static int Dot(const Vector4Int &lhs, const Vector4Int &rhs) {
int sum = 0;
for (unsigned i = 0; i < 4; i++) { sum += lhs[i] * rhs[i]; }
return sum;
@ -86,7 +86,7 @@ class Vector4Int {
friend Vector4Int &operator*=(Vector4Int &lhs, const int &rhs) { return lhs = lhs * rhs; }
friend Vector4Int &operator/=(Vector4Int &lhs, const int &rhs) { return lhs = lhs / rhs; }
int Dot(Vector4Int other) { return Dot(*this, other); }
int Dot(const Vector4Int &other) const { return Dot(*this, other); }
int MaxAbs(unsigned channels = 4) {
assert(channels < 5);
assert(channels > 0);

View File

@ -39,7 +39,7 @@ template <typename S> constexpr auto iabs(S i) {
/**
* Unpacks an unsigned integer into an array of smaller integers.
* @tparam I Input data type. Must be an unsigned integral type large enough to hold C * S bits.
* @tparam I Input data type. Must be an unsigned integral type large enough to hold C * S bits.
* @tparam O Output data type. Must be an unsigned integral type large enough to hold S bits.
* @tparam S Number of bits in each value.
* @tparam C Number of values to unpack.
@ -53,7 +53,7 @@ template <typename I, typename O, size_t S, size_t C> constexpr auto Unpack(I pa
static_assert(std::numeric_limits<I>::digits >= (C * S), "Packed input type must be big enough to represent the number of bits multiplied by count");
static_assert(std::numeric_limits<O>::digits >= S, "Unpacked output type must be big enough to represent the number of bits");
constexpr O mask = (1U << S) - 1U; // maximum value representable by S bits
constexpr O mask = (1U << S) - 1U; // maximum value representable by N bits
std::array<O, C> vals; // output values array of size C
for (unsigned i = 0; i < C; i++) {
@ -67,7 +67,7 @@ template <typename I, typename O, size_t S, size_t C> constexpr auto Unpack(I pa
/**
* Packs an array of unsigned integers into a single integer.
* @tparam I Input data type. Must be an unsigned integral type large enough to hold S bits.
* @tparam O Output data type. must be an unsigned integral type large enough to hold C * S bits.
* @tparam O Output data type. Must be an unsigned integral type large enough to hold C * S bits.
* @tparam S Number of bits in each value.
* @tparam C Number of values to unpack.
* @param vals Unpacked std::array of type I and size C.