Interpolator match table caching

I'm not sure I like this; I might move it to the compressor class.
This commit is contained in:
Andrew Cassidy 2021-02-09 03:46:39 -08:00
parent 8cd1e60394
commit d678567dc7
2 changed files with 177 additions and 39 deletions

View File

@ -19,53 +19,118 @@
#include "interpolator.h"
#include <array>
#include <cassert>
#include <cstdint>
#include "util.h"
namespace rgbcx {
// Builds the cached single-color match tables for both endpoint precisions.
//
// NOTE(review): PrepSingleColorTables calls the virtual members Interpolate5/6, InterpolateHalf5/6, isIdeal and
// useExpandedInMatch. While this base-class constructor is running, virtual calls do not dispatch to derived-class
// overrides, and the Interpolate* members are declared pure virtual in Interpolator. The table build probably needs
// to run after construction of the derived object has finished -- confirm before relying on these tables.
Interpolator::Interpolator() {
    PrepSingleColorTables(_single_match5, _single_match5_half, 5);
    // The 6-bit pass must fill the 6-bit tables. It was previously handed the 5-bit tables again, which overwrote
    // them with 6-bit results and left _single_match6 / _single_match6_half untouched.
    PrepSingleColorTables(_single_match6, _single_match6_half, 6);
}
// Fills two match tables for one endpoint precision.
//
// For every target value i in [0, match_count), every (low, high) endpoint pair in [0, 2^len) is tried. The pair
// whose Interpolate5/6 result is closest to i is recorded in matchTable, and the pair whose InterpolateHalf5/6
// result is closest to i is recorded in matchTableHalf.
//
//   matchTable      - table that receives the best match for the Interpolate5/6 value
//   matchTableHalf  - table that receives the best match for the InterpolateHalf5/6 value
//   len             - endpoint bit depth; must be 5 or 6 (asserted)
void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len) {
    int size = 1 << len;

    assert((len == 5 && size == size5) || (len == 6 && size == size6));

    const uint8_t *expand = (len == 5) ? &Expand5[0] : &Expand6[0];

    bool ideal = isIdeal();
    bool use_e = useExpandedInMatch();

    for (int i = 0; i < match_count; i++) {
        // 256 is larger than any error that fits the uint8_t entry, so the first candidate always wins.
        int lowest_error = 256;
        int lowest_half_error = 256;

        for (int low = 0; low < size; low++) {
            const int low_e = expand[low];
            // Interpolators choose whether they are fed the expanded value or the raw endpoint index.
            const int low_val = use_e ? low_e : low;

            for (int high = 0; high < size; high++) {
                const int high_e = expand[high];
                const int high_val = use_e ? high_e : high;

                int v = (len == 5) ? Interpolate5(high_val, low_val) : Interpolate6(high_val, low_val);
                int v_half = (len == 5) ? InterpolateHalf5(low_val, high_val) : InterpolateHalf6(low_val, high_val);

                int error = PrepSingleColorTableEntry(matchTable, v, i, low, high, low_e, high_e, lowest_error, false, ideal);
                // The half table is scored against the half interpolant and its own running minimum. It was
                // previously scored with v and lowest_error, which made it a copy of the regular table's logic
                // and left v_half unused.
                int half_error = PrepSingleColorTableEntry(matchTableHalf, v_half, i, low, high, low_e, high_e, lowest_half_error, true, ideal);

                if (error < lowest_error) lowest_error = error;
                if (half_error < lowest_half_error) lowest_half_error = half_error;
            }
        }
    }
}
// Scores one (low, high) endpoint candidate against target value i and stores it in matchTable[i] when it is at
// least as good as the best candidate so far.
//
//   matchTable    - table whose entry i may be overwritten
//   v             - interpolated value produced by this endpoint pair
//   i             - target value / table index
//   low, high     - endpoint indices stored into the entry
//   low_e, high_e - expanded endpoint values, used only for the ideal-mode penalty
//   lowest_error  - best error recorded so far for entry i
//   half          - not read by this function
//   ideal         - when true, a penalty proportional to the endpoint spread is added
//
// Returns the error computed for this candidate, whether or not it was stored.
int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error,
                                            bool half, bool ideal) {
    // We only need to factor in 3% error in BC1 ideal mode.
    const int spread_penalty = ideal ? (iabs(high_e - low_e) * 3) / 100 : 0;
    const int candidate_error = iabs(v - i) + spread_penalty;

    // Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation.
    const bool strictly_better = candidate_error < lowest_error;
    const bool tie_with_equal_endpoints = (candidate_error == lowest_error) && (low == high);
    if (!strictly_better && !tie_with_equal_endpoints) { return candidate_error; }

    assert(candidate_error <= UINT8_MAX);

    auto &slot = (*matchTable)[i];
    slot.low = low;
    slot.high = high;
    slot.error = candidate_error;

    return candidate_error;
}
// region InterpolatorIdeal implementation
// InterpolatorIdeal uses the same formula for 5-bit and 6-bit channels: Interpolate5/6 forward to Interpolate5or6
// and InterpolateHalf5/6 forward to InterpolateHalf5or6.
// NOTE(review): each wrapper appears twice in this listing, once `rgbcx::`-qualified and non-const and once
// unqualified and const. This looks like the removed and added lines of the diff shown together -- confirm which
// set belongs in the final file.
int rgbcx::InterpolatorIdeal::Interpolate5(int v0, int v1) { return Interpolate5or6(v0, v1); }
int rgbcx::InterpolatorIdeal::Interpolate6(int v0, int v1) { return Interpolate5or6(v0, v1); }
int rgbcx::InterpolatorIdeal::InterpolateHalf5(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
int rgbcx::InterpolatorIdeal::InterpolateHalf6(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
int InterpolatorIdeal::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorIdeal::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorIdeal::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
int InterpolatorIdeal::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
// Returns (2 * v0 + v1) / 3 with truncating integer division, i.e. v0 carries twice the weight of v1.
// Both inputs are asserted to be below 256.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorIdeal::Interpolate5or6(int v0, int v1) {
int InterpolatorIdeal::Interpolate5or6(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 * 2 + v1) / 3;
}
// Returns the midpoint (v0 + v1) / 2 with truncating integer division. Both inputs are asserted to be below 256.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) {
int InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 + v1) / 2;
}
// endregion
// region InterpolatorIdealRound implementation
// InterpolatorIdealRound routes both Interpolate5 and Interpolate6 to Interpolate5or6Round.
// NOTE(review): each wrapper appears twice, non-const with `rgbcx::` and const without it. This looks like the
// removed and added diff lines shown together -- confirm which set belongs in the final file.
int rgbcx::InterpolatorIdealRound::Interpolate5(int v0, int v1) { return Interpolate5or6Round(v0, v1); }
int rgbcx::InterpolatorIdealRound::Interpolate6(int v0, int v1) { return Interpolate5or6Round(v0, v1); }
int InterpolatorIdealRound::Interpolate5(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
int InterpolatorIdealRound::Interpolate6(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
// Returns (2 * v0 + v1 + 1) / 3: the same weighting as the ideal interpolator, with 1 added before the
// truncating division. Both inputs are asserted to be below 256.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) {
int InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 * 2 + v1 + 1) / 3;
}
// endregion
// region InterpolatorNvidia implementation
// 5-bit interpolation for the Nvidia model. Inputs are asserted to be below 32, i.e. raw 5-bit endpoint values
// rather than expanded 8-bit ones. Computes ((2 * v0 + v1) * 22) / 8; with both inputs at 31 the result is 255.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorNvidia::Interpolate5(int v0, int v1) {
int InterpolatorNvidia::Interpolate5(int v0, int v1) const {
assert(v0 < 32 && v1 < 32);
return ((2 * v0 + v1) * 22) / 8;
}
// 6-bit (green) interpolation for the Nvidia model. Inputs are asserted to be below 256. The result is v0 plus a
// fraction of the difference (v1 - v0), evaluated with a denominator of 256: the difference is weighted by 80,
// a further gdiff / 4 term is added, and 128 is added before the final division.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorNvidia::Interpolate6(int v0, int v1) {
int InterpolatorNvidia::Interpolate6(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
const int gdiff = v1 - v0;
return (256 * v0 + (gdiff / 4) + 128 + gdiff * 80) / 256;
}
// 5-bit midpoint interpolation for the Nvidia model. Inputs are asserted to be below 32 (raw 5-bit endpoint
// values). Computes ((v0 + v1) * 33) / 8; with both inputs at 31 the result is 255.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorNvidia::InterpolateHalf5(int v0, int v1) {
int InterpolatorNvidia::InterpolateHalf5(int v0, int v1) const {
assert(v0 < 32 && v1 < 32);
return ((v0 + v1) * 33) / 8;
}
int rgbcx::InterpolatorNvidia::InterpolateHalf6(int v0, int v1) {
int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
const int gdiff = v1 - v0;
return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
@ -73,18 +138,19 @@ int rgbcx::InterpolatorNvidia::InterpolateHalf6(int v0, int v1) {
// endregion
// region InterpolatorAMD implementation
// InterpolatorAMD uses the same formula for 5-bit and 6-bit channels: Interpolate5/6 forward to Interpolate5or6
// and InterpolateHalf5/6 forward to InterpolateHalf5or6.
// NOTE(review): each wrapper appears twice in this listing, once `rgbcx::`-qualified and non-const and once
// unqualified and const. This looks like the removed and added lines of the diff shown together -- confirm which
// set belongs in the final file.
int rgbcx::InterpolatorAMD::Interpolate5(int v0, int v1) { return Interpolate5or6(v0, v1); }
int rgbcx::InterpolatorAMD::Interpolate6(int v0, int v1) { return Interpolate5or6(v0, v1); }
int rgbcx::InterpolatorAMD::InterpolateHalf5(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
int rgbcx::InterpolatorAMD::InterpolateHalf6(int v0, int v1) { return InterpolateHalf5or6(v0, v1); }
int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
// Returns (43 * v0 + 21 * v1 + 32) >> 6: weights of 43/64 and 21/64, with 32 (half of 64) added before the
// shift so the result is rounded rather than truncated. Both inputs are asserted to be below 256.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorAMD::Interpolate5or6(int v0, int v1) {
int InterpolatorAMD::Interpolate5or6(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 * 43 + v1 * 21 + 32) >> 6;
}
// Returns the midpoint (v0 + v1 + 1) >> 1; the +1 rounds halves up. Both inputs are asserted to be below 256.
// NOTE(review): two signature lines precede a single body here, presumably the old and new diff lines shown
// together; only one of them can remain in the final file.
int rgbcx::InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) {
int InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 + v1 + 1) >> 1;
}
// endregion
} // namespace rgbcx

View File

@ -18,56 +18,128 @@
*/
#pragma once
#include <array>
#include <cassert>
#include <cstdint>
#include <memory>
#include "util.h"
// Compile-time mirror of the NDEBUG macro. It is used below as noexcept(ndebug), so accessors that contain an
// assert() are declared noexcept only when asserts are compiled out.
// `static` gives every translation unit its own copy, so the value follows that unit's own NDEBUG setting.
#ifdef NDEBUG // asserts disabled
static constexpr bool ndebug = true;
#else // asserts enabled
static constexpr bool ndebug = false;
#endif
namespace rgbcx {
template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> ExpandArray() {
std::array<uint8_t, size> res;
for (int i = 0; i < size; i++) { res[i] = op(i); }
return res;
}
// Base class for the interpolation models in this file. It declares the 5-bit and 6-bit Interpolate /
// InterpolateHalf hooks as pure virtuals and owns four cached match tables (5-bit, 6-bit, and a "half" variant
// of each) that are filled by PrepSingleColorTables.
class Interpolator {
public:
// One cached match: the endpoint pair (`low`, `high`) stored by PrepSingleColorTableEntry for a target value,
// and the error recorded for that pair.
struct MatchEntry {
uint8_t high;
uint8_t low;
uint8_t error;
};
// Calls PrepSingleColorTables to fill the match tables.
Interpolator();
virtual ~Interpolator() noexcept = default;
// Interpolation hooks implemented by each model: v0 and v1 are the two endpoint values; the result is the
// interpolated value.
virtual int Interpolate5(int v0, int v1) = 0;
virtual int Interpolate6(int v0, int v1) = 0;
virtual int InterpolateHalf5(int v0, int v1) = 0;
virtual int InterpolateHalf6(int v0, int v1) = 0;
// NOTE(review): this is a second, identical destructor declaration in the same class. It is presumably the
// removed diff line shown next to the added one; a class may declare its destructor only once.
virtual ~Interpolator() noexcept = default;
// Table accessors. Each returns a copy of entry i from the corresponding table and asserts i < match_count.
// They are noexcept only when asserts are disabled (see `ndebug`).
constexpr MatchEntry GetMatch5(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match5)[i];
}
constexpr MatchEntry GetMatch6(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match6)[i];
}
constexpr MatchEntry GetMatchHalf5(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match5_half)[i];
}
constexpr MatchEntry GetMatchHalf6(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match6_half)[i];
}
private:
// Number of distinct 5-bit and 6-bit endpoint values, and number of entries per match table.
constexpr static inline size_t size5 = 32;
constexpr static inline size_t size6 = 64;
constexpr static inline size_t match_count = 256;
// Lookup tables built from scale5To8 / scale6To8 (not defined in this listing; presumably from util.h).
constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
// match tables used for single-color blocks
using MatchList = std::array<MatchEntry, match_count>;
using MatchListPtr = std::shared_ptr<MatchList>;
// The pointers are const, but the tables they point to are still written by PrepSingleColorTableEntry.
const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
// When true, PrepSingleColorTableEntry adds the endpoint-spread penalty to each candidate's error.
virtual constexpr bool isIdeal() noexcept { return false; }
// When true, PrepSingleColorTables feeds expanded endpoint values to the interpolation hooks; when false it
// feeds the raw endpoint indices.
virtual constexpr bool useExpandedInMatch() noexcept { return true; }
// Fills matchTable and matchTableHalf for endpoint bit depth `len` (5 or 6).
void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
// Scores one endpoint candidate for entry i, stores it if it is the best so far, and returns its error.
int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
bool ideal);
};
// Interpolation model that reports isIdeal() == true, which turns on the endpoint-spread penalty when the match
// tables are built. The 5-bit and 6-bit entry points share the helpers Interpolate5or6 and InterpolateHalf5or6.
// NOTE(review): every member function is declared twice below, once non-const and once const. This looks like
// the removed and added diff lines shown together. The base class declares the pure virtuals without const, so
// the const declarations on their own would not override them -- confirm the intended signatures.
class InterpolatorIdeal : public Interpolator {
public:
virtual int Interpolate5(int v0, int v1);
virtual int Interpolate6(int v0, int v1);
virtual int InterpolateHalf5(int v0, int v1);
virtual int InterpolateHalf6(int v0, int v1);
virtual int Interpolate5(int v0, int v1) const;
virtual int Interpolate6(int v0, int v1) const;
virtual int InterpolateHalf5(int v0, int v1) const;
virtual int InterpolateHalf6(int v0, int v1) const;
private:
// Shared implementations behind the 5-bit and 6-bit entry points.
int Interpolate5or6(int v0, int v1);
int InterpolateHalf5or6(int v0, int v1);
int Interpolate5or6(int v0, int v1) const;
int InterpolateHalf5or6(int v0, int v1) const;
virtual constexpr bool isIdeal() noexcept override { return true; }
};
// Variant of InterpolatorIdeal that replaces Interpolate5 and Interpolate6 with a version routed through
// Interpolate5or6Round; the InterpolateHalf functions are inherited unchanged.
// NOTE(review): each member is declared twice (non-const, then const). This looks like the removed and added
// diff lines shown together -- confirm which set belongs in the final file.
class InterpolatorIdealRound : public InterpolatorIdeal {
public:
virtual int Interpolate5(int v0, int v1);
virtual int Interpolate6(int v0, int v1);
virtual int Interpolate5(int v0, int v1) const override;
virtual int Interpolate6(int v0, int v1) const override;
private:
// Shared implementation behind Interpolate5 and Interpolate6.
int Interpolate5or6Round(int v0, int v1);
int Interpolate5or6Round(int v0, int v1) const;
};
// Nvidia interpolation model. It has separate formulas for each of the four entry points and reports
// useExpandedInMatch() == false, so the match-table builder feeds it raw endpoint indices instead of expanded
// values.
// NOTE(review): each member is declared twice (non-const, then const). This looks like the removed and added
// diff lines shown together. The base class declares the pure virtuals without const, so the const declarations
// on their own would not override them -- confirm the intended signatures.
class InterpolatorNvidia : public Interpolator {
public:
virtual int Interpolate5(int v0, int v1);
virtual int Interpolate6(int v0, int v1);
virtual int InterpolateHalf5(int v0, int v1);
virtual int InterpolateHalf6(int v0, int v1);
virtual int Interpolate5(int v0, int v1) const;
virtual int Interpolate6(int v0, int v1) const;
virtual int InterpolateHalf5(int v0, int v1) const;
virtual int InterpolateHalf6(int v0, int v1) const;
private:
virtual constexpr bool useExpandedInMatch() noexcept override { return false; }
};
// AMD interpolation model. The 5-bit and 6-bit entry points share the helpers Interpolate5or6 and
// InterpolateHalf5or6; isIdeal and useExpandedInMatch keep the base-class defaults.
// NOTE(review): each member is declared twice (non-const, then const). This looks like the removed and added
// diff lines shown together. The base class declares the pure virtuals without const, so the const declarations
// on their own would not override them -- confirm the intended signatures.
class InterpolatorAMD : public Interpolator {
public:
virtual int Interpolate5(int v0, int v1);
virtual int Interpolate6(int v0, int v1);
virtual int InterpolateHalf5(int v0, int v1);
virtual int InterpolateHalf6(int v0, int v1);
virtual int Interpolate5(int v0, int v1) const;
virtual int Interpolate6(int v0, int v1) const;
virtual int InterpolateHalf5(int v0, int v1) const;
virtual int InterpolateHalf6(int v0, int v1) const;
private:
// Shared implementations behind the 5-bit and 6-bit entry points.
int Interpolate5or6(int v0, int v1);
int InterpolateHalf5or6(int v0, int v1);
int Interpolate5or6(int v0, int v1) const;
int InterpolateHalf5or6(int v0, int v1) const;
};
} // namespace rgbcx