Fix interpolators, which I didn't understand before

I have zero clue what I'm doing
faster-single-tables
Andrew Cassidy 3 years ago
parent d678567dc7
commit 1cf01f6f01

@ -0,0 +1,44 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "BC1Decoder.h"
#include <array>
#include "ColorBlock.h"
/**
 * Unpacks the two 5:6:5 endpoint colors of a BC1 block and derives the block's 4-entry palette.
 * Three-color mode is selected when the high endpoint is >= the low endpoint, four-color mode otherwise.
 *
 * NOTE(review): only the palette is computed so far. Nothing is written to dest, and dest is a
 * pointer-to-const, so the per-pixel selector decoding still has to be added.
 *
 * @param dest destination pixel block (currently not written)
 * @param block encoded BC1 block whose endpoint colors are read
 */
void rgbcx::BC1Decoder::DecodeBlock(const Color4x4 *dest, const BC1Block *block) {
    const unsigned l = block->GetLowColor();
    const unsigned h = block->GetHighColor();
    const auto l_color = Color32::Unpack565(l);
    const auto h_color = Color32::Unpack565(h);

    // Palette entries 0 and 1 are always the unpacked endpoints.
    std::array<Color32, 4> colors;
    colors[0] = l_color;
    colors[1] = h_color;

    const bool three_color = (h >= l);
    if (three_color) {
        colors[2] = _interpolator.InterpolateHalfColor(l_color, h_color);
        // Black with the constructor's default alpha.
        // NOTE(review): Interpolator::InterpolateBC1 uses Color32(0, 0, 0, 0) for this entry - confirm which alpha is intended.
        colors[3] = Color32(0, 0, 0);
    } else {
        // Same argument order as Interpolator::InterpolateBC1: (low, high) for entry 2, (high, low) for entry 3.
        colors[2] = _interpolator.InterpolateColor(l_color, h_color);
        colors[3] = _interpolator.InterpolateColor(h_color, l_color);
    }
}

@ -0,0 +1,37 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "BlockDecoder.h"
#include "blocks.h"
#include "interpolator.h"
namespace rgbcx {
/**
 * Block decoder for 4x4 BC1 blocks. Color interpolation is delegated to an Interpolator
 * that is held by reference.
 */
class BC1Decoder : public BlockDecoder<BC1Block, 4, 4> {
   public:
    /**
     * Creates a decoder that uses the given interpolator.
     * @param interpolator interpolator to use; it is stored by reference (not copied), so it must
     * outlive this decoder
     */
    BC1Decoder(const Interpolator &interpolator) : _interpolator(interpolator) {}

    /**
     * Creates a decoder that uses a default-constructed Interpolator.
     * The default instance has static storage duration, so the stored reference never dangles
     * (binding it to a temporary would leave it dangling as soon as the constructor returned).
     */
    BC1Decoder() : BC1Decoder(DefaultInterpolator()) {}

    void DecodeBlock(const Color4x4 *dest, const BC1Block *block) override;

   private:
    /// Returns the shared default interpolator used by the default constructor.
    static const Interpolator &DefaultInterpolator() {
        static const Interpolator instance{};
        return instance;
    }

    const Interpolator &_interpolator;
};
} // namespace rgbcx

@ -17,6 +17,18 @@
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "blocks.h"
#pragma once
// endregion
#include <cstdint>
#include "ColorBlock.h"
namespace rgbcx {
/**
 * Abstract interface for decoders that turn one encoded block into a block of Color32 pixels.
 * @tparam B encoded block type
 * @tparam M first size parameter forwarded to ColorBlock
 * @tparam N second size parameter forwarded to ColorBlock
 */
template <class B, size_t M, size_t N> class BlockDecoder {
   public:
    using DecodedBlock = ColorBlock<M, N, Color32>;
    using EncodedBlock = B;

    // Polymorphic base: a virtual destructor makes deleting a decoder through a BlockDecoder pointer well-defined.
    virtual ~BlockDecoder() = default;

    /**
     * Decodes a single encoded block.
     * NOTE(review): dest is a pointer-to-const, so implementations cannot write through it without a cast - confirm intent.
     * @param dest destination pixel block
     * @param block encoded block to decode
     */
    virtual void DecodeBlock(const DecodedBlock *dest, const EncodedBlock *block) = 0;
};
} // namespace rgbcx

@ -0,0 +1,34 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
#include "ColorBlock.h"
namespace rgbcx {
/**
 * Abstract interface for encoders that turn a block of Color32 pixels into one encoded block.
 * @tparam B encoded block type
 * @tparam M first size parameter forwarded to ColorBlock
 * @tparam N second size parameter forwarded to ColorBlock
 */
template <class B, size_t M, size_t N> class BlockEncoder {
   public:
    using DecodedBlock = ColorBlock<M, N, Color32>;
    using EncodedBlock = B;

    // Polymorphic base: a virtual destructor makes deleting an encoder through a BlockEncoder pointer well-defined.
    virtual ~BlockEncoder() = default;

    /**
     * Encodes a single block of pixels.
     * NOTE(review): dest is a pointer-to-const, so implementations cannot write through it without a cast - confirm intent.
     * @param dest destination encoded block
     * @param pixels source pixel block
     */
    virtual void EncodeBlock(const EncodedBlock *dest, const DecodedBlock *pixels) = 0;
};
} // namespace rgbcx

@ -20,6 +20,7 @@
#pragma once
#include <array>
#include <cassert>
#include <cstdint>
#include <span>
@ -41,13 +42,13 @@ template <size_t M, size_t N, class T> class ColorBlock {
}
ColorBlock(const T *image, int imageWidth, int imageHeight, int x = 0, int y = 0) {
int imageX = x * width();
int imageY = y * height();
int image_x = x * width();
int image_y = y * height();
assert(imageX > 0 && imageX + width() < imageWidth);
assert(imageY > 0 && imageY + height() < imageHeight);
assert(image_x > 0 && image_x + width() < imageWidth);
assert(image_y > 0 && image_y + height() < imageHeight);
T *start = &image[imageX + (imageY * imageWidth)];
T *start = &image[image_x + (image_y * imageWidth)];
for (int i = 0; i < height(); i++) { _rows[i] = std::span(start[i * imageWidth]); }
}
@ -62,5 +63,4 @@ template <size_t M, size_t N, class T> class ColorBlock {
std::array<row, M> _rows;
};
using RGBABlock4x4 = ColorBlock<4, 4, Color32>;
using RBlock4x4 = ColorBlock<4, 4, uint8_t>;
using Color4x4= ColorBlock<4, 4, Color32>;

@ -0,0 +1,58 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <type_traits>
// Thanks dkavolis
/// Bitwise NOT for enumeration types: complements the underlying integer value and converts the result back to E.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E operator~(E value) noexcept {
    return static_cast<E>(~static_cast<std::underlying_type_t<E>>(value));
}
/// Bitwise OR for enumeration types, computed on the underlying integer values and converted back to E.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E operator|(E lhs, E rhs) noexcept {
    return static_cast<E>(static_cast<std::underlying_type_t<E>>(lhs) | static_cast<std::underlying_type_t<E>>(rhs));
}
/// Bitwise AND for enumeration types, computed on the underlying integer values and converted back to E.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E operator&(E lhs, E rhs) noexcept {
    return static_cast<E>(static_cast<std::underlying_type_t<E>>(lhs) & static_cast<std::underlying_type_t<E>>(rhs));
}
/// Bitwise XOR for enumeration types, computed on the underlying integer values and converted back to E.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E operator^(E lhs, E rhs) noexcept {
    return static_cast<E>(static_cast<std::underlying_type_t<E>>(lhs) ^ static_cast<std::underlying_type_t<E>>(rhs));
}
/// Compound OR-assignment for enumeration types: stores `lhs | rhs` into lhs and returns a reference to it.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E &operator|=(E &lhs, E rhs) noexcept {
    return lhs = lhs | rhs;
}
/// Compound AND-assignment for enumeration types: stores `lhs & rhs` into lhs and returns a reference to it.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E &operator&=(E &lhs, E rhs) noexcept {
    return lhs = lhs & rhs;
}
/// Compound XOR-assignment for enumeration types: stores `lhs ^ rhs` into lhs and returns a reference to it.
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline E &operator^=(E &lhs, E rhs) noexcept {
    return lhs = lhs ^ rhs;
}

@ -31,8 +31,8 @@ class BC1Block {
public:
uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; }
uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; }
Color32 GetLowColor32() const { return Color32::unpack565(GetLowColor()); }
Color32 GetHighColor32() const { return Color32::unpack565(GetHighColor()); }
Color32 GetLowColor32() const { return Color32::Unpack565(GetLowColor()); }
Color32 GetHighColor32() const { return Color32::Unpack565(GetHighColor()); }
bool Is3Color() const { return GetLowColor() <= GetHighColor(); }
void SetLowColor(uint16_t c) {

@ -25,15 +25,15 @@
#include "util.h"
// region Color32 implementation
Color32::Color32() { set(0, 0, 0, 0xFF); }
Color32::Color32() { Set(0, 0, 0, 0xFF); }
Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { set(R, G, B, A); }
Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { Set(R, G, B, A); }
uint16_t Color32::pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); }
uint16_t Color32::Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); }
uint16_t Color32::pack565(uint16_t R, uint16_t G, uint16_t B) { return pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); }
uint16_t Color32::Pack565(uint16_t R, uint16_t G, uint16_t B) { return Pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); }
Color32 Color32::unpack565(uint16_t Packed) {
Color32 Color32::Unpack565(uint16_t Packed) {
uint8_t R = scale5To8((Packed >> 11) & 0x1F);
uint8_t G = scale6To8((Packed >> 5) & 0x3F);
uint8_t B = scale5To8(Packed & 0x1F);
@ -43,26 +43,26 @@ Color32 Color32::unpack565(uint16_t Packed) {
uint8_t Color32::operator[](uint32_t Index) const {
assert(Index < 4);
return C[Index];
return c[Index];
}
uint8_t &Color32::operator[](uint32_t Index) {
assert(Index < 4);
return C[Index];
return c[Index];
}
void Color32::set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {
this->R = R;
this->G = G;
this->B = B;
this->A = A;
void Color32::Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {
this->r = R;
this->g = G;
this->b = B;
this->a = A;
}
void Color32::set(const Color32 &Other) {
this->R = Other.R;
this->G = Other.G;
this->B = Other.B;
this->A = Other.A;
void Color32::Set(const Color32 &Other) {
this->r = Other.r;
this->g = Other.g;
this->b = Other.b;
this->a = Other.a;
}
Color32 Color32::min(const Color32 &a, const Color32 &b) {
@ -73,7 +73,7 @@ Color32 Color32::max(const Color32 &a, const Color32 &b) {
return Color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3]));
}
uint16_t Color32::pack565() { return pack565(R, G, B); }
uint16_t Color32::pack565() { return Pack565(r, g, b); }
uint16_t Color32::pack565Unscaled() { return pack565Unscaled(R, G, B); }
uint16_t Color32::pack565Unscaled() { return Pack565Unscaled(r, g, b); }
// endregion

@ -18,6 +18,7 @@
*/
#pragma once
#include <array>
#include <cstdint>
#pragma pack(push, 1)
@ -25,25 +26,25 @@ class Color32 {
public:
union {
struct {
uint8_t R;
uint8_t G;
uint8_t B;
uint8_t A;
uint8_t r;
uint8_t g;
uint8_t b;
uint8_t a;
};
uint8_t C[4];
std::array<uint8_t, 4> c;
};
Color32();
Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A = 0xFF);
static uint16_t pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);
static uint16_t pack565(uint16_t R, uint16_t G, uint16_t B);
static uint16_t Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);
static uint16_t Pack565(uint16_t R, uint16_t G, uint16_t B);
static Color32 unpack565(uint16_t Packed);
static Color32 Unpack565(uint16_t Packed);
bool operator==(const Color32 &Rhs) const { return R == Rhs.R && G == Rhs.G && B == Rhs.B && A == Rhs.A; }
bool operator==(const Color32 &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }
uint8_t operator[](uint32_t Index) const;
uint8_t &operator[](uint32_t Index);
@ -54,8 +55,8 @@ class Color32 {
static Color32 min(const Color32 &A, const Color32 &B);
static Color32 max(const Color32 &A, const Color32 &B);
void set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);
void Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);
void set(const Color32 &Other);
void Set(const Color32 &Other);
};
#pragma pack(pop)

@ -27,6 +27,7 @@
namespace rgbcx {
/*
Interpolator::Interpolator() {
PrepSingleColorTables(_single_match5, _single_match5_half, 5);
PrepSingleColorTables(_single_match5, _single_match5_half, 6);
@ -66,8 +67,8 @@ void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const M
}
}
}
int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error,
bool half, bool ideal) {
int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
bool ideal) {
int e = iabs(v - i);
// We only need to factor in 3% error in BC1 ideal mode.
@ -84,30 +85,48 @@ int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int
}
return e;
}
}*/
// region Interpolator implementation
// Expands both 5-bit inputs with scale5To8 and forwards the results to Interpolate8.
int Interpolator::Interpolate5(int v0, int v1) const {
    const auto wide0 = scale5To8(v0);
    const auto wide1 = scale5To8(v1);
    return Interpolate8(wide0, wide1);
}
// Expands both 6-bit inputs with scale6To8 and forwards the results to Interpolate8.
int Interpolator::Interpolate6(int v0, int v1) const {
    const auto wide0 = scale6To8(v0);
    const auto wide1 = scale6To8(v1);
    return Interpolate8(wide0, wide1);
}
// Expands both 5-bit inputs with scale5To8 and forwards the results to InterpolateHalf8.
int Interpolator::InterpolateHalf5(int v0, int v1) const {
    const auto wide0 = scale5To8(v0);
    const auto wide1 = scale5To8(v1);
    return InterpolateHalf8(wide0, wide1);
}
// Expands both 6-bit inputs with scale6To8 and forwards the results to InterpolateHalf8.
int Interpolator::InterpolateHalf6(int v0, int v1) const {
    const auto wide0 = scale6To8(v0);
    const auto wide1 = scale6To8(v1);
    return InterpolateHalf8(wide0, wide1);
}
std::array<Color32, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) {
auto colors = std::array<Color32, 4>();
colors[0] = Color32::Unpack565(low);
colors[1] = Color32::Unpack565(high);
if (low > high) {
// 4-color mode
colors[2] = InterpolateColor24(colors[0], colors[1]);
colors[3] = InterpolateColor24(colors[1], colors[0]);
} else {
// 3-color mode
colors[2] = InterpolateHalfColor24(colors[0], colors[1]);
colors[3] = Color32(0, 0, 0, 0); // transparent black
}
// region InterpolatorIdeal implementation
int InterpolatorIdeal::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorIdeal::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorIdeal::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
int InterpolatorIdeal::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
return colors;
}
int InterpolatorIdeal::Interpolate5or6(int v0, int v1) const {
int Interpolator::Interpolate8(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 * 2 + v1) / 3;
}
int InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) const {
int Interpolator::InterpolateHalf8(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 + v1) / 2;
}
// endregion
// region InterpolatorIdealRound implementation
int InterpolatorIdealRound::Interpolate5(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
int InterpolatorIdealRound::Interpolate6(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
// region InterpolatorRound implementation
int InterpolatorRound::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
int InterpolatorRound::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
int InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) const {
int InterpolatorRound::Interpolate8(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 * 2 + v1 + 1) / 3;
}
@ -120,7 +139,7 @@ int InterpolatorNvidia::Interpolate5(int v0, int v1) const {
}
// Integer interpolation between v0 and v1 for the 6-bit channel.
// Result is (256 * v0 + gdiff / 4 + 128 + 80 * gdiff) / 256 with gdiff = v1 - v0, all in integer arithmetic:
// v0 plus roughly 80/256 of the difference, with +128 acting as a rounding bias before the divide by 256.
int InterpolatorNvidia::Interpolate6(int v0, int v1) const {
// NOTE(review): two range checks with different bounds appear back to back; the second (< 64) is stricter and
// makes the first redundant - confirm which input range is intended.
assert(v0 < 256 && v1 < 256);
assert(v0 < 64 && v1 < 64);
const int gdiff = v1 - v0;
return (256 * v0 + (gdiff / 4) + 128 + gdiff * 80) / 256;
}
@ -131,24 +150,24 @@ int InterpolatorNvidia::InterpolateHalf5(int v0, int v1) const {
}
// Integer half-way interpolation between v0 and v1 for the 6-bit channel.
// Result is (256 * v0 + gdiff / 4 + 128 + 128 * gdiff) / 256 with gdiff = v1 - v0, all in integer arithmetic:
// v0 plus roughly half of the difference, with +128 acting as a rounding bias before the divide by 256.
int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
// NOTE(review): two range checks with different bounds appear back to back; the second (< 64) is stricter and
// makes the first redundant - confirm which input range is intended.
assert(v0 < 256 && v1 < 256);
assert(v0 < 64 && v1 < 64);
const int gdiff = v1 - v0;
return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
}
// endregion
// region InterpolatorAMD implementation
int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }
int InterpolatorAMD::Interpolate5or6(int v0, int v1) const {
int InterpolatorAMD::Interpolate8(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 * 43 + v1 * 21 + 32) >> 6;
}
int InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) const {
int InterpolatorAMD::InterpolateHalf8(int v0, int v1) const {
assert(v0 < 256 && v1 < 256);
return (v0 + v1 + 1) >> 1;
}

@ -23,6 +23,7 @@
#include <cstdint>
#include <memory>
#include "color.h"
#include "util.h"
#ifdef NDEBUG // asserts disabled
@ -41,105 +42,122 @@ template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> E
class Interpolator {
public:
struct MatchEntry {
uint8_t high;
uint8_t low;
uint8_t error;
};
// struct MatchEntry {
// uint8_t high;
// uint8_t low;
// uint8_t error;
// };
Interpolator();
virtual ~Interpolator() noexcept = default;
virtual int Interpolate5(int v0, int v1) = 0;
virtual int Interpolate6(int v0, int v1) = 0;
virtual int InterpolateHalf5(int v0, int v1) = 0;
virtual int InterpolateHalf6(int v0, int v1) = 0;
constexpr MatchEntry GetMatch5(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match5)[i];
}
constexpr MatchEntry GetMatch6(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match6)[i];
}
constexpr MatchEntry GetMatchHalf5(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match5_half)[i];
}
constexpr MatchEntry GetMatchHalf6(int i) noexcept(ndebug) {
assert(i < match_count);
return (*_single_match6_half)[i];
}
private:
constexpr static inline size_t size5 = 32;
constexpr static inline size_t size6 = 64;
constexpr static inline size_t match_count = 256;
constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
// match tables used for single-color blocks
using MatchList = std::array<MatchEntry, match_count>;
using MatchListPtr = std::shared_ptr<MatchList>;
const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
virtual constexpr bool isIdeal() noexcept { return false; }
virtual constexpr bool useExpandedInMatch() noexcept { return true; }
void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
bool ideal);
};
virtual ~Interpolator() noexcept = default;
class InterpolatorIdeal : public Interpolator {
public:
/**
* Performs a 2/3 interpolation of a pair of 5-bit values to produce an 8-bit value
* Output is approximately (2v0 + v1)/3, with v0 and v1 first extended to 8 bits.
* @param v0 The first 5-bit value
* @param v1 The second 5-bit value
* @return The interpolated value
*/
virtual int Interpolate5(int v0, int v1) const;
/**
* Performs a 2/3 interpolation of a pair of 5-bit values to produce an 8-bit value
* Output is approximately (2v0 + v1)/3, with v0 and v1 first extended to 8 bits.
* @param v0 The first 5-bit value
* @param v1 The second 5-bit value
* @return The interpolated value
*/
virtual int Interpolate6(int v0, int v1) const;
/**
* Performs a 1/2 interpolation of a pair of 5-bit values to produce an 8-bit value
* Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits.
* @param v0 The first 5-bit value
* @param v1 The second 5-bit value
* @return The interpolated value
*/
virtual int InterpolateHalf5(int v0, int v1) const;
/**
* Performs a 1/2 interpolation of a pair of 6-bit values to produce an 8-bit value
* Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits.
* @param v0 The first 6-bit value
* @param v1 The second 6-bit value
* @return The interpolated value
*/
virtual int InterpolateHalf6(int v0, int v1) const;
/**
* Generates the 4 colors for a BC1 block from the given 5:6:5-packed colors
* @param low first 5:6:5 color for the block
* @param high second 5:6:5 color for the block
* @return and array of 4 Color32 values, with indices matching BC1 selectors
*/
std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high);
private:
int Interpolate5or6(int v0, int v1) const;
int InterpolateHalf5or6(int v0, int v1) const;
virtual constexpr bool isIdeal() noexcept override { return true; }
virtual int Interpolate8(int v0, int v1) const;
virtual int InterpolateHalf8(int v0, int v1) const;
// constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
// constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
//
// // match tables used for single-color blocks
// using MatchList = std::array<MatchEntry, match_count>;
// using MatchListPtr = std::shared_ptr<MatchList>;
//
// const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
// const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
// const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
// const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
Color32 InterpolateColor24(const Color32 &c0, const Color32 &c1) const {
return Color32(Interpolate8(c0.r, c1.r), Interpolate8(c0.g, c1.g), Interpolate8(c0.b, c1.b));
}
Color32 InterpolateHalfColor24(const Color32 &c0, const Color32 &c1) const {
return Color32(InterpolateHalf8(c0.r, c1.r), InterpolateHalf8(c0.g, c1.g), InterpolateHalf8(c0.b, c1.b));
}
virtual constexpr bool isIdeal() noexcept { return true; }
// virtual constexpr bool useExpandedInMatch() noexcept { return true; }
//
// void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
//
// int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
// bool ideal);
};
class InterpolatorIdealRound : public InterpolatorIdeal {
class InterpolatorRound : public Interpolator {
public:
virtual int Interpolate5(int v0, int v1) const override;
virtual int Interpolate6(int v0, int v1) const override;
int Interpolate5(int v0, int v1) const override;
int Interpolate6(int v0, int v1) const override;
private:
int Interpolate5or6Round(int v0, int v1) const;
int Interpolate8(int v0, int v1) const;
};
class InterpolatorNvidia : public Interpolator {
public:
virtual int Interpolate5(int v0, int v1) const;
virtual int Interpolate6(int v0, int v1) const;
virtual int InterpolateHalf5(int v0, int v1) const;
virtual int InterpolateHalf6(int v0, int v1) const;
private:
virtual constexpr bool useExpandedInMatch() noexcept override { return false; }
int Interpolate5(int v0, int v1) const override;
int Interpolate6(int v0, int v1) const override;
int InterpolateHalf5(int v0, int v1) const override;
int InterpolateHalf6(int v0, int v1) const override;
constexpr bool isIdeal() noexcept override { return false; }
};
class InterpolatorAMD : public Interpolator {
public:
virtual int Interpolate5(int v0, int v1) const;
virtual int Interpolate6(int v0, int v1) const;
virtual int InterpolateHalf5(int v0, int v1) const;
virtual int InterpolateHalf6(int v0, int v1) const;
int Interpolate5(int v0, int v1) const override;
int Interpolate6(int v0, int v1) const override;
int InterpolateHalf5(int v0, int v1) const override;
int InterpolateHalf6(int v0, int v1) const override;
constexpr bool isIdeal() noexcept override { return false; }
private:
int Interpolate5or6(int v0, int v1) const;
int InterpolateHalf5or6(int v0, int v1) const;
int Interpolate8(int v0, int v1) const;
int InterpolateHalf8(int v0, int v1) const;
};
} // namespace rgbcx

@ -476,7 +476,7 @@ static inline bool compute_least_squares_endpoints4_rgb(const Color32 *pColors,
uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0;
uint32_t weight_accum = 0;
for (uint32_t i = 0; i < 16; i++) {
const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2];
const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
const uint8_t sel = pSelectors[i];
weight_accum += g_weight_vals4[sel];
@ -547,7 +547,7 @@ static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const Co
uint32_t weight_accum = 0;
int total_r = 0, total_g = 0, total_b = 0;
for (uint32_t i = 0; i < 16; i++) {
const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2];
const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
if (use_black) {
if ((r | g | b) < 4) continue;
}
@ -684,10 +684,10 @@ static inline void bc1_find_sels4_noerr(const Color32 *pSrc_pixels, uint32_t lr,
static const uint8_t s_sels[4] = {3, 2, 1, 0};
for (uint32_t i = 0; i < 16; i += 4) {
const int d0 = pSrc_pixels[i + 0].R * ar + pSrc_pixels[i + 0].G * ag + pSrc_pixels[i + 0].B * ab;
const int d1 = pSrc_pixels[i + 1].R * ar + pSrc_pixels[i + 1].G * ag + pSrc_pixels[i + 1].B * ab;
const int d2 = pSrc_pixels[i + 2].R * ar + pSrc_pixels[i + 2].G * ag + pSrc_pixels[i + 2].B * ab;
const int d3 = pSrc_pixels[i + 3].R * ar + pSrc_pixels[i + 3].G * ag + pSrc_pixels[i + 3].B * ab;
const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab;
const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab;
const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab;
const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab;
sels[i + 0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
sels[i + 1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
@ -717,10 +717,10 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32
uint32_t total_err = 0;
for (uint32_t i = 0; i < 16; i += 4) {
const int d0 = pSrc_pixels[i + 0].R * ar + pSrc_pixels[i + 0].G * ag + pSrc_pixels[i + 0].B * ab;
const int d1 = pSrc_pixels[i + 1].R * ar + pSrc_pixels[i + 1].G * ag + pSrc_pixels[i + 1].B * ab;
const int d2 = pSrc_pixels[i + 2].R * ar + pSrc_pixels[i + 2].G * ag + pSrc_pixels[i + 2].B * ab;
const int d3 = pSrc_pixels[i + 3].R * ar + pSrc_pixels[i + 3].G * ag + pSrc_pixels[i + 3].B * ab;
const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab;
const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab;
const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab;
const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab;
uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
@ -733,13 +733,13 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32
sels[i + 3] = sel3;
total_err +=
squarei(pSrc_pixels[i + 0].R - block_r[sel0]) + squarei(pSrc_pixels[i + 0].G - block_g[sel0]) + squarei(pSrc_pixels[i + 0].B - block_b[sel0]);
squarei(pSrc_pixels[i + 0].r - block_r[sel0]) + squarei(pSrc_pixels[i + 0].g - block_g[sel0]) + squarei(pSrc_pixels[i + 0].b - block_b[sel0]);
total_err +=
squarei(pSrc_pixels[i + 1].R - block_r[sel1]) + squarei(pSrc_pixels[i + 1].G - block_g[sel1]) + squarei(pSrc_pixels[i + 1].B - block_b[sel1]);
squarei(pSrc_pixels[i + 1].r - block_r[sel1]) + squarei(pSrc_pixels[i + 1].g - block_g[sel1]) + squarei(pSrc_pixels[i + 1].b - block_b[sel1]);
total_err +=
squarei(pSrc_pixels[i + 2].R - block_r[sel2]) + squarei(pSrc_pixels[i + 2].G - block_g[sel2]) + squarei(pSrc_pixels[i + 2].B - block_b[sel2]);
squarei(pSrc_pixels[i + 2].r - block_r[sel2]) + squarei(pSrc_pixels[i + 2].g - block_g[sel2]) + squarei(pSrc_pixels[i + 2].b - block_b[sel2]);
total_err +=
squarei(pSrc_pixels[i + 3].R - block_r[sel3]) + squarei(pSrc_pixels[i + 3].G - block_g[sel3]) + squarei(pSrc_pixels[i + 3].B - block_b[sel3]);
squarei(pSrc_pixels[i + 3].r - block_r[sel3]) + squarei(pSrc_pixels[i + 3].g - block_g[sel3]) + squarei(pSrc_pixels[i + 3].b - block_b[sel3]);
if (total_err >= cur_err) break;
}
@ -759,9 +759,9 @@ static inline uint32_t bc1_find_sels4_check2_err(const Color32 *pSrc_pixels, uin
uint32_t total_err = 0;
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R;
const int g = pSrc_pixels[i].G;
const int b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r;
const int g = pSrc_pixels[i].g;
const int b = pSrc_pixels[i].b;
int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f);
sel = clampi(sel, 1, 3);
@ -796,9 +796,9 @@ static inline uint32_t bc1_find_sels4_fullerr(const Color32 *pSrc_pixels, uint32
uint32_t total_err = 0;
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R;
const int g = pSrc_pixels[i].G;
const int b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r;
const int g = pSrc_pixels[i].g;
const int b = pSrc_pixels[i].b;
uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b);
uint8_t best_sel = 0;
@ -842,9 +842,9 @@ static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const Color32 *pSr
uint32_t total_err = 0;
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R;
const int g = pSrc_pixels[i].G;
const int b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r;
const int g = pSrc_pixels[i].g;
const int b = pSrc_pixels[i].b;
uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b);
uint32_t best_sel = 0;
@ -923,8 +923,8 @@ static inline void precise_round_565_noscale(vec3F xl, vec3F xh, int &trial_lr,
}
static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) {
uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb);
uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb);
uint16_t lc16 = Color32::Pack565Unscaled(lr, lg, lb);
uint16_t hc16 = Color32::Pack565Unscaled(hr, hg, hb);
// Always forbid 3 color blocks
if (lc16 == hc16) {
@ -974,8 +974,8 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int
}
static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) {
uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb);
uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb);
uint16_t lc16 = Color32::Pack565Unscaled(lr, lg, lb);
uint16_t hc16 = Color32::Pack565Unscaled(hr, hg, hb);
bool invert_flag = false;
if (lc16 > hc16) {
@ -1018,7 +1018,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
int min_r = 255, min_g = 255, min_b = 255;
int total_pixels = 0;
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
if ((r | g | b) < 4) continue;
max_r = std::max(max_r, r);
@ -1045,9 +1045,9 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
int icov[6] = {0, 0, 0, 0, 0, 0};
for (uint32_t i = 0; i < 16; i++) {
int r = (int)pSrc_pixels[i].R;
int g = (int)pSrc_pixels[i].G;
int b = (int)pSrc_pixels[i].B;
int r = (int)pSrc_pixels[i].r;
int g = (int)pSrc_pixels[i].g;
int b = (int)pSrc_pixels[i].b;
if ((r | g | b) < 4) continue;
@ -1094,7 +1094,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
int low_dot = INT_MAX, high_dot = INT_MIN;
for (uint32_t i = 0; i < 16; i++) {
int r = (int)pSrc_pixels[i].R, g = (int)pSrc_pixels[i].G, b = (int)pSrc_pixels[i].B;
int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b;
if ((r | g | b) < 4) continue;
@ -1109,13 +1109,13 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
}
}
int lr = scale8To5(pSrc_pixels[low_c].R);
int lg = scale8To6(pSrc_pixels[low_c].G);
int lb = scale8To5(pSrc_pixels[low_c].B);
int lr = scale8To5(pSrc_pixels[low_c].r);
int lg = scale8To6(pSrc_pixels[low_c].g);
int lb = scale8To5(pSrc_pixels[low_c].b);
int hr = scale8To5(pSrc_pixels[high_c].R);
int hg = scale8To6(pSrc_pixels[high_c].G);
int hb = scale8To5(pSrc_pixels[high_c].B);
int hr = scale8To5(pSrc_pixels[high_c].r);
int hg = scale8To6(pSrc_pixels[high_c].g);
int hb = scale8To5(pSrc_pixels[high_c].b);
uint8_t trial_sels[16];
uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX);
@ -1236,9 +1236,9 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_
int dots[16];
for (uint32_t i = 0; i < 16; i++) {
int r = pSrc_pixels[i].R;
int g = pSrc_pixels[i].G;
int b = pSrc_pixels[i].B;
int r = pSrc_pixels[i].r;
int g = pSrc_pixels[i].g;
int b = pSrc_pixels[i].b;
int d = 0x1000000 + (r * ar + g * ag + b * ab);
assert(d >= 0);
dots[i] = (d << 4) + i;
@ -1255,9 +1255,9 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_
g_sum[i] = g;
b_sum[i] = b;
r += pSrc_pixels[p].R;
g += pSrc_pixels[p].G;
b += pSrc_pixels[p].B;
r += pSrc_pixels[p].r;
g += pSrc_pixels[p].g;
b += pSrc_pixels[p].b;
}
r_sum[16] = total_r;
@ -1338,7 +1338,7 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3
flags = cEncodeBC1BoundingBoxInt;
break;
case 1:
// Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0.
// Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0.
flags = cEncodeBC1Use2DLS;
break;
case 2:
@ -1452,11 +1452,12 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3
encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3);
}
// Finds low and high colors to begin with
static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t flags, bool grayscale_flag, int min_r, int min_g, int min_b, int max_r,
int max_g, int max_b, int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, int &lr, int &lg,
int &lb, int &hr, int &hg, int &hb) {
if (grayscale_flag) {
const int fr = pSrc_pixels[0].R;
const int fr = pSrc_pixels[0].r;
// Grayscale blocks are a common enough case to specialize.
if ((max_r - min_r) < 2) {
@ -1480,7 +1481,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
vec3F l, h;
if (big_chan == 0) {
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
sum_xy_r += r * r, sum_xy_g += r * g, sum_xy_b += r * b;
}
@ -1521,7 +1522,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
h.c[0] = fmax_chan_val;
} else if (big_chan == 1) {
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
sum_xy_r += g * r, sum_xy_g += g * g, sum_xy_b += g * b;
}
@ -1562,7 +1563,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
h.c[1] = fmax_chan_val;
} else {
for (uint32_t i = 0; i < 16; i++) {
const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
sum_xy_r += b * r, sum_xy_g += b * g, sum_xy_b += b * b;
}
@ -1630,9 +1631,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
int icov_xz = 0, icov_yz = 0;
for (uint32_t i = 0; i < 16; i++) {
int r = (int)pSrc_pixels[i].R - avg_r;
int g = (int)pSrc_pixels[i].G - avg_g;
int b = (int)pSrc_pixels[i].B - avg_b;
int r = (int)pSrc_pixels[i].r - avg_r;
int g = (int)pSrc_pixels[i].g - avg_g;
int b = (int)pSrc_pixels[i].b - avg_b;
icov_xz += r * b;
icov_yz += g * b;
}
@ -1668,9 +1669,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
int icov_xz = 0, icov_yz = 0;
for (uint32_t i = 0; i < 16; i++) {
int r = (int)pSrc_pixels[i].R - avg_r;
int g = (int)pSrc_pixels[i].G - avg_g;
int b = (int)pSrc_pixels[i].B - avg_b;
int r = (int)pSrc_pixels[i].r - avg_r;
int g = (int)pSrc_pixels[i].g - avg_g;
int b = (int)pSrc_pixels[i].b - avg_b;
icov_xz += r * b;
icov_yz += g * b;
}
@ -1680,6 +1681,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
int x1 = max_r;
int y1 = max_g;
// swap r and g min and max to align principal axis
if (icov_xz < 0) std::swap(x0, x1);
if (icov_yz < 0) std::swap(y0, y1);
@ -1697,9 +1699,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
int icov[6] = {0, 0, 0, 0, 0, 0};
for (uint32_t i = 0; i < 16; i++) {
int r = (int)pSrc_pixels[i].R - avg_r;
int g = (int)pSrc_pixels[i].G - avg_g;
int b = (int)pSrc_pixels[i].B - avg_b;
int r = (int)pSrc_pixels[i].r - avg_r;
int g = (int)pSrc_pixels[i].g - avg_g;
int b = (int)pSrc_pixels[i].b - avg_b;
icov[0] += r * r;
icov[1] += r * g;
icov[2] += r * b;
@ -1746,10 +1748,10 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
saxis_b = (int)((uint32_t)saxis_b << 4U);
for (uint32_t i = 0; i < 16; i += 4) {
int dot0 = ((pSrc_pixels[i].R * saxis_r + pSrc_pixels[i].G * saxis_g + pSrc_pixels[i].B * saxis_b) & ~0xF) + i;
int dot1 = ((pSrc_pixels[i + 1].R * saxis_r + pSrc_pixels[i + 1].G * saxis_g + pSrc_pixels[i + 1].B * saxis_b) & ~0xF) + i + 1;
int dot2 = ((pSrc_pixels[i + 2].R * saxis_r + pSrc_pixels[i + 2].G * saxis_g + pSrc_pixels[i + 2].B * saxis_b) & ~0xF) + i + 2;
int dot3 = ((pSrc_pixels[i + 3].R * saxis_r + pSrc_pixels[i + 3].G * saxis_g + pSrc_pixels[i + 3].B * saxis_b) & ~0xF) + i + 3;
int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i;
int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1;
int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2;
int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3;
int min_d01 = std::min(dot0, dot1);
int max_d01 = std::max(dot0, dot1);
@ -1766,13 +1768,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
low_c = low_dot & 15;
high_c = high_dot & 15;
lr = scale8To5(pSrc_pixels[low_c].R);
lg = scale8To6(pSrc_pixels[low_c].G);
lb = scale8To5(pSrc_pixels[low_c].B);
lr = scale8To5(pSrc_pixels[low_c].r);
lg = scale8To6(pSrc_pixels[low_c].g);
lb = scale8To5(pSrc_pixels[low_c].b);
hr = scale8To5(pSrc_pixels[high_c].R);
hg = scale8To6(pSrc_pixels[high_c].G);
hb = scale8To5(pSrc_pixels[high_c].B);
hr = scale8To5(pSrc_pixels[high_c].r);
hg = scale8To6(pSrc_pixels[high_c].g);
hb = scale8To5(pSrc_pixels[high_c].b);
}
}
@ -1857,11 +1859,11 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b;
const uint32_t fr = pSrc_pixels[0].R, fg = pSrc_pixels[0].G, fb = pSrc_pixels[0].B;
const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;
uint32_t j;
for (j = 15; j >= 1; --j)
if ((pSrc_pixels[j].R != fr) || (pSrc_pixels[j].G != fg) || (pSrc_pixels[j].B != fb)) break;
if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break;
if (j == 0) {
encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0);
@ -1877,7 +1879,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
uint32_t any_black_pixels = (fr | fg | fb) < 4;
for (uint32_t i = 1; i < 16; i++) {
const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
grayscale_flag &= ((r == g) && (r == b));
any_black_pixels |= ((r | g | b) < 4);
@ -1953,6 +1955,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
} // ls_pass
} else {
// calculate block error from naïve approach
const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1;
for (uint32_t round = 0; round < total_rounds; round++) {
uint32_t modified_flags = flags;
@ -2065,9 +2068,9 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
int dots[16];
for (uint32_t i = 0; i < 16; i++) {
int r = pSrc_pixels[i].R;
int g = pSrc_pixels[i].G;
int b = pSrc_pixels[i].B;
int r = pSrc_pixels[i].r;
int g = pSrc_pixels[i].g;
int b = pSrc_pixels[i].b;
int d = 0x1000000 + (r * ar + g * ag + b * ab);
assert(d >= 0);
dots[i] = (d << 4) + i;
@ -2084,9 +2087,9 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
g_sum[i] = g;
b_sum[i] = b;
r += pSrc_pixels[p].R;
g += pSrc_pixels[p].G;
b += pSrc_pixels[p].B;
r += pSrc_pixels[p].r;
g += pSrc_pixels[p].g;
b += pSrc_pixels[p].b;
}
r_sum[16] = total_r;
@ -2368,43 +2371,43 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
bool used_punchthrough = false;
if (l > h) {
c[0].set(r0, g0, b0, 255);
c[1].set(r1, g1, b1, 255);
c[0].Set(r0, g0, b0, 255);
c[1].Set(r1, g1, b1, 255);
switch (mode) {
case bc1_approx_mode::cBC1Ideal:
c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
c[2].Set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
c[3].Set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
break;
case bc1_approx_mode::cBC1IdealRound4:
c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255);
c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255);
c[2].Set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255);
c[3].Set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255);
break;
case bc1_approx_mode::cBC1NVidia:
c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255);
c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255);
c[2].Set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255);
c[3].Set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255);
break;
case bc1_approx_mode::cBC1AMD:
c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
c[2].Set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
c[3].Set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
break;
}
} else {
c[0].set(r0, g0, b0, 255);
c[1].set(r1, g1, b1, 255);
c[0].Set(r0, g0, b0, 255);
c[1].Set(r1, g1, b1, 255);
switch (mode) {
case bc1_approx_mode::cBC1Ideal:
case bc1_approx_mode::cBC1IdealRound4:
c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
c[2].Set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
break;
case bc1_approx_mode::cBC1NVidia:
c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255);
c[2].Set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255);
break;
case bc1_approx_mode::cBC1AMD:
c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
c[2].Set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
break;
}
c[3].set(0, 0, 0, 0);
c[3].Set(0, 0, 0, 0);
used_punchthrough = true;
}
@ -2417,10 +2420,10 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
}
} else {
for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) {
pDst_pixels[0].set(c[pBlock->GetSelector(0, y)]);
pDst_pixels[1].set(c[pBlock->GetSelector(1, y)]);
pDst_pixels[2].set(c[pBlock->GetSelector(2, y)]);
pDst_pixels[3].set(c[pBlock->GetSelector(3, y)]);
pDst_pixels[0].Set(c[pBlock->GetSelector(0, y)]);
pDst_pixels[1].Set(c[pBlock->GetSelector(1, y)]);
pDst_pixels[2].Set(c[pBlock->GetSelector(2, y)]);
pDst_pixels[3].Set(c[pBlock->GetSelector(3, y)]);
}
}
@ -2452,7 +2455,7 @@ bool unpack_bc3(const void *pBlock_bits, void *pPixels, bc1_approx_mode mode) {
if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(BC4Block), pDst_pixels, true, mode)) success = false;
unpack_bc4(pBlock_bits, &pDst_pixels[0].A, sizeof(Color32));
unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(Color32));
return success;
}

@ -56,6 +56,7 @@
#include <cstdint>
#include "blocks.h"
#include "interpolator.h"
// By default, the table used to accelerate cluster fit on 4 color blocks uses a 969x128 entry table.
// To reduce the executable size, set RGBCX_USE_SMALLER_TABLES to 1, which selects the smaller 969x32 entry table.

@ -32,8 +32,8 @@ static inline uint8_t scale8To6(uint32_t v) {
return (uint8_t)((v + (v >> 8)) >> 8);
}
// Widens a 5-bit channel value to 8 bits: the value is shifted into the upper
// five bits and its own top three bits are OR-ed into the low three bits, so
// that 0 maps to 0 and 31 maps to 255.
static inline int scale5To8(int v) {
    const int upper_bits = v << 3;
    const int lower_bits = v >> 2;
    return upper_bits | lower_bits;
}
// Widens a 6-bit channel value to 8 bits: the value is shifted into the upper
// six bits and its own top two bits are OR-ed into the low two bits, so that
// 0 maps to 0 and 63 maps to 255.
static inline int scale6To8(int v) {
    const int upper_bits = v << 2;
    const int lower_bits = v >> 4;
    return upper_bits | lower_bits;
}
// Expands a 5-bit value to 8 bits, usable in constant expressions.
// The low three bits of the result are filled with the top three bits of v,
// so 0 -> 0 and 31 -> 255.
static constexpr int scale5To8(int v) {
    return (v >> 2) | (v << 3);
}
// Expands a 6-bit value to 8 bits, usable in constant expressions.
// The low two bits of the result are filled with the top two bits of v,
// so 0 -> 0 and 63 -> 255.
static constexpr int scale6To8(int v) {
    return (v >> 4) | (v << 2);
}
// Returns the larger of two values, compared with operator>.
// When neither is greater than the other, the second argument is returned.
template <typename S>
inline S maximum(S a, S b) {
    if (a > b) {
        return a;
    }
    return b;
}

// Returns the largest of three values by reducing the first pair and then
// comparing that result against the third.
template <typename S>
inline S maximum(S a, S b, S c) {
    const S first_pair = maximum(a, b);
    return maximum(first_pair, c);
}

Loading…
Cancel
Save