Extract utility functions and block structs to their own files

faster-single-tables
Andrew Cassidy 3 years ago
parent cddea42f7e
commit e0511f1aea

@ -4,9 +4,10 @@ project(python_rgbcx)
# Make the python_rgbcx module
set(C_FILES
src/main.cpp
src/rgbcx.cpp
src/tables.cpp
)
src/rgbcx.cpp src/rgbcx.cpp
src/tables.cpp src/tables.h src/table4.h
src/blocks.cpp src/blocks.h
src/util.h)
add_library(python_rgbcx MODULE ${C_FILES})
# Link to Pybind
@ -17,4 +18,4 @@ pybind11_extension(python_rgbcx)
pybind11_strip(python_rgbcx)
# Set module features, like C/C++ standards
target_compile_features(python_rgbcx PUBLIC cxx_std_14 c_std_11)
target_compile_features(python_rgbcx PUBLIC cxx_std_17 c_std_11)

@ -0,0 +1,55 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "blocks.h"
#include <algorithm>
#include <cassert>
// region color32 implementation
color32::color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
uint8_t color32::operator[](uint32_t idx) const {
assert(idx < 4);
return c[idx];
}
uint8_t &color32::operator[](uint32_t idx) {
assert(idx < 4);
return c[idx];
}
void color32::set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va) {
c[0] = vr;
c[1] = vg;
c[2] = vb;
c[3] = va;
}
void color32::set_rgb(const color32 &other) {
c[0] = other.c[0];
c[1] = other.c[1];
c[2] = other.c[2];
}
color32 color32::comp_min(const color32 &a, const color32 &b) {
return color32(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3]));
}
color32 color32::comp_max(const color32 &a, const color32 &b) {
return color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3]));
}
// endregion

@ -0,0 +1,185 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "util.h"
#include <cassert>
#include <cstdint>
constexpr inline uint8_t DXT1SelectorBits = 2U;
struct color32 {
union {
struct {
uint8_t r;
uint8_t g;
uint8_t b;
uint8_t a;
};
uint8_t c[4];
uint32_t m;
};
color32() {}
color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va);
void set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
void set_rgb(const color32 &other);
uint8_t operator[](uint32_t idx) const;
uint8_t &operator[](uint32_t idx);
bool operator==(const color32 &rhs) const { return m == rhs.m; }
static color32 comp_min(const color32 &a, const color32 &b);
static color32 comp_max(const color32 &a, const color32 &b);
};
struct bc1_block {
constexpr static inline size_t EndpointSize = 2;
constexpr static inline size_t SelectorSize = 4;
constexpr static inline uint8_t SelectorBits = 2;
constexpr static inline uint8_t SelectorValues = 1 << SelectorBits;
constexpr static inline uint8_t SelectorMask = SelectorValues - 1;
uint8_t m_low_color[EndpointSize];
uint8_t m_high_color[EndpointSize];
uint8_t m_selectors[SelectorSize];
inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
inline bool is_3color() const { return get_low_color() <= get_high_color(); }
inline void set_low_color(uint16_t c) {
m_low_color[0] = static_cast<uint8_t>(c & 0xFF);
m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF);
}
inline void set_high_color(uint16_t c) {
m_high_color[0] = static_cast<uint8_t>(c & 0xFF);
m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF);
}
inline uint32_t get_selector(uint32_t x, uint32_t y) const {
assert((x < 4U) && (y < 4U));
return (m_selectors[y] >> (x * SelectorBits)) & SelectorMask;
}
inline void set_selector(uint32_t x, uint32_t y, uint32_t val) {
assert((x < 4U) && (y < 4U) && (val < 4U));
m_selectors[y] &= (~(SelectorMask << (x * SelectorBits)));
m_selectors[y] |= (val << (x * DXT1SelectorBits));
}
static inline uint16_t pack_color(const color32 &color, bool scaled, uint32_t bias = 127U) {
uint32_t r = color.r, g = color.g, b = color.b;
if (scaled) {
r = (r * 31U + bias) / 255U;
g = (g * 63U + bias) / 255U;
b = (b * 31U + bias) / 255U;
}
return static_cast<uint16_t>(minimum(b, 31U) | (minimum(g, 63U) << 5U) | (minimum(r, 31U) << 11U));
}
static inline uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
static inline void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) {
r = (c >> 11) & 31;
g = (c >> 5) & 63;
b = c & 31;
r = (r << 3) | (r >> 2);
g = (g << 2) | (g >> 4);
b = (b << 3) | (b >> 2);
}
static inline void unpack_color_unscaled(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) {
r = (c >> 11) & 31;
g = (c >> 5) & 63;
b = c & 31;
}
};
struct bc4_block {
constexpr static inline size_t EndpointSize = 1;
constexpr static inline size_t SelectorSize = 6;
constexpr static inline uint8_t SelectorBits = 3;
constexpr static inline uint8_t SelectorValues = 1 << SelectorBits;
constexpr static inline uint8_t SelectorMask = SelectorValues - 1;
uint8_t m_endpoints[EndpointSize * 2];
uint8_t m_selectors[SelectorSize];
inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
inline uint32_t get_high_alpha() const { return m_endpoints[1]; }
inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
inline uint64_t get_selector_bits() const {
return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) |
((uint32_t)m_selectors[3] << 24U))) |
(((uint64_t)m_selectors[4]) << 32U) | (((uint64_t)m_selectors[5]) << 40U);
}
inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
assert((x < 4U) && (y < 4U));
return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);
}
static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) {
pDst[0] = static_cast<uint8_t>(l);
pDst[1] = static_cast<uint8_t>(h);
pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
pDst[6] = 0;
pDst[7] = 255;
return 6;
}
static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) {
pDst[0] = static_cast<uint8_t>(l);
pDst[1] = static_cast<uint8_t>(h);
pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
return 8;
}
static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) {
if (l > h)
return get_block_values8(pDst, l, h);
else
return get_block_values6(pDst, l, h);
}
};
struct bc3_block {
bc4_block alpha_block;
bc1_block color_block;
};
struct bc5_block {
bc4_block r_block;
bc4_block g_block;
};

@ -1,189 +1,18 @@
// rgbcx.h v1.12
// High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 <richgel99@gmail.com>.
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include "util.h"
#include "tables.h"
#include "blocks.h"
#include "rgbcx.h"
namespace rgbcx {
static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast<uint32_t>(-i) : static_cast<uint32_t>(i); }
static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast<uint64_t>(-i) : static_cast<uint64_t>(i); }
static inline uint8_t to_5(uint32_t v) {
v = v * 31 + 128;
return (uint8_t)((v + (v >> 8)) >> 8);
}
static inline uint8_t to_6(uint32_t v) {
v = v * 63 + 128;
return (uint8_t)((v + (v >> 8)) >> 8);
}
template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }
template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); }
template <typename S> inline S minimum(S a, S b) { return (a < b) ? a : b; }
template <typename S> inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); }
template <typename S> inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); }
template <typename T> inline T square(T a) { return a * a; }
static inline float clampf(float value, float low, float high) {
if (value < low)
value = low;
else if (value > high)
value = high;
return value;
}
static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); }
template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); }
static inline int32_t clampi(int32_t value, int32_t low, int32_t high) {
if (value < low)
value = low;
else if (value > high)
value = high;
return value;
}
static inline int squarei(int a) { return a * a; }
static inline int absi(int a) { return (a < 0) ? -a : a; }
template <typename F> inline F lerp(F a, F b, F s) { return a + (b - a) * s; }
enum class eNoClamp { cNoClamp };
struct color32 {
union {
struct {
uint8_t r;
uint8_t g;
uint8_t b;
uint8_t a;
};
uint8_t c[4];
uint32_t m;
};
color32() {}
color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) {
(void)unused;
set_noclamp_rgba(vr, vg, vb, va);
}
void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) {
c[0] = static_cast<uint8_t>(vr);
c[1] = static_cast<uint8_t>(vg);
c[2] = static_cast<uint8_t>(vb);
c[3] = static_cast<uint8_t>(va);
}
void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) {
c[0] = static_cast<uint8_t>(vr);
c[1] = static_cast<uint8_t>(vg);
c[2] = static_cast<uint8_t>(vb);
}
void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
void set_clamped(int vr, int vg, int vb, int va) {
c[0] = clamp255(vr);
c[1] = clamp255(vg);
c[2] = clamp255(vb);
c[3] = clamp255(va);
}
uint8_t operator[](uint32_t idx) const {
assert(idx < 4);
return c[idx];
}
uint8_t &operator[](uint32_t idx) {
assert(idx < 4);
return c[idx];
}
bool operator==(const color32 &rhs) const { return m == rhs.m; }
void set_rgb(const color32 &other) {
c[0] = static_cast<uint8_t>(other.c[0]);
c[1] = static_cast<uint8_t>(other.c[1]);
c[2] = static_cast<uint8_t>(other.c[2]);
}
static color32 comp_min(const color32 &a, const color32 &b) {
return color32(eNoClamp::cNoClamp, std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3]));
}
static color32 comp_max(const color32 &a, const color32 &b) {
return color32(eNoClamp::cNoClamp, std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3]));
}
};
enum dxt_constants {
cDXT1SelectorBits = 2U,
cDXT1SelectorValues = 1U << cDXT1SelectorBits,
cDXT1SelectorMask = cDXT1SelectorValues - 1U,
cDXT5SelectorBits = 3U,
cDXT5SelectorValues = 1U << cDXT5SelectorBits,
cDXT5SelectorMask = cDXT5SelectorValues - 1U,
};
struct bc1_block {
enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 };
uint8_t m_low_color[cTotalEndpointBytes];
uint8_t m_high_color[cTotalEndpointBytes];
uint8_t m_selectors[cTotalSelectorBytes];
inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); }
inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); }
inline bool is_3color() const { return get_low_color() <= get_high_color(); }
inline void set_low_color(uint16_t c) {
m_low_color[0] = static_cast<uint8_t>(c & 0xFF);
m_low_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF);
}
inline void set_high_color(uint16_t c) {
m_high_color[0] = static_cast<uint8_t>(c & 0xFF);
m_high_color[1] = static_cast<uint8_t>((c >> 8) & 0xFF);
}
inline uint32_t get_selector(uint32_t x, uint32_t y) const {
assert((x < 4U) && (y < 4U));
return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask;
}
inline void set_selector(uint32_t x, uint32_t y, uint32_t val) {
assert((x < 4U) && (y < 4U) && (val < 4U));
m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits)));
m_selectors[y] |= (val << (x * cDXT1SelectorBits));
}
static inline uint16_t pack_color(const color32 &color, bool scaled, uint32_t bias = 127U) {
uint32_t r = color.r, g = color.g, b = color.b;
if (scaled) {
r = (r * 31U + bias) / 255U;
g = (g * 63U + bias) / 255U;
b = (b * 31U + bias) / 255U;
}
return static_cast<uint16_t>(minimum(b, 31U) | (minimum(g, 63U) << 5U) | (minimum(r, 31U) << 11U));
}
static inline uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
static inline void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) {
r = (c >> 11) & 31;
g = (c >> 5) & 63;
b = c & 31;
r = (r << 3) | (r >> 2);
g = (g << 2) | (g >> 4);
b = (b << 3) | (b >> 2);
}
static inline void unpack_color_unscaled(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) {
r = (c >> 11) & 31;
g = (c >> 5) & 63;
b = c & 31;
}
};
namespace rgbcx {
static const uint32_t TOTAL_ORDER_4_0_16 = 15;
static const uint32_t TOTAL_ORDER_4_1_16 = 700;
@ -285,9 +114,6 @@ static bc1_approx_mode g_bc1_approx_mode;
static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256];
static bc1_match_entry g_bc1_match5_half[256], g_bc1_match6_half[256];
static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); }
static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); }
// v0, v1 = unexpanded DXT1 endpoint values (5/6-bits)
// c0, c1 = expanded DXT1 endpoint values (8-bits)
static inline int interp_5_6_ideal(int c0, int c1) {
@ -2387,60 +2213,6 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
}
// BC3-5
struct bc4_block {
enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 };
uint8_t m_endpoints[2];
uint8_t m_selectors[cTotalSelectorBytes];
inline uint32_t get_low_alpha() const { return m_endpoints[0]; }
inline uint32_t get_high_alpha() const { return m_endpoints[1]; }
inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
inline uint64_t get_selector_bits() const {
return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) |
((uint32_t)m_selectors[3] << 24U))) |
(((uint64_t)m_selectors[4]) << 32U) | (((uint64_t)m_selectors[5]) << 40U);
}
inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
assert((x < 4U) && (y < 4U));
return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1);
}
static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) {
pDst[0] = static_cast<uint8_t>(l);
pDst[1] = static_cast<uint8_t>(h);
pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
pDst[6] = 0;
pDst[7] = 255;
return 6;
}
static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) {
pDst[0] = static_cast<uint8_t>(l);
pDst[1] = static_cast<uint8_t>(h);
pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
return 8;
}
static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) {
if (l > h)
return get_block_values8(pDst, l, h);
else
return get_block_values6(pDst, l, h);
}
};
void encode_bc4(void *pDst, const uint8_t *pPixels, uint32_t stride) {
assert(g_initialized);
@ -2640,43 +2412,43 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
bool used_punchthrough = false;
if (l > h) {
c[0].set_noclamp_rgba(r0, g0, b0, 255);
c[1].set_noclamp_rgba(r1, g1, b1, 255);
c[0].set(r0, g0, b0, 255);
c[1].set(r1, g1, b1, 255);
switch (mode) {
case bc1_approx_mode::cBC1Ideal:
c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
break;
case bc1_approx_mode::cBC1IdealRound4:
c[2].set_noclamp_rgba((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255);
c[3].set_noclamp_rgba((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255);
c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255);
c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255);
break;
case bc1_approx_mode::cBC1NVidia:
c[2].set_noclamp_rgba(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255);
c[3].set_noclamp_rgba(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255);
c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255);
c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255);
break;
case bc1_approx_mode::cBC1AMD:
c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
break;
}
} else {
c[0].set_noclamp_rgba(r0, g0, b0, 255);
c[1].set_noclamp_rgba(r1, g1, b1, 255);
c[0].set(r0, g0, b0, 255);
c[1].set(r1, g1, b1, 255);
switch (mode) {
case bc1_approx_mode::cBC1Ideal:
case bc1_approx_mode::cBC1IdealRound4:
c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
break;
case bc1_approx_mode::cBC1NVidia:
c[2].set_noclamp_rgba(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255);
c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255);
break;
case bc1_approx_mode::cBC1AMD:
c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
break;
}
c[3].set_noclamp_rgba(0, 0, 0, 0);
c[3].set(0, 0, 0, 0);
used_punchthrough = true;
}

@ -53,14 +53,7 @@
//
#pragma once
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <cmath>
#include "tables.h"
// By default, the table used to accelerate cluster fit on 4 color blocks uses a 969x128 entry table.
// To reduce the executable size, set RGBCX_USE_SMALLER_TABLES to 1, which selects the smaller 969x32 entry table.
@ -153,7 +146,6 @@ enum {
cEncodeBC1EndpointSearchRoundsMask = 1023U << cEncodeBC1EndpointSearchRoundsShift,
};
// DEFAULT_TOTAL_ORDERINGS_TO_TRY is around 3x faster than libsquish at slightly higher average quality. 10-16 is a good range to start to compete against
// libsquish.
const uint32_t DEFAULT_TOTAL_ORDERINGS_TO_TRY = 10;

@ -0,0 +1,74 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast<uint32_t>(-i) : static_cast<uint32_t>(i); }
static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast<uint64_t>(-i) : static_cast<uint64_t>(i); }
static inline uint8_t to_5(uint32_t v) {
v = v * 31 + 128;
return (uint8_t)((v + (v >> 8)) >> 8);
}
static inline uint8_t to_6(uint32_t v) {
v = v * 63 + 128;
return (uint8_t)((v + (v >> 8)) >> 8);
}
static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); }
static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); }
template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }
template <typename S> inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); }
template <typename S> inline S minimum(S a, S b) { return (a < b) ? a : b; }
template <typename S> inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); }
template <typename S> inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); }
template <typename T> inline T square(T a) { return a * a; }
static inline float clampf(float value, float low, float high) {
if (value < low)
value = low;
else if (value > high)
value = high;
return value;
}
static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); }
template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); }
static inline int32_t clampi(int32_t value, int32_t low, int32_t high) {
if (value < low)
value = low;
else if (value > high)
value = high;
return value;
}
static inline int squarei(int a) { return a * a; }
static inline int absi(int a) { return (a < 0) ? -a : a; }
template <typename F> inline F lerp(F a, F b, F s) { return a + (b - a) * s; }
Loading…
Cancel
Save