From e0511f1aea20e773b7ed4077a95e40a810215fdd Mon Sep 17 00:00:00 2001 From: drewcassidy Date: Mon, 1 Feb 2021 00:25:19 -0800 Subject: [PATCH] Extract utility functions and block structs to their own files --- CMakeLists.txt | 9 +- src/blocks.cpp | 55 ++++++++++ src/blocks.h | 185 ++++++++++++++++++++++++++++++++ src/rgbcx.cpp | 280 +++++-------------------------------------------- src/rgbcx.h | 8 -- src/util.h | 74 +++++++++++++ 6 files changed, 345 insertions(+), 266 deletions(-) create mode 100644 src/blocks.cpp create mode 100644 src/blocks.h create mode 100644 src/util.h diff --git a/CMakeLists.txt b/CMakeLists.txt index ae60159..49123f7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -4,9 +4,10 @@ project(python_rgbcx) # Make the python_rgbcx module set(C_FILES src/main.cpp - src/rgbcx.cpp - src/tables.cpp - ) + src/rgbcx.cpp src/rgbcx.cpp + src/tables.cpp src/tables.h src/table4.h + src/blocks.cpp src/blocks.h + src/util.h) add_library(python_rgbcx MODULE ${C_FILES}) # Link to Pybind @@ -17,4 +18,4 @@ pybind11_extension(python_rgbcx) pybind11_strip(python_rgbcx) # Set module features, like C/C++ standards -target_compile_features(python_rgbcx PUBLIC cxx_std_14 c_std_11) \ No newline at end of file +target_compile_features(python_rgbcx PUBLIC cxx_std_17 c_std_11) \ No newline at end of file diff --git a/src/blocks.cpp b/src/blocks.cpp new file mode 100644 index 0000000..3587897 --- /dev/null +++ b/src/blocks.cpp @@ -0,0 +1,55 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich 2020 + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + */ + +#include "blocks.h" +#include +#include + +// region color32 implementation +color32::color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } + +uint8_t color32::operator[](uint32_t idx) const { + assert(idx < 4); + return c[idx]; +} + +uint8_t &color32::operator[](uint32_t idx) { + assert(idx < 4); + return c[idx]; +} + +void color32::set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va) { + c[0] = vr; + c[1] = vg; + c[2] = vb; + c[3] = va; +} + +void color32::set_rgb(const color32 &other) { + c[0] = other.c[0]; + c[1] = other.c[1]; + c[2] = other.c[2]; +} +color32 color32::comp_min(const color32 &a, const color32 &b) { + return color32(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); +} +color32 color32::comp_max(const color32 &a, const color32 &b) { + return color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); +} +// endregion diff --git a/src/blocks.h b/src/blocks.h new file mode 100644 index 0000000..a9ba0e2 --- /dev/null +++ b/src/blocks.h @@ -0,0 +1,185 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich 2020 + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + */ + +#pragma once + +#include "util.h" +#include +#include + +constexpr inline uint8_t DXT1SelectorBits = 2U; + +struct color32 { + union { + struct { + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; + }; + + uint8_t c[4]; + + uint32_t m; + }; + + color32() {} + + color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va); + + void set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va); + + void set_rgb(const color32 &other); + + uint8_t operator[](uint32_t idx) const; + uint8_t &operator[](uint32_t idx); + + bool operator==(const color32 &rhs) const { return m == rhs.m; } + + static color32 comp_min(const color32 &a, const color32 &b); + static color32 comp_max(const color32 &a, const color32 &b); +}; + +struct bc1_block { + constexpr static inline size_t EndpointSize = 2; + constexpr static inline size_t SelectorSize = 4; + constexpr static inline uint8_t SelectorBits = 2; + constexpr static inline uint8_t SelectorValues = 1 << SelectorBits; + constexpr static inline uint8_t SelectorMask = SelectorValues - 1; + + uint8_t m_low_color[EndpointSize]; + uint8_t m_high_color[EndpointSize]; + uint8_t m_selectors[SelectorSize]; + + inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } + inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } + inline bool is_3color() const { return get_low_color() <= get_high_color(); } + inline void set_low_color(uint16_t c) { + m_low_color[0] = static_cast(c & 0xFF); + m_low_color[1] = static_cast((c >> 8) & 0xFF); + } + inline void set_high_color(uint16_t c) { + m_high_color[0] = static_cast(c & 0xFF); + m_high_color[1] = static_cast((c >> 8) & 0xFF); + } + inline uint32_t get_selector(uint32_t x, uint32_t y) const { + assert((x < 4U) && (y < 4U)); + return (m_selectors[y] >> (x * SelectorBits)) & SelectorMask; + } + inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { + assert((x < 4U) && (y < 4U) && (val < 4U)); + m_selectors[y] &= (~(SelectorMask << (x * SelectorBits))); + m_selectors[y] |= (val << (x * DXT1SelectorBits)); + } + + static inline uint16_t pack_color(const color32 &color, bool scaled, uint32_t bias = 127U) { + uint32_t r = color.r, g = color.g, b = color.b; + if (scaled) { + r = (r * 31U + bias) / 255U; + g = (g * 63U + bias) / 255U; + b = (b * 31U + bias) / 255U; + } + return static_cast(minimum(b, 31U) | (minimum(g, 63U) << 5U) | (minimum(r, 31U) << 11U)); + } + + static inline uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast(b | (g << 5U) | (r << 11U)); } + + static inline void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { + r = (c >> 11) & 31; + g = (c >> 5) & 63; + b = c & 31; + + r = (r << 3) | (r >> 2); + g = (g << 2) | (g >> 4); + b = (b << 3) | (b >> 2); + } + + static inline void unpack_color_unscaled(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { + r = (c >> 11) & 31; + g = (c >> 5) & 63; + b = c & 31; + } +}; + +struct bc4_block { + constexpr static inline size_t EndpointSize = 1; + constexpr static inline size_t SelectorSize = 6; + constexpr static inline uint8_t SelectorBits = 3; + constexpr static inline uint8_t SelectorValues = 1 << SelectorBits; + constexpr static inline uint8_t SelectorMask = SelectorValues - 1; + + uint8_t m_endpoints[EndpointSize * 2]; + uint8_t m_selectors[SelectorSize]; + + inline uint32_t get_low_alpha() const { return m_endpoints[0]; } + inline uint32_t get_high_alpha() const { return m_endpoints[1]; } + inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } + + inline uint64_t get_selector_bits() const { + return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | + ((uint32_t)m_selectors[3] << 24U))) | + (((uint64_t)m_selectors[4]) << 32U) | (((uint64_t)m_selectors[5]) << 40U); + } + + inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const { + assert((x < 4U) && (y < 4U)); + return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask); + } + + static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) { + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 4 + h) / 5); + pDst[3] = static_cast((l * 3 + h * 2) / 5); + pDst[4] = static_cast((l * 2 + h * 3) / 5); + pDst[5] = static_cast((l + h * 4) / 5); + pDst[6] = 0; + pDst[7] = 255; + return 6; + } + + static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) { + pDst[0] = static_cast(l); + pDst[1] = static_cast(h); + pDst[2] = static_cast((l * 6 + h) / 7); + pDst[3] = static_cast((l * 5 + h * 2) / 7); + pDst[4] = static_cast((l * 4 + h * 3) / 7); + pDst[5] = static_cast((l * 3 + h * 4) / 7); + pDst[6] = static_cast((l * 2 + h * 5) / 7); + pDst[7] = static_cast((l + h * 6) / 7); + return 8; + } + + static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) { + if (l > h) + return get_block_values8(pDst, l, h); + else + return get_block_values6(pDst, l, h); + } +}; + +struct bc3_block { + bc4_block alpha_block; + bc1_block color_block; +}; + +struct bc5_block { + bc4_block r_block; + bc4_block g_block; +}; diff --git a/src/rgbcx.cpp b/src/rgbcx.cpp index 3c78bff..426683c 100644 --- a/src/rgbcx.cpp +++ b/src/rgbcx.cpp @@ -1,190 +1,19 @@ // rgbcx.h v1.12 // High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . +#include +#include +#include +#include +#include +#include +#include "util.h" +#include "tables.h" +#include "blocks.h" #include "rgbcx.h" + namespace rgbcx { -static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } -static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } - -static inline uint8_t to_5(uint32_t v) { - v = v * 31 + 128; - return (uint8_t)((v + (v >> 8)) >> 8); -} -static inline uint8_t to_6(uint32_t v) { - v = v * 63 + 128; - return (uint8_t)((v + (v >> 8)) >> 8); -} - -template inline S maximum(S a, S b) { return (a > b) ? a : b; } -template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } -template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } - -template inline S minimum(S a, S b) { return (a < b) ? a : b; } -template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } -template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } - -template inline T square(T a) { return a * a; } - -static inline float clampf(float value, float low, float high) { - if (value < low) - value = low; - else if (value > high) - value = high; - return value; -} -static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } - -template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } -static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { - if (value < low) - value = low; - else if (value > high) - value = high; - return value; -} - -static inline int squarei(int a) { return a * a; } -static inline int absi(int a) { return (a < 0) ? -a : a; } - -template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } - -enum class eNoClamp { cNoClamp }; - -struct color32 { - union { - struct { - uint8_t r; - uint8_t g; - uint8_t b; - uint8_t a; - }; - - uint8_t c[4]; - - uint32_t m; - }; - - color32() {} - - color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - color32(eNoClamp unused, uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { - (void)unused; - set_noclamp_rgba(vr, vg, vb, va); - } - - void set(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { - c[0] = static_cast(vr); - c[1] = static_cast(vg); - c[2] = static_cast(vb); - c[3] = static_cast(va); - } - - void set_noclamp_rgb(uint32_t vr, uint32_t vg, uint32_t vb) { - c[0] = static_cast(vr); - c[1] = static_cast(vg); - c[2] = static_cast(vb); - } - void set_noclamp_rgba(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - - void set_clamped(int vr, int vg, int vb, int va) { - c[0] = clamp255(vr); - c[1] = clamp255(vg); - c[2] = clamp255(vb); - c[3] = clamp255(va); - } - - uint8_t operator[](uint32_t idx) const { - assert(idx < 4); - return c[idx]; - } - uint8_t &operator[](uint32_t idx) { - assert(idx < 4); - return c[idx]; - } - - bool operator==(const color32 &rhs) const { return m == rhs.m; } - - void set_rgb(const color32 &other) { - c[0] = static_cast(other.c[0]); - c[1] = static_cast(other.c[1]); - c[2] = static_cast(other.c[2]); - } - - static color32 comp_min(const color32 &a, const color32 &b) { - return color32(eNoClamp::cNoClamp, std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); - } - static color32 comp_max(const color32 &a, const color32 &b) { - return color32(eNoClamp::cNoClamp, std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); - } -}; - -enum dxt_constants { - cDXT1SelectorBits = 2U, - cDXT1SelectorValues = 1U << cDXT1SelectorBits, - cDXT1SelectorMask = cDXT1SelectorValues - 1U, - cDXT5SelectorBits = 3U, - cDXT5SelectorValues = 1U << cDXT5SelectorBits, - cDXT5SelectorMask = cDXT5SelectorValues - 1U, -}; - -struct bc1_block { - enum { cTotalEndpointBytes = 2, cTotalSelectorBytes = 4 }; - - uint8_t m_low_color[cTotalEndpointBytes]; - uint8_t m_high_color[cTotalEndpointBytes]; - uint8_t m_selectors[cTotalSelectorBytes]; - - inline uint32_t get_low_color() const { return m_low_color[0] | (m_low_color[1] << 8U); } - inline uint32_t get_high_color() const { return m_high_color[0] | (m_high_color[1] << 8U); } - inline bool is_3color() const { return get_low_color() <= get_high_color(); } - inline void set_low_color(uint16_t c) { - m_low_color[0] = static_cast(c & 0xFF); - m_low_color[1] = static_cast((c >> 8) & 0xFF); - } - inline void set_high_color(uint16_t c) { - m_high_color[0] = static_cast(c & 0xFF); - m_high_color[1] = static_cast((c >> 8) & 0xFF); - } - inline uint32_t get_selector(uint32_t x, uint32_t y) const { - assert((x < 4U) && (y < 4U)); - return (m_selectors[y] >> (x * cDXT1SelectorBits)) & cDXT1SelectorMask; - } - inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { - assert((x < 4U) && (y < 4U) && (val < 4U)); - m_selectors[y] &= (~(cDXT1SelectorMask << (x * cDXT1SelectorBits))); - m_selectors[y] |= (val << (x * cDXT1SelectorBits)); - } - - static inline uint16_t pack_color(const color32 &color, bool scaled, uint32_t bias = 127U) { - uint32_t r = color.r, g = color.g, b = color.b; - if (scaled) { - r = (r * 31U + bias) / 255U; - g = (g * 63U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } - return static_cast(minimum(b, 31U) | (minimum(g, 63U) << 5U) | (minimum(r, 31U) << 11U)); - } - - static inline uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast(b | (g << 5U) | (r << 11U)); } - - static inline void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { - r = (c >> 11) & 31; - g = (c >> 5) & 63; - b = c & 31; - - r = (r << 3) | (r >> 2); - g = (g << 2) | (g >> 4); - b = (b << 3) | (b >> 2); - } - - static inline void unpack_color_unscaled(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { - r = (c >> 11) & 31; - g = (c >> 5) & 63; - b = c & 31; - } -}; - static const uint32_t TOTAL_ORDER_4_0_16 = 15; static const uint32_t TOTAL_ORDER_4_1_16 = 700; static const uint32_t TOTAL_ORDER_4_2_16 = 753; @@ -285,9 +114,6 @@ static bc1_approx_mode g_bc1_approx_mode; static bc1_match_entry g_bc1_match5_equals_1[256], g_bc1_match6_equals_1[256]; static bc1_match_entry g_bc1_match5_half[256], g_bc1_match6_half[256]; -static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); } -static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); } - // v0, v1 = unexpanded DXT1 endpoint values (5/6-bits) // c0, c1 = expanded DXT1 endpoint values (8-bits) static inline int interp_5_6_ideal(int c0, int c1) { @@ -2387,60 +2213,6 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot } // BC3-5 - -struct bc4_block { - enum { cBC4SelectorBits = 3, cTotalSelectorBytes = 6, cMaxSelectorValues = 8 }; - uint8_t m_endpoints[2]; - - uint8_t m_selectors[cTotalSelectorBytes]; - - inline uint32_t get_low_alpha() const { return m_endpoints[0]; } - inline uint32_t get_high_alpha() const { return m_endpoints[1]; } - inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } - - inline uint64_t get_selector_bits() const { - return ((uint64_t)((uint32_t)m_selectors[0] | ((uint32_t)m_selectors[1] << 8U) | ((uint32_t)m_selectors[2] << 16U) | - ((uint32_t)m_selectors[3] << 24U))) | - (((uint64_t)m_selectors[4]) << 32U) | (((uint64_t)m_selectors[5]) << 40U); - } - - inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const { - assert((x < 4U) && (y < 4U)); - return (selector_bits >> (((y * 4) + x) * cBC4SelectorBits)) & (cMaxSelectorValues - 1); - } - - static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 4 + h) / 5); - pDst[3] = static_cast((l * 3 + h * 2) / 5); - pDst[4] = static_cast((l * 2 + h * 3) / 5); - pDst[5] = static_cast((l + h * 4) / 5); - pDst[6] = 0; - pDst[7] = 255; - return 6; - } - - static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 6 + h) / 7); - pDst[3] = static_cast((l * 5 + h * 2) / 7); - pDst[4] = static_cast((l * 4 + h * 3) / 7); - pDst[5] = static_cast((l * 3 + h * 4) / 7); - pDst[6] = static_cast((l * 2 + h * 5) / 7); - pDst[7] = static_cast((l + h * 6) / 7); - return 8; - } - - static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) { - if (l > h) - return get_block_values8(pDst, l, h); - else - return get_block_values6(pDst, l, h); - } -}; - void encode_bc4(void *pDst, const uint8_t *pPixels, uint32_t stride) { assert(g_initialized); @@ -2640,43 +2412,43 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr bool used_punchthrough = false; if (l > h) { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); + c[0].set(r0, g0, b0, 255); + c[1].set(r1, g1, b1, 255); switch (mode) { case bc1_approx_mode::cBC1Ideal: - c[2].set_noclamp_rgba((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); - c[3].set_noclamp_rgba((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); + c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); break; case bc1_approx_mode::cBC1IdealRound4: - c[2].set_noclamp_rgba((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); - c[3].set_noclamp_rgba((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); + c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); + c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); break; case bc1_approx_mode::cBC1NVidia: - c[2].set_noclamp_rgba(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); - c[3].set_noclamp_rgba(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); + c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); + c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); break; case bc1_approx_mode::cBC1AMD: - c[2].set_noclamp_rgba(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); - c[3].set_noclamp_rgba(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); break; } } else { - c[0].set_noclamp_rgba(r0, g0, b0, 255); - c[1].set_noclamp_rgba(r1, g1, b1, 255); + c[0].set(r0, g0, b0, 255); + c[1].set(r1, g1, b1, 255); switch (mode) { case bc1_approx_mode::cBC1Ideal: case bc1_approx_mode::cBC1IdealRound4: - c[2].set_noclamp_rgba((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); break; case bc1_approx_mode::cBC1NVidia: - c[2].set_noclamp_rgba(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); + c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); break; case bc1_approx_mode::cBC1AMD: - c[2].set_noclamp_rgba(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); break; } - c[3].set_noclamp_rgba(0, 0, 0, 0); + c[3].set(0, 0, 0, 0); used_punchthrough = true; } diff --git a/src/rgbcx.h b/src/rgbcx.h index 90eb1c8..8a3b0e7 100644 --- a/src/rgbcx.h +++ b/src/rgbcx.h @@ -53,14 +53,7 @@ // #pragma once -#include -#include -#include #include -#include -#include -#include -#include "tables.h" // By default, the table used to accelerate cluster fit on 4 color blocks uses a 969x128 entry table. // To reduce the executable size, set RGBCX_USE_SMALLER_TABLES to 1, which selects the smaller 969x32 entry table. @@ -153,7 +146,6 @@ enum { cEncodeBC1EndpointSearchRoundsMask = 1023U << cEncodeBC1EndpointSearchRoundsShift, }; - // DEFAULT_TOTAL_ORDERINGS_TO_TRY is around 3x faster than libsquish at slightly higher average quality. 10-16 is a good range to start to compete against // libsquish. const uint32_t DEFAULT_TOTAL_ORDERINGS_TO_TRY = 10; diff --git a/src/util.h b/src/util.h new file mode 100644 index 0000000..42949da --- /dev/null +++ b/src/util.h @@ -0,0 +1,74 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + */ + +#pragma once +#include + +static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } +static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } + +static inline uint8_t to_5(uint32_t v) { + v = v * 31 + 128; + return (uint8_t)((v + (v >> 8)) >> 8); +} +static inline uint8_t to_6(uint32_t v) { + v = v * 63 + 128; + return (uint8_t)((v + (v >> 8)) >> 8); +} + +static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); } +static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); } + +template inline S maximum(S a, S b) { return (a > b) ? a : b; } +template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } +template inline S maximum(S a, S b, S c, S d) { return maximum(maximum(maximum(a, b), c), d); } + +template inline S minimum(S a, S b) { return (a < b) ? a : b; } +template inline S minimum(S a, S b, S c) { return minimum(minimum(a, b), c); } +template inline S minimum(S a, S b, S c, S d) { return minimum(minimum(minimum(a, b), c), d); } + +template inline T square(T a) { return a * a; } + +static inline float clampf(float value, float low, float high) { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; +} +static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } + +template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } +static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { + if (value < low) + value = low; + else if (value > high) + value = high; + return value; +} + +static inline int squarei(int a) { return a * a; } +static inline int absi(int a) { return (a < 0) ? -a : a; } + +template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } + + + + +