diff --git a/.clang-format b/.clang-format index ee1cf84..ed7b74d 100644 --- a/.clang-format +++ b/.clang-format @@ -1,6 +1,10 @@ --- -BasedOnStyle: LLVM +BasedOnStyle: google IndentWidth: 4 ColumnLimit: 160 +AllowShortBlocksOnASingleLine: Always +AllowShortFunctionsOnASingleLine: All +AlwaysBreakTemplateDeclarations: MultiLine +#AlignConsecutiveDeclarations: true --- diff --git a/.clang-tidy b/.clang-tidy index 77c0cca..113d779 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,21 +1,18 @@ ---- -FormatStyle: LLVM -Checks: 'clang-diagnostic-*,clang-analyzer-*,readability-*' +FormatStyle: google +Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming,cppcoreguidelines-narrowing-conversions' CheckOptions: - - key: readability-identifier-naming.AbstractClassCase - value: 'CamelCase' - - key: readability-identifier-naming.ClassCase - value: 'CamelCase' - - key: readability-identifier-naming.EnumCase - value: 'CamelCase' - - key: readability-identifier-naming.StructCase - value: 'CamelCase' - - key: readability-identifier-naming.TypedefCase - value: 'CamelCase' - - key: readability-identifier-naming.MemberCase - value: 'CamelCase' - - key: readability-identifier-naming.FunctionCase - value: 'camelBack' - - key: readability-identifier-naming.ClassMethodCase - value: 'camelBack' \ No newline at end of file + - { key: readability-identifier-naming.NamespaceCase, value: lower_case } + - { key: readability-identifier-naming.ClassCase, value: CamelCase } + - { key: readability-identifier-naming.StructCase, value: CamelCase } + - { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase } + - { key: readability-identifier-naming.FunctionCase, value: aNy_CasE } + - { key: readability-identifier-naming.VariableCase, value: lower_case } + - { key: readability-identifier-naming.MemberCase, value: lower_case } + - { key: readability-identifier-naming.PrivateMemberPrefix, value: _ } + - { key: readability-identifier-naming.ProtectedMemberPrefix, value: _ } + - { key: readability-identifier-naming.EnumConstantCase, value: CamelCase } + - { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase } + - { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase } + - { key: readability-identifier-naming.MemberConstantCase, value: CamelCase } + - { key: readability-identifier-naming.StaticConstantCase, value: CamelCase } \ No newline at end of file diff --git a/.gitignore b/.gitignore index f51f421..fb19e63 100644 --- a/.gitignore +++ b/.gitignore @@ -17,4 +17,4 @@ install_manifest.txt compile_commands.json CTestTestfile.cmake _deps -cmake-build-debug +cmake-build-* diff --git a/CMakeLists.txt b/CMakeLists.txt index 4cafb2a..cbaab56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -18,8 +18,7 @@ source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCE_FILES} ${HEADER_FIL pybind11_add_module(python_rgbcx ${SOURCE_FILES} ${HEADER_FILES} - ${PYTHON_FILES} - ) + ${PYTHON_FILES}) add_executable(test_rgbcx ${SOURCE_FILES} @@ -27,5 +26,8 @@ add_executable(test_rgbcx ${TEST_FILES}) # Set module features, like C/C++ standards -target_compile_features(python_rgbcx PUBLIC cxx_std_17 c_std_11) -target_compile_features(test_rgbcx PUBLIC cxx_std_17 c_std_11) \ No newline at end of file +target_compile_features(python_rgbcx PUBLIC cxx_std_20 c_std_11) +target_compile_features(test_rgbcx PUBLIC cxx_std_20 c_std_11) + +set_property(TARGET python_rgbcx test_rgbcx PROPERTY INTERPROCEDURAL_OPTIMIZATION True) #enable FLTO if available +set_property(TARGET python_rgbcx test_rgbcx PROPERTY OSX_ARCHITECTURES_RELEASE x86_64 arm64) #Mach-O fat binary for arm and x86 diff --git a/src/blocks.cpp b/src/blocks.cpp index 791c8a9..78bfad7 100644 --- a/src/blocks.cpp +++ b/src/blocks.cpp @@ -18,38 +18,8 @@ */ #include "blocks.h" +#include "color.h" #include #include -// region Color32 implementation -Color32::Color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); } - -uint8_t Color32::operator[](uint32_t idx) const { - assert(idx < 4); - return C[idx]; -} - -uint8_t &Color32::operator[](uint32_t idx) { - assert(idx < 4); - return C[idx]; -} - -void Color32::set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va) { - C[0] = vr; - C[1] = vg; - C[2] = vb; - C[3] = va; -} - -void Color32::set(const Color32 &other) { - C[0] = other.C[0]; - C[1] = other.C[1]; - C[2] = other.C[2]; -} -Color32 Color32::min(const Color32 &a, const Color32 &b) { - return Color32(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); -} -Color32 Color32::max(const Color32 &a, const Color32 &b) { - return Color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); -} // endregion diff --git a/src/blocks.h b/src/blocks.h index 0492e7d..9223d3b 100644 --- a/src/blocks.h +++ b/src/blocks.h @@ -19,172 +19,120 @@ #pragma once -#include "util.h" +#include #include #include #include -constexpr inline uint8_t DXT1SelectorBits = 2U; +#include "color.h" +#include "util.h" #pragma pack(push, 1) -struct Color32 { - union { - struct { - uint8_t R; - uint8_t G; - uint8_t B; - uint8_t A; - }; - - uint8_t C[4]; - }; - - Color32() {} - - Color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va); - - void set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va); - - void set(const Color32 &other); - - uint8_t operator[](uint32_t idx) const; - uint8_t &operator[](uint32_t idx); - - bool operator==(const Color32 &rhs) const { - return R == rhs.R && G == rhs.G && B == rhs.B && A == rhs.A; +class BC1Block { + public: + uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; } + uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; } + Color32 GetLowColor32() const { return Color32::unpack565(GetLowColor()); } + Color32 GetHighColor32() const { return Color32::unpack565(GetHighColor()); } + + bool Is3Color() const { return GetLowColor() <= GetHighColor(); } + void SetLowColor(uint16_t c) { + _low_color[0] = c & 0xFF; + _low_color[1] = (c >> 8) & 0xFF; } - - static Color32 min(const Color32 &a, const Color32 &b); - static Color32 max(const Color32 &a, const Color32 &b); -}; - -struct BC1Block { - constexpr static inline size_t EndpointSize = 2; - constexpr static inline size_t SelectorSize = 4; - constexpr static inline uint8_t SelectorBits = 2; - constexpr static inline uint8_t SelectorValues = 1 << SelectorBits; - constexpr static inline uint8_t SelectorMask = SelectorValues - 1; - - uint8_t LowColor[EndpointSize]; - uint8_t HighColor[EndpointSize]; - uint8_t Selectors[SelectorSize]; - - inline uint32_t get_low_color() const { return LowColor[0] | (LowColor[1] << 8U); } - inline uint32_t get_high_color() const { return HighColor[0] | (HighColor[1] << 8U); } - inline bool is_3color() const { return get_low_color() <= get_high_color(); } - inline void set_low_color(uint16_t c) { - LowColor[0] = static_cast(c & 0xFF); - LowColor[1] = static_cast((c >> 8) & 0xFF); - } - inline void set_high_color(uint16_t c) { - HighColor[0] = static_cast(c & 0xFF); - HighColor[1] = static_cast((c >> 8) & 0xFF); + void SetHighColor(uint16_t c) { + _high_color[0] = c & 0xFF; + _high_color[1] = (c >> 8) & 0xFF; } - inline uint32_t get_selector(uint32_t x, uint32_t y) const { + uint32_t GetSelector(uint32_t x, uint32_t y) const { assert((x < 4U) && (y < 4U)); - return (Selectors[y] >> (x * SelectorBits)) & SelectorMask; + return (selectors[y] >> (x * SelectorBits)) & SelectorMask; } - inline void set_selector(uint32_t x, uint32_t y, uint32_t val) { + void SetSelector(uint32_t x, uint32_t y, uint32_t val) { assert((x < 4U) && (y < 4U) && (val < 4U)); - Selectors[y] &= (~(SelectorMask << (x * SelectorBits))); - Selectors[y] |= (val << (x * DXT1SelectorBits)); + selectors[y] &= (~(SelectorMask << (x * SelectorBits))); + selectors[y] |= (val << (x * SelectorBits)); } - static inline uint16_t pack_color(const Color32 &color, bool scaled, uint32_t bias = 127U) { - uint32_t r = color.R, g = color.G, b = color.B; - if (scaled) { - r = (r * 31U + bias) / 255U; - g = (g * 63U + bias) / 255U; - b = (b * 31U + bias) / 255U; - } - return static_cast(minimum(b, 31U) | (minimum(g, 63U) << 5U) | (minimum(r, 31U) << 11U)); - } - - static inline uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast(b | (g << 5U) | (r << 11U)); } - - static inline void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { - r = (c >> 11) & 31; - g = (c >> 5) & 63; - b = c & 31; - - r = (r << 3) | (r >> 2); - g = (g << 2) | (g >> 4); - b = (b << 3) | (b >> 2); - } - - static inline void unpack_color_unscaled(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) { - r = (c >> 11) & 31; - g = (c >> 5) & 63; - b = c & 31; - } -}; - -struct BC4Block { - constexpr static inline size_t EndpointSize = 1; - constexpr static inline size_t SelectorSize = 6; - constexpr static inline uint8_t SelectorBits = 3; + constexpr static inline size_t EndpointSize = 2; + constexpr static inline size_t SelectorSize = 4; + constexpr static inline uint8_t SelectorBits = 2; constexpr static inline uint8_t SelectorValues = 1 << SelectorBits; constexpr static inline uint8_t SelectorMask = SelectorValues - 1; - uint8_t LowAlpha; - uint8_t HighAlpha; - uint8_t Selectors[SelectorSize]; + private: + std::array _low_color; + std::array _high_color; + + public: + std::array selectors; +}; - inline uint32_t get_low_alpha() const { return LowAlpha; } - inline uint32_t get_high_alpha() const { return HighAlpha; } - inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); } +class BC4Block { + public: + inline uint32_t GetLowAlpha() const { return low_alpha; } + inline uint32_t GetHighAlpha() const { return high_alpha; } + inline bool Is6Alpha() const { return GetLowAlpha() <= GetHighAlpha(); } - inline uint64_t get_selector_bits() const { - return ((uint64_t)((uint32_t)Selectors[0] | ((uint32_t)Selectors[1] << 8U) | ((uint32_t)Selectors[2] << 16U) | - ((uint32_t)Selectors[3] << 24U))) | - (((uint64_t)Selectors[4]) << 32U) | (((uint64_t)Selectors[5]) << 40U); + inline uint64_t GetSelectorBits() const { + return ((uint64_t)((uint32_t)selectors[0] | ((uint32_t)selectors[1] << 8U) | ((uint32_t)selectors[2] << 16U) | ((uint32_t)selectors[3] << 24U))) | + (((uint64_t)selectors[4]) << 32U) | (((uint64_t)selectors[5]) << 40U); } - inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const { + inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const { assert((x < 4U) && (y < 4U)); return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask); } - static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 4 + h) / 5); - pDst[3] = static_cast((l * 3 + h * 2) / 5); - pDst[4] = static_cast((l * 2 + h * 3) / 5); - pDst[5] = static_cast((l + h * 4) / 5); - pDst[6] = 0; - pDst[7] = 255; - return 6; + static inline std::array GetBlockValues6(uint32_t l, uint32_t h) { + return {static_cast(l), + static_cast(h), + static_cast((l * 4 + h) / 5), + static_cast((l * 3 + h * 2) / 5), + static_cast((l * 2 + h * 3) / 5), + static_cast((l + h * 4) / 5), + 0, + 255}; } - static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) { - pDst[0] = static_cast(l); - pDst[1] = static_cast(h); - pDst[2] = static_cast((l * 6 + h) / 7); - pDst[3] = static_cast((l * 5 + h * 2) / 7); - pDst[4] = static_cast((l * 4 + h * 3) / 7); - pDst[5] = static_cast((l * 3 + h * 4) / 7); - pDst[6] = static_cast((l * 2 + h * 5) / 7); - pDst[7] = static_cast((l + h * 6) / 7); - return 8; + static inline std::array GetBlockValues8(uint32_t l, uint32_t h) { + return {static_cast(l), + static_cast(h), + static_cast((l * 6 + h) / 7), + static_cast((l * 5 + h * 2) / 7), + static_cast((l * 4 + h * 3) / 7), + static_cast((l * 3 + h * 4) / 7), + static_cast((l * 2 + h * 5) / 7), + static_cast((l + h * 6) / 7)}; } - static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) { + static inline std::array GetBlockValues(uint32_t l, uint32_t h) { if (l > h) - return get_block_values8(pDst, l, h); + return GetBlockValues8(l, h); else - return get_block_values6(pDst, l, h); + return GetBlockValues6(l, h); } -}; -struct BC3Block { - BC4Block AlphaBlock; - BC1Block ColorBlock; + constexpr static inline size_t EndpointSize = 1; + constexpr static inline size_t SelectorSize = 6; + constexpr static inline uint8_t SelectorBits = 3; + constexpr static inline uint8_t SelectorValues = 1 << SelectorBits; + constexpr static inline uint8_t SelectorMask = SelectorValues - 1; + + uint8_t low_alpha; + uint8_t high_alpha; + uint8_t selectors[SelectorSize]; }; -struct BC5Block { - BC4Block RBlock; - BC4Block GBlock; +class BC3Block { + public: + BC4Block alpha_block; + BC1Block color_block; }; +class BC5Block { + public: + BC4Block r_block; + BC4Block g_block; +}; #pragma pack(pop) \ No newline at end of file diff --git a/src/color.cpp b/src/color.cpp new file mode 100644 index 0000000..694516d --- /dev/null +++ b/src/color.cpp @@ -0,0 +1,76 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + */ + +#include "color.h" +#include +#include + +// region Color32 implementation +Color32::Color32() { set(0, 0, 0, 0xFF); } + +Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { set(R, G, B, A); } + +uint16_t Color32::pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); } + +uint16_t Color32::pack565(uint16_t R, uint16_t G, uint16_t B) { return pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); } + +Color32 Color32::unpack565(uint16_t Packed) { + uint8_t R = scale5To8((Packed >> 11) & 0x1F); + uint8_t G = scale6To8((Packed >> 5) & 0x3F); + uint8_t B = scale5To8(Packed & 0x1F); + + return Color32(R, G, B); +} + +uint8_t Color32::operator[](uint32_t Index) const { + assert(Index < 4); + return C[Index]; +} + +uint8_t &Color32::operator[](uint32_t Index) { + assert(Index < 4); + return C[Index]; +} + +void Color32::set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { + this->R = R; + this->G = G; + this->B = B; + this->A = A; +} + +void Color32::set(const Color32 &Other) { + this->R = Other.R; + this->G = Other.G; + this->B = Other.B; + this->A = Other.A; +} + +Color32 Color32::min(const Color32 &a, const Color32 &b) { + return Color32(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3])); +} + +Color32 Color32::max(const Color32 &a, const Color32 &b) { + return Color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); +} + +uint16_t Color32::pack565() { return pack565(R, G, B); } + +uint16_t Color32::pack565Unscaled() { return pack565Unscaled(R, G, B); } +// endregion \ No newline at end of file diff --git a/src/color.h b/src/color.h new file mode 100644 index 0000000..864b7b9 --- /dev/null +++ b/src/color.h @@ -0,0 +1,64 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see . + */ + +#pragma once +#include "util.h" +#include +#include +#include + +#pragma pack(push, 1) +class Color32 { + public: + union { + struct { + uint8_t R; + uint8_t G; + uint8_t B; + uint8_t A; + }; + + uint8_t C[4]; + }; + + Color32(); + + Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A = 0xFF); + + static uint16_t pack565Unscaled(uint16_t R, uint16_t G, uint16_t B); + static uint16_t pack565(uint16_t R, uint16_t G, uint16_t B); + + static Color32 unpack565(uint16_t Packed); + + bool operator==(const Color32 &Rhs) const { return R == Rhs.R && G == Rhs.G && B == Rhs.B && A == Rhs.A; } + + uint8_t operator[](uint32_t Index) const; + uint8_t &operator[](uint32_t Index); + + uint16_t pack565(); + uint16_t pack565Unscaled(); + + static Color32 min(const Color32 &A, const Color32 &B); + static Color32 max(const Color32 &A, const Color32 &B); + + void set(uint8_t R, uint8_t G, uint8_t B, uint8_t A); + + void set(const Color32 &Other); +}; +#pragma pack(pop) \ No newline at end of file diff --git a/src/rgbcx.cpp b/src/rgbcx.cpp index 22d5229..cd5291f 100644 --- a/src/rgbcx.cpp +++ b/src/rgbcx.cpp @@ -1,16 +1,16 @@ // rgbcx.h v1.12 // High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 . +#include "rgbcx.h" +#include "blocks.h" +#include "color.h" +#include "tables.h" +#include "util.h" #include #include #include #include #include -#include -#include "util.h" -#include "tables.h" -#include "blocks.h" -#include "rgbcx.h" namespace rgbcx { @@ -159,7 +159,7 @@ static inline int interp_half_5_6_amd(int c0, int c1) { } static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) { - // assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); + // assert(scale_5_to_8(v0) == c0 && scale5To8(v1) == c1); switch (mode) { case bc1_approx_mode::cBC1NVidia: return interp_5_nv(v0, v1); @@ -176,7 +176,7 @@ static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) { (void)v0; (void)v1; - // assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); + // assert(scale_6_to_8(v0) == c0 && scale6To8(v1) == c1); switch (mode) { case bc1_approx_mode::cBC1NVidia: return interp_6_nv(c0, c1); @@ -191,7 +191,7 @@ static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) } static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) { - assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1); + assert(scale5To8(v0) == c0 && scale5To8(v1) == c1); switch (mode) { case bc1_approx_mode::cBC1NVidia: return interp_half_5_nv(v0, v1); @@ -207,7 +207,7 @@ static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) { (void)v0; (void)v1; - assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1); + assert(scale6To8(v0) == c0 && scale6To8(v1) == c1); switch (mode) { case bc1_approx_mode::cBC1NVidia: return interp_half_6_nv(c0, c1); @@ -434,12 +434,12 @@ void encode_bc1_solid_block(void *pDst, uint32_t fr, uint32_t fg, uint32_t fb, b } } - pDst_block->set_low_color(static_cast(max16)); - pDst_block->set_high_color(static_cast(min16)); - pDst_block->Selectors[0] = static_cast(mask); - pDst_block->Selectors[1] = static_cast(mask); - pDst_block->Selectors[2] = static_cast(mask); - pDst_block->Selectors[3] = static_cast(mask); + pDst_block->SetLowColor(static_cast(max16)); + pDst_block->SetHighColor(static_cast(min16)); + pDst_block->selectors[0] = static_cast(mask); + pDst_block->selectors[1] = static_cast(mask); + pDst_block->selectors[2] = static_cast(mask); + pDst_block->selectors[3] = static_cast(mask); } static const float g_midpoint5[32] = {.015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f, @@ -944,8 +944,8 @@ static inline void precise_round_565_noscale(vec3F xl, vec3F xh, int &trial_lr, } static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) { - uint32_t lc16 = BC1Block::pack_unscaled_color(lr, lg, lb); - uint32_t hc16 = BC1Block::pack_unscaled_color(hr, hg, hb); + uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb); + uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb); // Always forbid 3 color blocks if (lc16 == hc16) { @@ -964,13 +964,13 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int } assert(lc16 > hc16); - pDst_block->set_low_color(static_cast(lc16)); - pDst_block->set_high_color(static_cast(hc16)); + pDst_block->SetLowColor(static_cast(lc16)); + pDst_block->SetHighColor(static_cast(hc16)); - pDst_block->Selectors[0] = mask; - pDst_block->Selectors[1] = mask; - pDst_block->Selectors[2] = mask; - pDst_block->Selectors[3] = mask; + pDst_block->selectors[0] = mask; + pDst_block->selectors[1] = mask; + pDst_block->selectors[2] = mask; + pDst_block->selectors[3] = mask; } else { uint8_t invert_mask = 0; if (lc16 < hc16) { @@ -979,24 +979,25 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int } assert(lc16 > hc16); - pDst_block->set_low_color((uint16_t)lc16); - pDst_block->set_high_color((uint16_t)hc16); + pDst_block->SetLowColor((uint16_t)lc16); + pDst_block->SetHighColor((uint16_t)hc16); uint32_t packed_sels = 0; static const uint8_t s_sel_trans[4] = {0, 2, 3, 1}; for (uint32_t i = 0; i < 16; i++) packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2)); - pDst_block->Selectors[0] = (uint8_t)packed_sels ^ invert_mask; - pDst_block->Selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; - pDst_block->Selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; - pDst_block->Selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; + // todo: make this less silly to prevent packing and unpacking + pDst_block->selectors[0] = (uint8_t)packed_sels ^ invert_mask; + pDst_block->selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask; + pDst_block->selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask; + pDst_block->selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask; } } static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) { - uint32_t lc16 = BC1Block::pack_unscaled_color(lr, lg, lb); - uint32_t hc16 = BC1Block::pack_unscaled_color(hr, hg, hb); + uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb); + uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb); bool invert_flag = false; if (lc16 > hc16) { @@ -1006,8 +1007,8 @@ static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int assert(lc16 <= hc16); - pDst_block->set_low_color((uint16_t)lc16); - pDst_block->set_high_color((uint16_t)hc16); + pDst_block->SetLowColor((uint16_t)lc16); + pDst_block->SetHighColor((uint16_t)hc16); uint32_t packed_sels = 0; @@ -1021,10 +1022,11 @@ static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int packed_sels |= ((uint32_t)sels[i] << (i * 2)); } - pDst_block->Selectors[0] = (uint8_t)packed_sels; - pDst_block->Selectors[1] = (uint8_t)(packed_sels >> 8); - pDst_block->Selectors[2] = (uint8_t)(packed_sels >> 16); - pDst_block->Selectors[3] = (uint8_t)(packed_sels >> 24); + // todo: make this less silly to prevent packing and unpacking + pDst_block->selectors[0] = (uint8_t)packed_sels; + pDst_block->selectors[1] = (uint8_t)(packed_sels >> 8); + pDst_block->selectors[2] = (uint8_t)(packed_sels >> 16); + pDst_block->selectors[3] = (uint8_t)(packed_sels >> 24); } struct bc1_encode_results { @@ -1138,13 +1140,13 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags } } - int lr = scale_8_to_5(pSrc_pixels[low_c].R); - int lg = scale_8_to_6(pSrc_pixels[low_c].G); - int lb = scale_8_to_5(pSrc_pixels[low_c].B); + int lr = scale8To5(pSrc_pixels[low_c].R); + int lg = scale8To6(pSrc_pixels[low_c].G); + int lb = scale8To5(pSrc_pixels[low_c].B); - int hr = scale_8_to_5(pSrc_pixels[high_c].R); - int hg = scale_8_to_6(pSrc_pixels[high_c].G); - int hb = scale_8_to_5(pSrc_pixels[high_c].B); + int hr = scale8To5(pSrc_pixels[high_c].R); + int hg = scale8To6(pSrc_pixels[high_c].G); + int hb = scale8To5(pSrc_pixels[high_c].B); uint8_t trial_sels[16]; uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX); @@ -1491,14 +1493,14 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t // Grayscale blocks are a common enough case to specialize. if ((max_r - min_r) < 2) { - lr = lb = hr = hb = scale_8_to_5(fr); - lg = hg = scale_8_to_6(fr); + lr = lb = hr = hb = scale8To5(fr); + lg = hg = scale8To6(fr); } else { - lr = lb = scale_8_to_5(min_r); - lg = scale_8_to_6(min_r); + lr = lb = scale8To5(min_r); + lg = scale8To6(min_r); - hr = hb = scale_8_to_5(max_r); - hg = scale_8_to_6(max_r); + hr = hb = scale8To5(max_r); + hg = scale8To6(max_r); } } else if (flags & cEncodeBC1Use2DLS) { // 2D Least Squares approach from Humus's example, with added inset and optimal rounding. @@ -1721,13 +1723,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t if (icov_yz < 0) std::swap(y0, y1); - lr = scale_8_to_5(x0); - lg = scale_8_to_6(y0); - lb = scale_8_to_5(min_b); + lr = scale8To5(x0); + lg = scale8To6(y0); + lb = scale8To5(min_b); - hr = scale_8_to_5(x1); - hg = scale_8_to_6(y1); - hb = scale_8_to_5(max_b); + hr = scale8To5(x1); + hg = scale8To6(y1); + hb = scale8To5(max_b); } else { // Select 2 colors along the principle axis. (There must be a faster/simpler way.) uint32_t low_c = 0, high_c = 0; @@ -1806,13 +1808,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t low_c = low_dot & 15; high_c = high_dot & 15; - lr = scale_8_to_5(pSrc_pixels[low_c].R); - lg = scale_8_to_6(pSrc_pixels[low_c].G); - lb = scale_8_to_5(pSrc_pixels[low_c].B); + lr = scale8To5(pSrc_pixels[low_c].R); + lg = scale8To6(pSrc_pixels[low_c].G); + lb = scale8To5(pSrc_pixels[low_c].B); - hr = scale_8_to_5(pSrc_pixels[high_c].R); - hg = scale_8_to_6(pSrc_pixels[high_c].G); - hb = scale_8_to_5(pSrc_pixels[high_c].B); + hr = scale8To5(pSrc_pixels[high_c].R); + hg = scale8To6(pSrc_pixels[high_c].G); + hb = scale8To5(pSrc_pixels[high_c].B); } } @@ -2349,6 +2351,7 @@ void encode_bc4(void *pDst, const uint8_t *pPixels, uint32_t stride) { const uint64_t f = a0 | a1 | a2 | a3; + // TODO: make this less silly by using the BC4Block class pDst_bytes[2] = (uint8_t)f; pDst_bytes[3] = (uint8_t)(f >> 8U); pDst_bytes[4] = (uint8_t)(f >> 16U); @@ -2357,28 +2360,28 @@ void encode_bc4(void *pDst, const uint8_t *pPixels, uint32_t stride) { pDst_bytes[7] = (uint8_t)(f >> 40U); } -void encode_bc3(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t total_orderings_to_try) { +void encode_bc3(BC3Block *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t total_orderings_to_try) { assert(g_initialized); // 3-color blocks are not allowed with BC3 (on most GPU's). flags &= ~(cEncodeBC1Use3ColorBlocksForBlackPixels | cEncodeBC1Use3ColorBlocks); - encode_bc4(pDst, pPixels + 3, 4); - encode_bc1(static_cast(pDst) + 8, pPixels, flags, total_orderings_to_try); + encode_bc4(&pDst->alpha_block, pPixels + 3, 4); + encode_bc1(&pDst->color_block, pPixels, flags, total_orderings_to_try); } -void encode_bc3(uint32_t level, void *pDst, const uint8_t *pPixels) { +void encode_bc3(uint32_t level, BC3Block *pDst, const uint8_t *pPixels) { assert(g_initialized); - encode_bc4(pDst, pPixels + 3, 4); - encode_bc1(level, static_cast(pDst) + 8, pPixels, false, false); + encode_bc4(&pDst->alpha_block, pPixels + 3, 4); + encode_bc1(level, &pDst->color_block, pPixels, false, false); } -void encode_bc5(void *pDst, const uint8_t *pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) { +void encode_bc5(BC5Block *pDst, const uint8_t *pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) { assert(g_initialized); - encode_bc4(pDst, pPixels + chan0, stride); - encode_bc4(static_cast(pDst) + 8, pPixels + chan1, stride); + encode_bc4(&pDst->r_block, pPixels + chan0, stride); + encode_bc4(&pDst->g_block, pPixels + chan1, stride); } // Returns true if the block uses 3 color punchthrough alpha mode. @@ -2390,8 +2393,8 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr const BC1Block *pBlock = static_cast(pBlock_bits); - const uint32_t l = pBlock->get_low_color(); - const uint32_t h = pBlock->get_high_color(); + const uint32_t l = pBlock->GetLowColor(); + const uint32_t h = pBlock->GetHighColor(); Color32 c[4]; @@ -2454,17 +2457,17 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr if (set_alpha) { for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) { - pDst_pixels[0] = c[pBlock->get_selector(0, y)]; - pDst_pixels[1] = c[pBlock->get_selector(1, y)]; - pDst_pixels[2] = c[pBlock->get_selector(2, y)]; - pDst_pixels[3] = c[pBlock->get_selector(3, y)]; + pDst_pixels[0] = c[pBlock->GetSelector(0, y)]; + pDst_pixels[1] = c[pBlock->GetSelector(1, y)]; + pDst_pixels[2] = c[pBlock->GetSelector(2, y)]; + pDst_pixels[3] = c[pBlock->GetSelector(3, y)]; } } else { for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) { - pDst_pixels[0].set(c[pBlock->get_selector(0, y)]); - pDst_pixels[1].set(c[pBlock->get_selector(1, y)]); - pDst_pixels[2].set(c[pBlock->get_selector(2, y)]); - pDst_pixels[3].set(c[pBlock->get_selector(3, y)]); + pDst_pixels[0].set(c[pBlock->GetSelector(0, y)]); + pDst_pixels[1].set(c[pBlock->GetSelector(1, y)]); + pDst_pixels[2].set(c[pBlock->GetSelector(2, y)]); + pDst_pixels[3].set(c[pBlock->GetSelector(3, y)]); } } @@ -2476,16 +2479,15 @@ void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride) { const BC4Block *pBlock = static_cast(pBlock_bits); - uint8_t sel_values[8]; - BC4Block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha()); + auto sel_values = BC4Block::GetBlockValues(pBlock->GetLowAlpha(), pBlock->GetHighAlpha()); - const uint64_t selector_bits = pBlock->get_selector_bits(); + const uint64_t selector_bits = pBlock->GetSelectorBits(); for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) { - pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)]; - pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)]; - pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)]; - pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)]; + pPixels[0] = sel_values[pBlock->GetSelector(0, y, selector_bits)]; + pPixels[stride * 1] = sel_values[pBlock->GetSelector(1, y, selector_bits)]; + pPixels[stride * 2] = sel_values[pBlock->GetSelector(2, y, selector_bits)]; + pPixels[stride * 3] = sel_values[pBlock->GetSelector(3, y, selector_bits)]; } } diff --git a/src/util.h b/src/util.h index 8d02427..55d21fb 100644 --- a/src/util.h +++ b/src/util.h @@ -23,17 +23,17 @@ static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast(-i) : static_cast(i); } -static inline uint8_t scale_8_to_5(uint32_t v) { +static inline uint8_t scale8To5(uint32_t v) { v = v * 31 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } -static inline uint8_t scale_8_to_6(uint32_t v) { +static inline uint8_t scale8To6(uint32_t v) { v = v * 63 + 128; return (uint8_t)((v + (v >> 8)) >> 8); } -static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); } -static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); } +static inline int scale5To8(int v) { return (v << 3) | (v >> 2); } +static inline int scale6To8(int v) { return (v << 2) | (v >> 4); } template inline S maximum(S a, S b) { return (a > b) ? a : b; } template inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); } @@ -55,7 +55,7 @@ static inline float clampf(float value, float low, float high) { static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); } template inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); } -static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { +static inline int32_t clampi(int32_t value, int32_t low, int32_t high) { if (value < low) value = low; else if (value > high) @@ -67,8 +67,3 @@ static inline int squarei(int a) { return a * a; } static inline int absi(int a) { return (a < 0) ? -a : a; } template inline F lerp(F a, F b, F s) { return a + (b - a) * s; } - - - - -