diff --git a/src/BC1Decoder.cpp b/src/BC1Decoder.cpp new file mode 100644 index 0000000..09f3cb4 --- /dev/null +++ b/src/BC1Decoder.cpp @@ -0,0 +1,44 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich 2020 + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . + */ + +#include "BC1Decoder.h" + +#include + +#include "ColorBlock.h" + +void rgbcx::BC1Decoder::DecodeBlock(const Color4x4 *dest, const BC1Block *block) { + const unsigned l = block->GetLowColor(); + const unsigned h = block->GetHighColor(); + + const auto l_color = Color32::Unpack565(l); + const auto h_color = Color32::Unpack565(h); + + std::array colors; + colors[0] = l_color; + colors[1] = h_color; + + bool three_color = (h >= l); + if (three_color) { + colors[2] = _interpolator.InterpolateHalfColor(l_color, h_color); + colors[3] = Color32(0,0,0); + } else { + colors[2] = _interpolator.InterpolateColor() + } +} diff --git a/src/BC1Decoder.h b/src/BC1Decoder.h new file mode 100644 index 0000000..c0f64b4 --- /dev/null +++ b/src/BC1Decoder.h @@ -0,0 +1,37 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich 2020 + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + 
it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . + */ + +#pragma once + +#include "BlockDecoder.h" +#include "blocks.h" +#include "interpolator.h" +namespace rgbcx { +class BC1Decoder : public BlockDecoder { + public: + BC1Decoder(const Interpolator &interpolator) : _interpolator(interpolator) {} + + BC1Decoder() : BC1Decoder(Interpolator()) {} + + void DecodeBlock(const Color4x4 *dest, const BC1Block *block) override; + + private: + const Interpolator &_interpolator; +}; +} // namespace rgbcx diff --git a/src/blocks.cpp b/src/BlockDecoder.h similarity index 72% rename from src/blocks.cpp rename to src/BlockDecoder.h index 2300410..df92681 100644 --- a/src/blocks.cpp +++ b/src/BlockDecoder.h @@ -17,6 +17,18 @@ along with this program. If not, see . 
*/ -#include "blocks.h" +#pragma once -// endregion +#include + +#include "ColorBlock.h" + +namespace rgbcx { + +template class BlockDecoder { + public: + using DecodedBlock = ColorBlock; + using EncodedBlock = B; + virtual void DecodeBlock(const DecodedBlock *dest, const EncodedBlock *block) = 0; +}; +} // namespace rgbcx diff --git a/src/BlockEncoder.h b/src/BlockEncoder.h new file mode 100644 index 0000000..050a45d --- /dev/null +++ b/src/BlockEncoder.h @@ -0,0 +1,34 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich 2020 + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . 
+ */ + +#pragma once + +#include + +#include "ColorBlock.h" + +namespace rgbcx { + +template class BlockEncoder { + public: + using DecodedBlock = ColorBlock; + using EncodedBlock = B; + virtual void EncodeBlock(const EncodedBlock *dest, const DecodedBlock *pixels) = 0; +}; +} // namespace rgbcx diff --git a/src/ColorBlock.h b/src/ColorBlock.h index ebce5b6..6aef664 100644 --- a/src/ColorBlock.h +++ b/src/ColorBlock.h @@ -20,6 +20,7 @@ #pragma once #include +#include #include #include @@ -41,13 +42,13 @@ template class ColorBlock { } ColorBlock(const T *image, int imageWidth, int imageHeight, int x = 0, int y = 0) { - int imageX = x * width(); - int imageY = y * height(); + int image_x = x * width(); + int image_y = y * height(); - assert(imageX > 0 && imageX + width() < imageWidth); - assert(imageY > 0 && imageY + height() < imageHeight); + assert(image_x > 0 && image_x + width() < imageWidth); + assert(image_y > 0 && image_y + height() < imageHeight); - T *start = &image[imageX + (imageY * imageWidth)]; + T *start = &image[image_x + (image_y * imageWidth)]; for (int i = 0; i < height(); i++) { _rows[i] = std::span(start[i * imageWidth]); } } @@ -62,5 +63,4 @@ template class ColorBlock { std::array _rows; }; -using RGBABlock4x4 = ColorBlock<4, 4, Color32>; -using RBlock4x4 = ColorBlock<4, 4, uint8_t>; +using Color4x4= ColorBlock<4, 4, Color32>; diff --git a/src/bitwiseEnums.h b/src/bitwiseEnums.h new file mode 100644 index 0000000..2929eb8 --- /dev/null +++ b/src/bitwiseEnums.h @@ -0,0 +1,58 @@ +/* Python-rgbcx Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich 2020 + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see . + */ + +#pragma once + +#include + +// Thanks dkavolis +template >> constexpr inline auto operator~(E a) noexcept -> E { + using Base = std::underlying_type_t; + return static_cast(~static_cast(a)); +} + +template >> constexpr inline auto operator|(E a, E b) noexcept -> E { + using Base = std::underlying_type_t; + return static_cast(static_cast(a) | static_cast(b)); +} + +template >> constexpr inline auto operator&(E a, E b) noexcept -> E { + using Base = std::underlying_type_t; + return static_cast(static_cast(a) & static_cast(b)); +} + +template >> constexpr inline auto operator^(E a, E b) noexcept -> E { + using Base = std::underlying_type_t; + return static_cast(static_cast(a) ^ static_cast(b)); +} + +template >> constexpr inline auto operator|=(E& a, E b) noexcept -> E& { + a = a | b; + return a; +} + +template >> constexpr inline auto operator&=(E& a, E b) noexcept -> E& { + a = a & b; + return a; +} + +template >> constexpr inline auto operator^=(E& a, E b) noexcept -> E& { + a = a ^ b; + return a; +} \ No newline at end of file diff --git a/src/blocks.h b/src/blocks.h index e40372c..0d24b8a 100644 --- a/src/blocks.h +++ b/src/blocks.h @@ -31,8 +31,8 @@ class BC1Block { public: uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; } uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; } - Color32 GetLowColor32() const { return Color32::unpack565(GetLowColor()); } - Color32 GetHighColor32() const { return Color32::unpack565(GetHighColor()); } + Color32 GetLowColor32() const { return Color32::Unpack565(GetLowColor()); } + Color32 GetHighColor32() const { 
return Color32::Unpack565(GetHighColor()); } bool Is3Color() const { return GetLowColor() <= GetHighColor(); } void SetLowColor(uint16_t c) { diff --git a/src/color.cpp b/src/color.cpp index 273121f..1c70906 100644 --- a/src/color.cpp +++ b/src/color.cpp @@ -25,15 +25,15 @@ #include "util.h" // region Color32 implementation -Color32::Color32() { set(0, 0, 0, 0xFF); } +Color32::Color32() { Set(0, 0, 0, 0xFF); } -Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { set(R, G, B, A); } +Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { Set(R, G, B, A); } -uint16_t Color32::pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); } +uint16_t Color32::Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); } -uint16_t Color32::pack565(uint16_t R, uint16_t G, uint16_t B) { return pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); } +uint16_t Color32::Pack565(uint16_t R, uint16_t G, uint16_t B) { return Pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); } -Color32 Color32::unpack565(uint16_t Packed) { +Color32 Color32::Unpack565(uint16_t Packed) { uint8_t R = scale5To8((Packed >> 11) & 0x1F); uint8_t G = scale6To8((Packed >> 5) & 0x3F); uint8_t B = scale5To8(Packed & 0x1F); @@ -43,26 +43,26 @@ Color32 Color32::unpack565(uint16_t Packed) { uint8_t Color32::operator[](uint32_t Index) const { assert(Index < 4); - return C[Index]; + return c[Index]; } uint8_t &Color32::operator[](uint32_t Index) { assert(Index < 4); - return C[Index]; + return c[Index]; } -void Color32::set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { - this->R = R; - this->G = G; - this->B = B; - this->A = A; +void Color32::Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { + this->r = R; + this->g = G; + this->b = B; + this->a = A; } -void Color32::set(const Color32 &Other) { - this->R = Other.R; - this->G = Other.G; - this->B = Other.B; - this->A = Other.A; +void Color32::Set(const Color32 &Other) { + this->r 
= Other.r; + this->g = Other.g; + this->b = Other.b; + this->a = Other.a; } Color32 Color32::min(const Color32 &a, const Color32 &b) { @@ -73,7 +73,7 @@ Color32 Color32::max(const Color32 &a, const Color32 &b) { return Color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); } -uint16_t Color32::pack565() { return pack565(R, G, B); } +uint16_t Color32::pack565() { return Pack565(r, g, b); } -uint16_t Color32::pack565Unscaled() { return pack565Unscaled(R, G, B); } +uint16_t Color32::pack565Unscaled() { return Pack565Unscaled(r, g, b); } // endregion \ No newline at end of file diff --git a/src/color.h b/src/color.h index ab92017..3da26e0 100644 --- a/src/color.h +++ b/src/color.h @@ -18,6 +18,7 @@ */ #pragma once +#include #include #pragma pack(push, 1) @@ -25,25 +26,25 @@ class Color32 { public: union { struct { - uint8_t R; - uint8_t G; - uint8_t B; - uint8_t A; + uint8_t r; + uint8_t g; + uint8_t b; + uint8_t a; }; - uint8_t C[4]; + std::array c; }; Color32(); Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A = 0xFF); - static uint16_t pack565Unscaled(uint16_t R, uint16_t G, uint16_t B); - static uint16_t pack565(uint16_t R, uint16_t G, uint16_t B); + static uint16_t Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B); + static uint16_t Pack565(uint16_t R, uint16_t G, uint16_t B); - static Color32 unpack565(uint16_t Packed); + static Color32 Unpack565(uint16_t Packed); - bool operator==(const Color32 &Rhs) const { return R == Rhs.R && G == Rhs.G && B == Rhs.B && A == Rhs.A; } + bool operator==(const Color32 &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; } uint8_t operator[](uint32_t Index) const; uint8_t &operator[](uint32_t Index); @@ -54,8 +55,8 @@ class Color32 { static Color32 min(const Color32 &A, const Color32 &B); static Color32 max(const Color32 &A, const Color32 &B); - void set(uint8_t R, uint8_t G, uint8_t B, uint8_t A); + void Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A); - void 
set(const Color32 &Other); + void Set(const Color32 &Other); }; #pragma pack(pop) \ No newline at end of file diff --git a/src/interpolator.cpp b/src/interpolator.cpp index 57c3592..65c5e45 100644 --- a/src/interpolator.cpp +++ b/src/interpolator.cpp @@ -27,6 +27,7 @@ namespace rgbcx { +/* Interpolator::Interpolator() { PrepSingleColorTables(_single_match5, _single_match5_half, 5); PrepSingleColorTables(_single_match5, _single_match5_half, 6); @@ -66,8 +67,8 @@ void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const M } } } -int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, - bool half, bool ideal) { +int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half, + bool ideal) { int e = iabs(v - i); // We only need to factor in 3% error in BC1 ideal mode. @@ -84,30 +85,48 @@ int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int } return e; -} +}*/ + +// region Interpolator implementation +int Interpolator::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); } +int Interpolator::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); } +int Interpolator::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); } +int Interpolator::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); } + +std::array Interpolator::InterpolateBC1(uint16_t low, uint16_t high) { + auto colors = std::array(); + colors[0] = Color32::Unpack565(low); + colors[1] = Color32::Unpack565(high); + + if (low > high) { + // 4-color mode + colors[2] = InterpolateColor24(colors[0], colors[1]); + colors[3] = InterpolateColor24(colors[1], colors[0]); + } else { + // 3-color mode + colors[2] = 
InterpolateHalfColor24(colors[0], colors[1]); + colors[3] = Color32(0, 0, 0, 0); // transparent black + } -// region InterpolatorIdeal implementation -int InterpolatorIdeal::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); } -int InterpolatorIdeal::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); } -int InterpolatorIdeal::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); } -int InterpolatorIdeal::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); } + return colors; +} -int InterpolatorIdeal::Interpolate5or6(int v0, int v1) const { +int Interpolator::Interpolate8(int v0, int v1) const { assert(v0 < 256 && v1 < 256); return (v0 * 2 + v1) / 3; } -int InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) const { +int Interpolator::InterpolateHalf8(int v0, int v1) const { assert(v0 < 256 && v1 < 256); return (v0 + v1) / 2; } // endregion -// region InterpolatorIdealRound implementation -int InterpolatorIdealRound::Interpolate5(int v0, int v1) const { return Interpolate5or6Round(v0, v1); } -int InterpolatorIdealRound::Interpolate6(int v0, int v1) const { return Interpolate5or6Round(v0, v1); } +// region InterpolatorRound implementation +int InterpolatorRound::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); } +int InterpolatorRound::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); } -int InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) const { +int InterpolatorRound::Interpolate8(int v0, int v1) const { assert(v0 < 256 && v1 < 256); return (v0 * 2 + v1 + 1) / 3; } @@ -120,7 +139,7 @@ int InterpolatorNvidia::Interpolate5(int v0, int v1) const { } int InterpolatorNvidia::Interpolate6(int v0, int v1) const { - assert(v0 < 256 && v1 < 256); + assert(v0 < 64 && v1 < 64); const int gdiff = v1 - v0; return (256 * v0 + (gdiff / 4) + 128 + gdiff * 80) / 256; } @@ -131,24 +150,24 @@ int 
InterpolatorNvidia::InterpolateHalf5(int v0, int v1) const { } int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const { - assert(v0 < 256 && v1 < 256); + assert(v0 < 64 && v1 < 64); const int gdiff = v1 - v0; return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256; } // endregion // region InterpolatorAMD implementation -int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); } -int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); } -int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); } -int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); } +int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); } +int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); } +int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); } +int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); } -int InterpolatorAMD::Interpolate5or6(int v0, int v1) const { +int InterpolatorAMD::Interpolate8(int v0, int v1) const { assert(v0 < 256 && v1 < 256); return (v0 * 43 + v1 * 21 + 32) >> 6; } -int InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) const { +int InterpolatorAMD::InterpolateHalf8(int v0, int v1) const { assert(v0 < 256 && v1 < 256); return (v0 + v1 + 1) >> 1; } diff --git a/src/interpolator.h b/src/interpolator.h index c1a6025..0105d21 100644 --- a/src/interpolator.h +++ b/src/interpolator.h @@ -23,6 +23,7 @@ #include #include +#include "color.h" #include "util.h" #ifdef NDEBUG // asserts disabled @@ -41,105 +42,122 @@ template static constexpr std::array E class Interpolator { public: - struct MatchEntry { - uint8_t high; - uint8_t low; - uint8_t error; - }; + // struct MatchEntry { + // 
uint8_t high; + // uint8_t low; + // uint8_t error; + // }; - Interpolator(); - virtual ~Interpolator() noexcept = default; - - virtual int Interpolate5(int v0, int v1) = 0; - virtual int Interpolate6(int v0, int v1) = 0; - virtual int InterpolateHalf5(int v0, int v1) = 0; - virtual int InterpolateHalf6(int v0, int v1) = 0; - - constexpr MatchEntry GetMatch5(int i) noexcept(ndebug) { - assert(i < match_count); - return (*_single_match5)[i]; - } - constexpr MatchEntry GetMatch6(int i) noexcept(ndebug) { - assert(i < match_count); - return (*_single_match6)[i]; - } - constexpr MatchEntry GetMatchHalf5(int i) noexcept(ndebug) { - assert(i < match_count); - return (*_single_match5_half)[i]; - } - constexpr MatchEntry GetMatchHalf6(int i) noexcept(ndebug) { - assert(i < match_count); - return (*_single_match6_half)[i]; - } - - private: constexpr static inline size_t size5 = 32; constexpr static inline size_t size6 = 64; - constexpr static inline size_t match_count = 256; - constexpr static auto Expand5 = ExpandArray(); - constexpr static auto Expand6 = ExpandArray(); - - // match tables used for single-color blocks - using MatchList = std::array; - using MatchListPtr = std::shared_ptr; - - const MatchListPtr _single_match5 = {std::make_shared()}; - const MatchListPtr _single_match6 = {std::make_shared()}; - const MatchListPtr _single_match5_half = {std::make_shared()}; - const MatchListPtr _single_match6_half = {std::make_shared()}; - - virtual constexpr bool isIdeal() noexcept { return false; } - virtual constexpr bool useExpandedInMatch() noexcept { return true; } - - void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len); - - int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half, - bool ideal); -}; + virtual ~Interpolator() noexcept = default; -class InterpolatorIdeal : public Interpolator { - public: + /** + * Performs a 2/3 
interpolation of a pair of 5-bit values to produce an 8-bit value + * Output is approximately (2v0 + v1)/3, with v0 and v1 first extended to 8 bits. + * @param v0 The first 5-bit value + * @param v1 The second 5-bit value + * @return The interpolated value + */ virtual int Interpolate5(int v0, int v1) const; + + /** + * Performs a 2/3 interpolation of a pair of 6-bit values to produce an 8-bit value + * Output is approximately (2v0 + v1)/3, with v0 and v1 first extended to 8 bits. + * @param v0 The first 6-bit value + * @param v1 The second 6-bit value + * @return The interpolated value + */ virtual int Interpolate6(int v0, int v1) const; + + /** + * Performs a 1/2 interpolation of a pair of 5-bit values to produce an 8-bit value + * Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits. + * @param v0 The first 5-bit value + * @param v1 The second 5-bit value + * @return The interpolated value + */ virtual int InterpolateHalf5(int v0, int v1) const; + + /** + * Performs a 1/2 interpolation of a pair of 6-bit values to produce an 8-bit value + * Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits. 
+ * @param v0 The first 6-bit value + * @param v1 The second 6-bit value + * @return The interpolated value + */ virtual int InterpolateHalf6(int v0, int v1) const; + /** + * Generates the 4 colors for a BC1 block from the given 5:6:5-packed colors + * @param low first 5:6:5 color for the block + * @param high second 5:6:5 color for the block + * @return an array of 4 Color32 values, with indices matching BC1 selectors + */ + std::array InterpolateBC1(uint16_t low, uint16_t high); + private: - int Interpolate5or6(int v0, int v1) const; - int InterpolateHalf5or6(int v0, int v1) const; - virtual constexpr bool isIdeal() noexcept override { return true; } + virtual int Interpolate8(int v0, int v1) const; + virtual int InterpolateHalf8(int v0, int v1) const; + + // constexpr static auto Expand5 = ExpandArray(); + // constexpr static auto Expand6 = ExpandArray(); + // + // // match tables used for single-color blocks + // using MatchList = std::array; + // using MatchListPtr = std::shared_ptr; + // + // const MatchListPtr _single_match5 = {std::make_shared()}; + // const MatchListPtr _single_match6 = {std::make_shared()}; + // const MatchListPtr _single_match5_half = {std::make_shared()}; + // const MatchListPtr _single_match6_half = {std::make_shared()}; + + Color32 InterpolateColor24(const Color32 &c0, const Color32 &c1) const { + return Color32(Interpolate8(c0.r, c1.r), Interpolate8(c0.g, c1.g), Interpolate8(c0.b, c1.b)); + } + + Color32 InterpolateHalfColor24(const Color32 &c0, const Color32 &c1) const { + return Color32(InterpolateHalf8(c0.r, c1.r), InterpolateHalf8(c0.g, c1.g), InterpolateHalf8(c0.b, c1.b)); + } + + virtual constexpr bool isIdeal() noexcept { return true; } + // virtual constexpr bool useExpandedInMatch() noexcept { return true; } + // + // void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len); + // + // int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, 
int low_e, int high_e, int lowest_error, bool half, + // bool ideal); }; -class InterpolatorIdealRound : public InterpolatorIdeal { +class InterpolatorRound : public Interpolator { public: - virtual int Interpolate5(int v0, int v1) const override; - virtual int Interpolate6(int v0, int v1) const override; + int Interpolate5(int v0, int v1) const override; + int Interpolate6(int v0, int v1) const override; private: - int Interpolate5or6Round(int v0, int v1) const; + int Interpolate8(int v0, int v1) const; }; class InterpolatorNvidia : public Interpolator { public: - virtual int Interpolate5(int v0, int v1) const; - virtual int Interpolate6(int v0, int v1) const; - virtual int InterpolateHalf5(int v0, int v1) const; - virtual int InterpolateHalf6(int v0, int v1) const; - - private: - virtual constexpr bool useExpandedInMatch() noexcept override { return false; } + int Interpolate5(int v0, int v1) const override; + int Interpolate6(int v0, int v1) const override; + int InterpolateHalf5(int v0, int v1) const override; + int InterpolateHalf6(int v0, int v1) const override; + constexpr bool isIdeal() noexcept override { return false; } }; class InterpolatorAMD : public Interpolator { public: - virtual int Interpolate5(int v0, int v1) const; - virtual int Interpolate6(int v0, int v1) const; - virtual int InterpolateHalf5(int v0, int v1) const; - virtual int InterpolateHalf6(int v0, int v1) const; + int Interpolate5(int v0, int v1) const override; + int Interpolate6(int v0, int v1) const override; + int InterpolateHalf5(int v0, int v1) const override; + int InterpolateHalf6(int v0, int v1) const override; + constexpr bool isIdeal() noexcept override { return false; } private: - int Interpolate5or6(int v0, int v1) const; - int InterpolateHalf5or6(int v0, int v1) const; + int Interpolate8(int v0, int v1) const; + int InterpolateHalf8(int v0, int v1) const; }; } // namespace rgbcx \ No newline at end of file diff --git a/src/rgbcx.cpp b/src/rgbcx.cpp index a39928e..9c78fe9 
100644 --- a/src/rgbcx.cpp +++ b/src/rgbcx.cpp @@ -476,7 +476,7 @@ static inline bool compute_least_squares_endpoints4_rgb(const Color32 *pColors, uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0; uint32_t weight_accum = 0; for (uint32_t i = 0; i < 16; i++) { - const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2]; + const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; const uint8_t sel = pSelectors[i]; weight_accum += g_weight_vals4[sel]; @@ -547,7 +547,7 @@ static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const Co uint32_t weight_accum = 0; int total_r = 0, total_g = 0, total_b = 0; for (uint32_t i = 0; i < 16; i++) { - const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2]; + const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2]; if (use_black) { if ((r | g | b) < 4) continue; } @@ -684,10 +684,10 @@ static inline void bc1_find_sels4_noerr(const Color32 *pSrc_pixels, uint32_t lr, static const uint8_t s_sels[4] = {3, 2, 1, 0}; for (uint32_t i = 0; i < 16; i += 4) { - const int d0 = pSrc_pixels[i + 0].R * ar + pSrc_pixels[i + 0].G * ag + pSrc_pixels[i + 0].B * ab; - const int d1 = pSrc_pixels[i + 1].R * ar + pSrc_pixels[i + 1].G * ag + pSrc_pixels[i + 1].B * ab; - const int d2 = pSrc_pixels[i + 2].R * ar + pSrc_pixels[i + 2].G * ag + pSrc_pixels[i + 2].B * ab; - const int d3 = pSrc_pixels[i + 3].R * ar + pSrc_pixels[i + 3].G * ag + pSrc_pixels[i + 3].B * ab; + const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab; + const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab; + const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab; + const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab; sels[i + 0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; sels[i + 1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < 
t2)]; @@ -717,10 +717,10 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32 uint32_t total_err = 0; for (uint32_t i = 0; i < 16; i += 4) { - const int d0 = pSrc_pixels[i + 0].R * ar + pSrc_pixels[i + 0].G * ag + pSrc_pixels[i + 0].B * ab; - const int d1 = pSrc_pixels[i + 1].R * ar + pSrc_pixels[i + 1].G * ag + pSrc_pixels[i + 1].B * ab; - const int d2 = pSrc_pixels[i + 2].R * ar + pSrc_pixels[i + 2].G * ag + pSrc_pixels[i + 2].B * ab; - const int d3 = pSrc_pixels[i + 3].R * ar + pSrc_pixels[i + 3].G * ag + pSrc_pixels[i + 3].B * ab; + const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab; + const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab; + const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab; + const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab; uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)]; uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)]; @@ -733,13 +733,13 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32 sels[i + 3] = sel3; total_err += - squarei(pSrc_pixels[i + 0].R - block_r[sel0]) + squarei(pSrc_pixels[i + 0].G - block_g[sel0]) + squarei(pSrc_pixels[i + 0].B - block_b[sel0]); + squarei(pSrc_pixels[i + 0].r - block_r[sel0]) + squarei(pSrc_pixels[i + 0].g - block_g[sel0]) + squarei(pSrc_pixels[i + 0].b - block_b[sel0]); total_err += - squarei(pSrc_pixels[i + 1].R - block_r[sel1]) + squarei(pSrc_pixels[i + 1].G - block_g[sel1]) + squarei(pSrc_pixels[i + 1].B - block_b[sel1]); + squarei(pSrc_pixels[i + 1].r - block_r[sel1]) + squarei(pSrc_pixels[i + 1].g - block_g[sel1]) + squarei(pSrc_pixels[i + 1].b - block_b[sel1]); total_err += - squarei(pSrc_pixels[i + 2].R - block_r[sel2]) + squarei(pSrc_pixels[i + 2].G - block_g[sel2]) + squarei(pSrc_pixels[i + 2].B - block_b[sel2]); + 
squarei(pSrc_pixels[i + 2].r - block_r[sel2]) + squarei(pSrc_pixels[i + 2].g - block_g[sel2]) + squarei(pSrc_pixels[i + 2].b - block_b[sel2]); total_err += - squarei(pSrc_pixels[i + 3].R - block_r[sel3]) + squarei(pSrc_pixels[i + 3].G - block_g[sel3]) + squarei(pSrc_pixels[i + 3].B - block_b[sel3]); + squarei(pSrc_pixels[i + 3].r - block_r[sel3]) + squarei(pSrc_pixels[i + 3].g - block_g[sel3]) + squarei(pSrc_pixels[i + 3].b - block_b[sel3]); if (total_err >= cur_err) break; } @@ -759,9 +759,9 @@ static inline uint32_t bc1_find_sels4_check2_err(const Color32 *pSrc_pixels, uin uint32_t total_err = 0; for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R; - const int g = pSrc_pixels[i].G; - const int b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f); sel = clampi(sel, 1, 3); @@ -796,9 +796,9 @@ static inline uint32_t bc1_find_sels4_fullerr(const Color32 *pSrc_pixels, uint32 uint32_t total_err = 0; for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R; - const int g = pSrc_pixels[i].G; - const int b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); uint8_t best_sel = 0; @@ -842,9 +842,9 @@ static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const Color32 *pSr uint32_t total_err = 0; for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R; - const int g = pSrc_pixels[i].G; - const int b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r; + const int g = pSrc_pixels[i].g; + const int b = pSrc_pixels[i].b; uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b); 
uint32_t best_sel = 0; @@ -923,8 +923,8 @@ static inline void precise_round_565_noscale(vec3F xl, vec3F xh, int &trial_lr, } static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) { - uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb); - uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb); + uint16_t lc16 = Color32::Pack565Unscaled(lr, lg, lb); + uint16_t hc16 = Color32::Pack565Unscaled(hr, hg, hb); // Always forbid 3 color blocks if (lc16 == hc16) { @@ -974,8 +974,8 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int } static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) { - uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb); - uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb); + uint16_t lc16 = Color32::Pack565Unscaled(lr, lg, lb); + uint16_t hc16 = Color32::Pack565Unscaled(hr, hg, hb); bool invert_flag = false; if (lc16 > hc16) { @@ -1018,7 +1018,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags int min_r = 255, min_g = 255, min_b = 255; int total_pixels = 0; for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; if ((r | g | b) < 4) continue; max_r = std::max(max_r, r); @@ -1045,9 +1045,9 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags int icov[6] = {0, 0, 0, 0, 0, 0}; for (uint32_t i = 0; i < 16; i++) { - int r = (int)pSrc_pixels[i].R; - int g = (int)pSrc_pixels[i].G; - int b = (int)pSrc_pixels[i].B; + int r = (int)pSrc_pixels[i].r; + int g = (int)pSrc_pixels[i].g; + int b = (int)pSrc_pixels[i].b; if ((r | g | b) < 4) continue; @@ -1094,7 +1094,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags int low_dot = INT_MAX, high_dot = INT_MIN; for 
(uint32_t i = 0; i < 16; i++) { - int r = (int)pSrc_pixels[i].R, g = (int)pSrc_pixels[i].G, b = (int)pSrc_pixels[i].B; + int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b; if ((r | g | b) < 4) continue; @@ -1109,13 +1109,13 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags } } - int lr = scale8To5(pSrc_pixels[low_c].R); - int lg = scale8To6(pSrc_pixels[low_c].G); - int lb = scale8To5(pSrc_pixels[low_c].B); + int lr = scale8To5(pSrc_pixels[low_c].r); + int lg = scale8To6(pSrc_pixels[low_c].g); + int lb = scale8To5(pSrc_pixels[low_c].b); - int hr = scale8To5(pSrc_pixels[high_c].R); - int hg = scale8To6(pSrc_pixels[high_c].G); - int hb = scale8To5(pSrc_pixels[high_c].B); + int hr = scale8To5(pSrc_pixels[high_c].r); + int hg = scale8To6(pSrc_pixels[high_c].g); + int hb = scale8To5(pSrc_pixels[high_c].b); uint8_t trial_sels[16]; uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX); @@ -1236,9 +1236,9 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_ int dots[16]; for (uint32_t i = 0; i < 16; i++) { - int r = pSrc_pixels[i].R; - int g = pSrc_pixels[i].G; - int b = pSrc_pixels[i].B; + int r = pSrc_pixels[i].r; + int g = pSrc_pixels[i].g; + int b = pSrc_pixels[i].b; int d = 0x1000000 + (r * ar + g * ag + b * ab); assert(d >= 0); dots[i] = (d << 4) + i; @@ -1255,9 +1255,9 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_ g_sum[i] = g; b_sum[i] = b; - r += pSrc_pixels[p].R; - g += pSrc_pixels[p].G; - b += pSrc_pixels[p].B; + r += pSrc_pixels[p].r; + g += pSrc_pixels[p].g; + b += pSrc_pixels[p].b; } r_sum[16] = total_r; @@ -1338,7 +1338,7 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3 flags = cEncodeBC1BoundingBoxInt; break; case 1: - // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0. 
+ // Faster/higher quality than stb_dxt default. a bit higher average quality vs. mode 0. flags = cEncodeBC1Use2DLS; break; case 2: @@ -1452,11 +1452,12 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3 encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3); } +// Finds low and high colors to begin with static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t flags, bool grayscale_flag, int min_r, int min_g, int min_b, int max_r, int max_g, int max_b, int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, int &lr, int &lg, int &lb, int &hr, int &hg, int &hb) { if (grayscale_flag) { - const int fr = pSrc_pixels[0].R; + const int fr = pSrc_pixels[0].r; // Grayscale blocks are a common enough case to specialize. if ((max_r - min_r) < 2) { @@ -1480,7 +1481,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t vec3F l, h; if (big_chan == 0) { for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; sum_xy_r += r * r, sum_xy_g += r * g, sum_xy_b += r * b; } @@ -1521,7 +1522,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t h.c[0] = fmax_chan_val; } else if (big_chan == 1) { for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; sum_xy_r += g * r, sum_xy_g += g * g, sum_xy_b += g * b; } @@ -1562,7 +1563,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t h.c[1] = fmax_chan_val; } else { for (uint32_t i = 0; i < 16; i++) { - const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; sum_xy_r += b * r, sum_xy_g += b * g, sum_xy_b 
+= b * b; } @@ -1630,9 +1631,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t int icov_xz = 0, icov_yz = 0; for (uint32_t i = 0; i < 16; i++) { - int r = (int)pSrc_pixels[i].R - avg_r; - int g = (int)pSrc_pixels[i].G - avg_g; - int b = (int)pSrc_pixels[i].B - avg_b; + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; icov_xz += r * b; icov_yz += g * b; } @@ -1668,9 +1669,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t int icov_xz = 0, icov_yz = 0; for (uint32_t i = 0; i < 16; i++) { - int r = (int)pSrc_pixels[i].R - avg_r; - int g = (int)pSrc_pixels[i].G - avg_g; - int b = (int)pSrc_pixels[i].B - avg_b; + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; icov_xz += r * b; icov_yz += g * b; } @@ -1680,6 +1681,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t int x1 = max_r; int y1 = max_g; + // swap r and g min and max to align principal axis if (icov_xz < 0) std::swap(x0, x1); if (icov_yz < 0) std::swap(y0, y1); @@ -1697,9 +1699,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t int icov[6] = {0, 0, 0, 0, 0, 0}; for (uint32_t i = 0; i < 16; i++) { - int r = (int)pSrc_pixels[i].R - avg_r; - int g = (int)pSrc_pixels[i].G - avg_g; - int b = (int)pSrc_pixels[i].B - avg_b; + int r = (int)pSrc_pixels[i].r - avg_r; + int g = (int)pSrc_pixels[i].g - avg_g; + int b = (int)pSrc_pixels[i].b - avg_b; icov[0] += r * r; icov[1] += r * g; icov[2] += r * b; @@ -1746,10 +1748,10 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t saxis_b = (int)((uint32_t)saxis_b << 4U); for (uint32_t i = 0; i < 16; i += 4) { - int dot0 = ((pSrc_pixels[i].R * saxis_r + pSrc_pixels[i].G * saxis_g + pSrc_pixels[i].B * saxis_b) & ~0xF) + i; - int dot1 = ((pSrc_pixels[i + 1].R * saxis_r + 
pSrc_pixels[i + 1].G * saxis_g + pSrc_pixels[i + 1].B * saxis_b) & ~0xF) + i + 1; - int dot2 = ((pSrc_pixels[i + 2].R * saxis_r + pSrc_pixels[i + 2].G * saxis_g + pSrc_pixels[i + 2].B * saxis_b) & ~0xF) + i + 2; - int dot3 = ((pSrc_pixels[i + 3].R * saxis_r + pSrc_pixels[i + 3].G * saxis_g + pSrc_pixels[i + 3].B * saxis_b) & ~0xF) + i + 3; + int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i; + int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1; + int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2; + int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3; int min_d01 = std::min(dot0, dot1); int max_d01 = std::max(dot0, dot1); @@ -1766,13 +1768,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t low_c = low_dot & 15; high_c = high_dot & 15; - lr = scale8To5(pSrc_pixels[low_c].R); - lg = scale8To6(pSrc_pixels[low_c].G); - lb = scale8To5(pSrc_pixels[low_c].B); + lr = scale8To5(pSrc_pixels[low_c].r); + lg = scale8To6(pSrc_pixels[low_c].g); + lb = scale8To5(pSrc_pixels[low_c].b); - hr = scale8To5(pSrc_pixels[high_c].R); - hg = scale8To6(pSrc_pixels[high_c].G); - hb = scale8To5(pSrc_pixels[high_c].B); + hr = scale8To5(pSrc_pixels[high_c].r); + hg = scale8To6(pSrc_pixels[high_c].g); + hb = scale8To5(pSrc_pixels[high_c].b); } } @@ -1857,11 +1859,11 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b; - const uint32_t fr = pSrc_pixels[0].R, fg = pSrc_pixels[0].G, fb = pSrc_pixels[0].B; + const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b; uint32_t j; for (j = 15; j >= 1; --j) - if ((pSrc_pixels[j].R != fr) || (pSrc_pixels[j].G != fg) 
|| (pSrc_pixels[j].B != fb)) break; + if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break; if (j == 0) { encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0); @@ -1877,7 +1879,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot uint32_t any_black_pixels = (fr | fg | fb) < 4; for (uint32_t i = 1; i < 16; i++) { - const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B; + const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b; grayscale_flag &= ((r == g) && (r == b)); any_black_pixels |= ((r | g | b) < 4); @@ -1953,6 +1955,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot } // ls_pass } else { + // calculate block error from naïve approach const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1; for (uint32_t round = 0; round < total_rounds; round++) { uint32_t modified_flags = flags; @@ -2065,9 +2068,9 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot int dots[16]; for (uint32_t i = 0; i < 16; i++) { - int r = pSrc_pixels[i].R; - int g = pSrc_pixels[i].G; - int b = pSrc_pixels[i].B; + int r = pSrc_pixels[i].r; + int g = pSrc_pixels[i].g; + int b = pSrc_pixels[i].b; int d = 0x1000000 + (r * ar + g * ag + b * ab); assert(d >= 0); dots[i] = (d << 4) + i; @@ -2084,9 +2087,9 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot g_sum[i] = g; b_sum[i] = b; - r += pSrc_pixels[p].R; - g += pSrc_pixels[p].G; - b += pSrc_pixels[p].B; + r += pSrc_pixels[p].r; + g += pSrc_pixels[p].g; + b += pSrc_pixels[p].b; } r_sum[16] = total_r; @@ -2368,43 +2371,43 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr bool used_punchthrough = false; if (l > h) { - c[0].set(r0, g0, b0, 255); - c[1].set(r1, g1, b1, 255); + c[0].Set(r0, g0, b0, 255); + 
c[1].Set(r1, g1, b1, 255); switch (mode) { case bc1_approx_mode::cBC1Ideal: - c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); - c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); + c[2].Set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255); + c[3].Set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255); break; case bc1_approx_mode::cBC1IdealRound4: - c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); - c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); + c[2].Set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255); + c[3].Set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255); break; case bc1_approx_mode::cBC1NVidia: - c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); - c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); + c[2].Set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255); + c[3].Set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255); break; case bc1_approx_mode::cBC1AMD: - c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); - c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); + c[2].Set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255); + c[3].Set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255); break; } } else { - c[0].set(r0, g0, b0, 255); - c[1].set(r1, g1, b1, 255); + c[0].Set(r0, g0, b0, 255); + c[1].Set(r1, g1, b1, 255); switch (mode) { case bc1_approx_mode::cBC1Ideal: case bc1_approx_mode::cBC1IdealRound4: - c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); + c[2].Set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255); break; case bc1_approx_mode::cBC1NVidia: - c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), 
interp_half_5_nv(cb0, cb1), 255); + c[2].Set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255); break; case bc1_approx_mode::cBC1AMD: - c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); + c[2].Set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255); break; } - c[3].set(0, 0, 0, 0); + c[3].Set(0, 0, 0, 0); used_punchthrough = true; } @@ -2417,10 +2420,10 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr } } else { for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) { - pDst_pixels[0].set(c[pBlock->GetSelector(0, y)]); - pDst_pixels[1].set(c[pBlock->GetSelector(1, y)]); - pDst_pixels[2].set(c[pBlock->GetSelector(2, y)]); - pDst_pixels[3].set(c[pBlock->GetSelector(3, y)]); + pDst_pixels[0].Set(c[pBlock->GetSelector(0, y)]); + pDst_pixels[1].Set(c[pBlock->GetSelector(1, y)]); + pDst_pixels[2].Set(c[pBlock->GetSelector(2, y)]); + pDst_pixels[3].Set(c[pBlock->GetSelector(3, y)]); } } @@ -2452,7 +2455,7 @@ bool unpack_bc3(const void *pBlock_bits, void *pPixels, bc1_approx_mode mode) { if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(BC4Block), pDst_pixels, true, mode)) success = false; - unpack_bc4(pBlock_bits, &pDst_pixels[0].A, sizeof(Color32)); + unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(Color32)); return success; } diff --git a/src/rgbcx.h b/src/rgbcx.h index 19653c4..9386346 100644 --- a/src/rgbcx.h +++ b/src/rgbcx.h @@ -56,6 +56,7 @@ #include #include "blocks.h" +#include "interpolator.h" // By default, the table used to accelerate cluster fit on 4 color blocks uses a 969x128 entry table. // To reduce the executable size, set RGBCX_USE_SMALLER_TABLES to 1, which selects the smaller 969x32 entry table. 
diff --git a/src/util.h b/src/util.h index 6918207..a43b2e9 100644 --- a/src/util.h +++ b/src/util.h @@ -32,8 +32,8 @@ static inline uint8_t scale8To6(uint32_t v) { return (uint8_t)((v + (v >> 8)) >> 8); } -static inline int scale5To8(int v) { return (v << 3) | (v >> 2); } -static inline int scale6To8(int v) { return (v << 2) | (v >> 4); } +static constexpr int scale5To8(int v) { return (v << 3) | (v >> 2); } +static constexpr int scale6To8(int v) { return (v << 2) | (v >> 4); } template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; } template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }