Fix interpolators which I didn't understand before

I have zero clue what I'm doing
3 years ago · 1cf01f6f01
parent d678567dc7
commit 1cf01f6f01
14 changed files with 464 additions and 237 deletions
--- a/src/BC1Decoder.cpp
+++ b/src/BC1Decoder.cpp
@ -0,0 +1,44 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "BC1Decoder.h"
+
+#include <array>
+
+#include "ColorBlock.h"
+
+/**
+ * Decodes a single BC1 block.
+ *
+ * At present this only rebuilds the block's 4-entry color palette from its two
+ * 5:6:5 endpoints; nothing is written to dest yet.
+ * TODO(review): dest is declared const by BlockDecoder::DecodeBlock, so pixels
+ * cannot be stored through it until that signature is revisited.
+ *
+ * @param dest destination pixel block (currently unused)
+ * @param block encoded BC1 block whose endpoints are read
+ */
+void rgbcx::BC1Decoder::DecodeBlock(const Color4x4 *dest, const BC1Block *block) {
+    (void)dest;
+
+    const unsigned l = block->GetLowColor();
+    const unsigned h = block->GetHighColor();
+
+    // Split the packed endpoints into raw 5/6/5-bit channels: the interpolator's
+    // public Interpolate5/6 and InterpolateHalf5/6 take unexpanded channel values.
+    const int lr = (l >> 11) & 0x1F;
+    const int lg = (l >> 5) & 0x3F;
+    const int lb = l & 0x1F;
+    const int hr = (h >> 11) & 0x1F;
+    const int hg = (h >> 5) & 0x3F;
+    const int hb = h & 0x1F;
+
+    std::array<Color32, 4> colors;
+    colors[0] = Color32::Unpack565(l);
+    colors[1] = Color32::Unpack565(h);
+
+    const bool three_color = (h >= l);
+    if (three_color) {
+        // 3-color mode: one midpoint color plus transparent black, matching Interpolator::InterpolateBC1
+        colors[2] = Color32(_interpolator.InterpolateHalf5(lr, hr), _interpolator.InterpolateHalf6(lg, hg), _interpolator.InterpolateHalf5(lb, hb));
+        colors[3] = Color32(0, 0, 0, 0);
+    } else {
+        // 4-color mode: two interpolated colors, each weighted towards its first argument
+        colors[2] = Color32(_interpolator.Interpolate5(lr, hr), _interpolator.Interpolate6(lg, hg), _interpolator.Interpolate5(lb, hb));
+        colors[3] = Color32(_interpolator.Interpolate5(hr, lr), _interpolator.Interpolate6(hg, lg), _interpolator.Interpolate5(hb, lb));
+    }
+
+    // TODO: look up each pixel's selector in the palette and store the result.
+    (void)colors;
+}
--- a/src/BC1Decoder.h
+++ b/src/BC1Decoder.h
@ -0,0 +1,37 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "BlockDecoder.h"
+#include "blocks.h"
+#include "interpolator.h"
+namespace rgbcx {
+/**
+ * BlockDecoder specialization for BC1Block with a 4, 4 decoded block size.
+ *
+ * The decoder stores a reference to its Interpolator rather than a copy, so an
+ * interpolator passed to the constructor must outlive the decoder.
+ */
+class BC1Decoder : public BlockDecoder<BC1Block, 4, 4> {
+   public:
+    /** Creates a decoder that uses the given interpolator (held by reference, not copied). */
+    BC1Decoder(const Interpolator &interpolator) : _interpolator(interpolator) {}
+
+    /**
+     * Creates a decoder that uses a default-constructed Interpolator.
+     * The default instance has static storage duration; binding _interpolator to a
+     * temporary here would leave it dangling as soon as the constructor returned.
+     */
+    BC1Decoder() : BC1Decoder(DefaultInterpolator()) {}
+
+    void DecodeBlock(const Color4x4 *dest, const BC1Block *block) override;
+
+   private:
+    /** Returns the Interpolator instance shared by all default-constructed decoders. */
+    static const Interpolator &DefaultInterpolator() {
+        static const Interpolator instance{};
+        return instance;
+    }
+
+    const Interpolator &_interpolator;
+};
+}  // namespace rgbcx
--- a/src/BlockDecoder.h
+++ b/src/BlockDecoder.h
@ -17,6 +17,18 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include "blocks.h"
+#pragma once

-// endregion
+#include <cstdint>
+
+#include "ColorBlock.h"
+
+namespace rgbcx {
+
+/**
+ * Abstract interface for decoders that turn an encoded block of type B into a
+ * ColorBlock<M, N, Color32>.
+ */
+template <class B, size_t M, size_t N> class BlockDecoder {
+   public:
+    using DecodedBlock = ColorBlock<M, N, Color32>;
+    using EncodedBlock = B;
+
+    // This is a polymorphic base: a virtual destructor keeps deletion through a
+    // BlockDecoder pointer well-defined.
+    virtual ~BlockDecoder() noexcept = default;
+
+    /**
+     * Decodes one encoded block.
+     * @param dest destination pixel block
+     * @param block encoded block to decode
+     */
+    virtual void DecodeBlock(const DecodedBlock *dest, const EncodedBlock *block) = 0;
+};
+}  // namespace rgbcx
--- a/src/BlockEncoder.h
+++ b/src/BlockEncoder.h
@ -0,0 +1,34 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <cstdint>
+
+#include "ColorBlock.h"
+
+namespace rgbcx {
+
+/**
+ * Abstract interface for encoders that turn a ColorBlock<M, N, Color32> into an
+ * encoded block of type B.
+ */
+template <class B, size_t M, size_t N> class BlockEncoder {
+   public:
+    using DecodedBlock = ColorBlock<M, N, Color32>;
+    using EncodedBlock = B;
+
+    // This is a polymorphic base: a virtual destructor keeps deletion through a
+    // BlockEncoder pointer well-defined.
+    virtual ~BlockEncoder() noexcept = default;
+
+    /**
+     * Encodes one block of pixels.
+     * @param dest destination encoded block
+     * @param pixels source pixel block to encode
+     */
+    virtual void EncodeBlock(const EncodedBlock *dest, const DecodedBlock *pixels) = 0;
+};
+}  // namespace rgbcx
--- a/src/ColorBlock.h
+++ b/src/ColorBlock.h
@ -20,6 +20,7 @@
 #pragma once

 #include <array>
+#include <cassert>
 #include <cstdint>
 #include <span>

@ -41,13 +42,13 @@ template <size_t M, size_t N, class T> class ColorBlock {
    }

    ColorBlock(const T *image, int imageWidth, int imageHeight, int x = 0, int y = 0) {
-        int imageX = x * width();
-        int imageY = y * height();
+        int image_x = x * width();
+        int image_y = y * height();

-        assert(imageX > 0 && imageX + width() < imageWidth);
-        assert(imageY > 0 && imageY + height() < imageHeight);
+        // The block must lie fully inside the image. Both bounds are inclusive so that
+        // the first block (x = 0 / y = 0, the defaults) and a block ending exactly on
+        // the right or bottom edge are accepted.
+        assert(image_x >= 0 && image_x + width() <= imageWidth);
+        assert(image_y >= 0 && image_y + height() <= imageHeight);

-        T *start = &image[imageX + (imageY * imageWidth)];
+        T *start = &image[image_x + (image_y * imageWidth)];

        for (int i = 0; i < height(); i++) { _rows[i] = std::span(start[i * imageWidth]); }
    }
@ -62,5 +63,4 @@ template <size_t M, size_t N, class T> class ColorBlock {
    std::array<row, M> _rows;
 };

-using RGBABlock4x4 = ColorBlock<4, 4, Color32>;
-using RBlock4x4 = ColorBlock<4, 4, uint8_t>;
+using Color4x4= ColorBlock<4, 4, Color32>;
--- a/src/bitwiseEnums.h
+++ b/src/bitwiseEnums.h
@ -0,0 +1,58 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich 2020 <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <type_traits>
+
+// Thanks dkavolis
+// Bitwise complement of an enum value, computed on its underlying integer type.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E operator~(E value) noexcept {
+    using Underlying = std::underlying_type_t<E>;
+    return static_cast<E>(~static_cast<Underlying>(value));
+}
+
+// Bitwise OR of two values of the same enum type, via the underlying integer type.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E operator|(E lhs, E rhs) noexcept {
+    using Underlying = std::underlying_type_t<E>;
+    const auto left = static_cast<Underlying>(lhs);
+    const auto right = static_cast<Underlying>(rhs);
+    return static_cast<E>(left | right);
+}
+
+// Bitwise AND of two values of the same enum type, via the underlying integer type.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E operator&(E lhs, E rhs) noexcept {
+    using Underlying = std::underlying_type_t<E>;
+    const auto left = static_cast<Underlying>(lhs);
+    const auto right = static_cast<Underlying>(rhs);
+    return static_cast<E>(left & right);
+}
+
+// Bitwise XOR of two values of the same enum type, via the underlying integer type.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E operator^(E lhs, E rhs) noexcept {
+    using Underlying = std::underlying_type_t<E>;
+    const auto left = static_cast<Underlying>(lhs);
+    const auto right = static_cast<Underlying>(rhs);
+    return static_cast<E>(left ^ right);
+}
+
+// In-place OR: stores lhs | rhs back into lhs and returns a reference to lhs.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E& operator|=(E& lhs, E rhs) noexcept {
+    return lhs = lhs | rhs;
+}
+
+// In-place AND: stores lhs & rhs back into lhs and returns a reference to lhs.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E& operator&=(E& lhs, E rhs) noexcept {
+    return lhs = lhs & rhs;
+}
+
+// In-place XOR: stores lhs ^ rhs back into lhs and returns a reference to lhs.
+template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>>
+constexpr E& operator^=(E& lhs, E rhs) noexcept {
+    return lhs = lhs ^ rhs;
+}
--- a/src/blocks.h
+++ b/src/blocks.h
@ -31,8 +31,8 @@ class BC1Block {
   public:
    uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; }
    uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; }
-    Color32 GetLowColor32() const { return Color32::unpack565(GetLowColor()); }
-    Color32 GetHighColor32() const { return Color32::unpack565(GetHighColor()); }
+    Color32 GetLowColor32() const { return Color32::Unpack565(GetLowColor()); }
+    Color32 GetHighColor32() const { return Color32::Unpack565(GetHighColor()); }

    bool Is3Color() const { return GetLowColor() <= GetHighColor(); }
    void SetLowColor(uint16_t c) {
--- a/src/color.cpp
+++ b/src/color.cpp
@ -25,15 +25,15 @@
 #include "util.h"

 // region Color32 implementation
-Color32::Color32() { set(0, 0, 0, 0xFF); }
+Color32::Color32() { Set(0, 0, 0, 0xFF); }

-Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { set(R, G, B, A); }
+Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { Set(R, G, B, A); }

-uint16_t Color32::pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); }
+uint16_t Color32::Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); }

-uint16_t Color32::pack565(uint16_t R, uint16_t G, uint16_t B) { return pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); }
+uint16_t Color32::Pack565(uint16_t R, uint16_t G, uint16_t B) { return Pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); }

-Color32 Color32::unpack565(uint16_t Packed) {
+Color32 Color32::Unpack565(uint16_t Packed) {
    uint8_t R = scale5To8((Packed >> 11) & 0x1F);
    uint8_t G = scale6To8((Packed >> 5) & 0x3F);
    uint8_t B = scale5To8(Packed & 0x1F);
@ -43,26 +43,26 @@ Color32 Color32::unpack565(uint16_t Packed) {

 uint8_t Color32::operator[](uint32_t Index) const {
    assert(Index < 4);
-    return C[Index];
+    return c[Index];
 }

 uint8_t &Color32::operator[](uint32_t Index) {
    assert(Index < 4);
-    return C[Index];
+    return c[Index];
 }

-void Color32::set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {
-    this->R = R;
-    this->G = G;
-    this->B = B;
-    this->A = A;
+void Color32::Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {
+    this->r = R;
+    this->g = G;
+    this->b = B;
+    this->a = A;
 }

-void Color32::set(const Color32 &Other) {
-    this->R = Other.R;
-    this->G = Other.G;
-    this->B = Other.B;
-    this->A = Other.A;
+void Color32::Set(const Color32 &Other) {
+    this->r = Other.r;
+    this->g = Other.g;
+    this->b = Other.b;
+    this->a = Other.a;
 }

 Color32 Color32::min(const Color32 &a, const Color32 &b) {
@ -73,7 +73,7 @@ Color32 Color32::max(const Color32 &a, const Color32 &b) {
    return Color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3]));
 }

-uint16_t Color32::pack565() { return pack565(R, G, B); }
+uint16_t Color32::pack565() { return Pack565(r, g, b); }

-uint16_t Color32::pack565Unscaled() { return pack565Unscaled(R, G, B); }
+uint16_t Color32::pack565Unscaled() { return Pack565Unscaled(r, g, b); }
 // endregion
--- a/src/color.h
+++ b/src/color.h
@ -18,6 +18,7 @@
 */

 #pragma once
+#include <array>
 #include <cstdint>

 #pragma pack(push, 1)
@ -25,25 +26,25 @@ class Color32 {
   public:
    union {
        struct {
-            uint8_t R;
-            uint8_t G;
-            uint8_t B;
-            uint8_t A;
+            uint8_t r;
+            uint8_t g;
+            uint8_t b;
+            uint8_t a;
        };

-        uint8_t C[4];
+        std::array<uint8_t, 4> c;
    };

    Color32();

    Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A = 0xFF);

-    static uint16_t pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);
-    static uint16_t pack565(uint16_t R, uint16_t G, uint16_t B);
+    static uint16_t Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);
+    static uint16_t Pack565(uint16_t R, uint16_t G, uint16_t B);

-    static Color32 unpack565(uint16_t Packed);
+    static Color32 Unpack565(uint16_t Packed);

-    bool operator==(const Color32 &Rhs) const { return R == Rhs.R && G == Rhs.G && B == Rhs.B && A == Rhs.A; }
+    bool operator==(const Color32 &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }

    uint8_t operator[](uint32_t Index) const;
    uint8_t &operator[](uint32_t Index);
@ -54,8 +55,8 @@ class Color32 {
    static Color32 min(const Color32 &A, const Color32 &B);
    static Color32 max(const Color32 &A, const Color32 &B);

-    void set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);
+    void Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);

-    void set(const Color32 &Other);
+    void Set(const Color32 &Other);
 };
 #pragma pack(pop)
--- a/src/interpolator.cpp
+++ b/src/interpolator.cpp
@ -27,6 +27,7 @@

 namespace rgbcx {

+/*
 Interpolator::Interpolator() {
    PrepSingleColorTables(_single_match5, _single_match5_half, 5);
    PrepSingleColorTables(_single_match5, _single_match5_half, 6);
@ -66,8 +67,8 @@ void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const M
        }
    }
 }
-int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error,
-                                                 bool half, bool ideal) {
+int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
+                                            bool ideal) {
    int e = iabs(v - i);

    // We only need to factor in 3% error in BC1 ideal mode.
@ -84,30 +85,48 @@ int Interpolator::PrepSingleColorTableEntry(const MatchListPtr &matchTable, int
    }

    return e;
-}
+}*/
+
+// region Interpolator implementation
+int Interpolator::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
+int Interpolator::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
+int Interpolator::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
+int Interpolator::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }
+
+std::array<Color32, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) {
+    auto colors = std::array<Color32, 4>();
+    colors[0] = Color32::Unpack565(low);
+    colors[1] = Color32::Unpack565(high);
+
+    if (low > high) {
+        // 4-color mode
+        colors[2] = InterpolateColor24(colors[0], colors[1]);
+        colors[3] = InterpolateColor24(colors[1], colors[0]);
+    } else {
+        // 3-color mode
+        colors[2] = InterpolateHalfColor24(colors[0], colors[1]);
+        colors[3] = Color32(0, 0, 0, 0);  // transparent black
+    }

-// region InterpolatorIdeal implementation
-int InterpolatorIdeal::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
-int InterpolatorIdeal::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
-int InterpolatorIdeal::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
-int InterpolatorIdeal::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
+    return colors;
+}

-int InterpolatorIdeal::Interpolate5or6(int v0, int v1) const {
+int Interpolator::Interpolate8(int v0, int v1) const {
    assert(v0 < 256 && v1 < 256);
    return (v0 * 2 + v1) / 3;
 }

-int InterpolatorIdeal::InterpolateHalf5or6(int v0, int v1) const {
+int Interpolator::InterpolateHalf8(int v0, int v1) const {
    assert(v0 < 256 && v1 < 256);
    return (v0 + v1) / 2;
 }
 // endregion

-// region InterpolatorIdealRound implementation
-int InterpolatorIdealRound::Interpolate5(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
-int InterpolatorIdealRound::Interpolate6(int v0, int v1) const { return Interpolate5or6Round(v0, v1); }
+// region InterpolatorRound implementation
+int InterpolatorRound::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
+int InterpolatorRound::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }

-int InterpolatorIdealRound::Interpolate5or6Round(int v0, int v1) const {
+int InterpolatorRound::Interpolate8(int v0, int v1) const {
    assert(v0 < 256 && v1 < 256);
    return (v0 * 2 + v1 + 1) / 3;
 }
@ -120,7 +139,7 @@ int InterpolatorNvidia::Interpolate5(int v0, int v1) const {
 }

 int InterpolatorNvidia::Interpolate6(int v0, int v1) const {
-    assert(v0 < 256 && v1 < 256);
+    assert(v0 < 64 && v1 < 64);
    const int gdiff = v1 - v0;
    return (256 * v0 + (gdiff / 4) + 128 + gdiff * 80) / 256;
 }
@ -131,24 +150,24 @@ int InterpolatorNvidia::InterpolateHalf5(int v0, int v1) const {
 }

 int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
-    assert(v0 < 256 && v1 < 256);
+    assert(v0 < 64 && v1 < 64);
    const int gdiff = v1 - v0;
    return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
 }
 // endregion

 // region InterpolatorAMD implementation
-int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate5or6(v0, v1); }
-int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate5or6(v0, v1); }
-int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
-int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf5or6(v0, v1); }
+int InterpolatorAMD::Interpolate5(int v0, int v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
+int InterpolatorAMD::Interpolate6(int v0, int v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
+int InterpolatorAMD::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
+int InterpolatorAMD::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }

-int InterpolatorAMD::Interpolate5or6(int v0, int v1) const {
+int InterpolatorAMD::Interpolate8(int v0, int v1) const {
    assert(v0 < 256 && v1 < 256);
    return (v0 * 43 + v1 * 21 + 32) >> 6;
 }

-int InterpolatorAMD::InterpolateHalf5or6(int v0, int v1) const {
+int InterpolatorAMD::InterpolateHalf8(int v0, int v1) const {
    assert(v0 < 256 && v1 < 256);
    return (v0 + v1 + 1) >> 1;
 }
--- a/src/interpolator.h
+++ b/src/interpolator.h
@ -23,6 +23,7 @@
 #include <cstdint>
 #include <memory>

+#include "color.h"
 #include "util.h"

 #ifdef NDEBUG  // asserts disabled
@ -41,105 +42,122 @@ template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> E

 class Interpolator {
   public:
-    struct MatchEntry {
-        uint8_t high;
-        uint8_t low;
-        uint8_t error;
-    };
+    //    struct MatchEntry {
+    //        uint8_t high;
+    //        uint8_t low;
+    //        uint8_t error;
+    //    };

-    Interpolator();
-    virtual ~Interpolator() noexcept = default;
-
-    virtual int Interpolate5(int v0, int v1) = 0;
-    virtual int Interpolate6(int v0, int v1) = 0;
-    virtual int InterpolateHalf5(int v0, int v1) = 0;
-    virtual int InterpolateHalf6(int v0, int v1) = 0;
-
-    constexpr MatchEntry GetMatch5(int i) noexcept(ndebug) {
-        assert(i < match_count);
-        return (*_single_match5)[i];
-    }
-    constexpr MatchEntry GetMatch6(int i) noexcept(ndebug) {
-        assert(i < match_count);
-        return (*_single_match6)[i];
-    }
-    constexpr MatchEntry GetMatchHalf5(int i) noexcept(ndebug) {
-        assert(i < match_count);
-        return (*_single_match5_half)[i];
-    }
-    constexpr MatchEntry GetMatchHalf6(int i) noexcept(ndebug) {
-        assert(i < match_count);
-        return (*_single_match6_half)[i];
-    }
-
-   private:
    constexpr static inline size_t size5 = 32;
    constexpr static inline size_t size6 = 64;
-    constexpr static inline size_t match_count = 256;

-    constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
-    constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
-
-    // match tables used for single-color blocks
-    using MatchList = std::array<MatchEntry, match_count>;
-    using MatchListPtr = std::shared_ptr<MatchList>;
-
-    const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
-    const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
-    const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
-    const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
-
-    virtual constexpr bool isIdeal() noexcept { return false; }
-    virtual constexpr bool useExpandedInMatch() noexcept { return true; }
-
-    void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
-
-    int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
-                                  bool ideal);
-};
+    virtual ~Interpolator() noexcept = default;

-class InterpolatorIdeal : public Interpolator {
-   public:
+    /**
+     * Performs a 2/3 interpolation of a pair of 5-bit values to produce an 8-bit value
+     * Output is approximately (2v0 + v1)/3, with v0 and v1 first extended to 8 bits.
+     * @param v0 The first 5-bit value
+     * @param v1 The second 5-bit value
+     * @return The interpolated value
+     */
    virtual int Interpolate5(int v0, int v1) const;
+
+    /**
+     * Performs a 2/3 interpolation of a pair of 6-bit values to produce an 8-bit value
+     * Output is approximately (2v0 + v1)/3, with v0 and v1 first extended to 8 bits.
+     * @param v0 The first 6-bit value
+     * @param v1 The second 6-bit value
+     * @return The interpolated value
+     */
    virtual int Interpolate6(int v0, int v1) const;
+
+    /**
+     * Performs a 1/2 interpolation of a pair of 5-bit values to produce an 8-bit value
+     * Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits.
+     * @param v0 The first 5-bit value
+     * @param v1 The second 5-bit value
+     * @return The interpolated value
+     */
    virtual int InterpolateHalf5(int v0, int v1) const;
+
+    /**
+     * Performs a 1/2 interpolation of a pair of 6-bit values to produce an 8-bit value
+     * Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits.
+     * @param v0 The first 6-bit value
+     * @param v1 The second 6-bit value
+     * @return The interpolated value
+     */
    virtual int InterpolateHalf6(int v0, int v1) const;

+    /**
+     * Generates the 4 colors for a BC1 block from the given 5:6:5-packed colors
+     * @param low first 5:6:5 color for the block
+     * @param high second 5:6:5 color for the block
+     * @return an array of 4 Color32 values, with indices matching BC1 selectors
+     */
+    std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high);
+
   private:
-    int Interpolate5or6(int v0, int v1) const;
-    int InterpolateHalf5or6(int v0, int v1) const;
-    virtual constexpr bool isIdeal() noexcept override { return true; }
+    virtual int Interpolate8(int v0, int v1) const;
+    virtual int InterpolateHalf8(int v0, int v1) const;
+
+    //    constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
+    //    constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
+    //
+    //    // match tables used for single-color blocks
+    //    using MatchList = std::array<MatchEntry, match_count>;
+    //    using MatchListPtr = std::shared_ptr<MatchList>;
+    //
+    //    const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
+    //    const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
+    //    const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
+    //    const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
+
+    Color32 InterpolateColor24(const Color32 &c0, const Color32 &c1) const {
+        return Color32(Interpolate8(c0.r, c1.r), Interpolate8(c0.g, c1.g), Interpolate8(c0.b, c1.b));
+    }
+
+    Color32 InterpolateHalfColor24(const Color32 &c0, const Color32 &c1) const {
+        return Color32(InterpolateHalf8(c0.r, c1.r), InterpolateHalf8(c0.g, c1.g), InterpolateHalf8(c0.b, c1.b));
+    }
+
+    virtual constexpr bool isIdeal() noexcept { return true; }
+    //    virtual constexpr bool useExpandedInMatch() noexcept { return true; }
+    //
+    //    void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
+    //
+    //    int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
+    //                                  bool ideal);
 };

-class InterpolatorIdealRound : public InterpolatorIdeal {
+/**
+ * Interpolator whose 8-bit 2/3 interpolation adds a +1 bias before dividing by 3.
+ * The 1/2 interpolation is inherited unchanged from Interpolator.
+ */
+class InterpolatorRound : public Interpolator {
   public:
-    virtual int Interpolate5(int v0, int v1) const override;
-    virtual int Interpolate6(int v0, int v1) const override;
+    int Interpolate5(int v0, int v1) const override;
+    int Interpolate6(int v0, int v1) const override;

   private:
-    int Interpolate5or6Round(int v0, int v1) const;
+    // Replaces the base class's private virtual hook; marked override so that a
+    // signature drift becomes a compile error instead of a silent new overload.
+    int Interpolate8(int v0, int v1) const override;
 };

 class InterpolatorNvidia : public Interpolator {
   public:
-    virtual int Interpolate5(int v0, int v1) const;
-    virtual int Interpolate6(int v0, int v1) const;
-    virtual int InterpolateHalf5(int v0, int v1) const;
-    virtual int InterpolateHalf6(int v0, int v1) const;
-
-   private:
-    virtual constexpr bool useExpandedInMatch() noexcept override { return false; }
+    int Interpolate5(int v0, int v1) const override;
+    int Interpolate6(int v0, int v1) const override;
+    int InterpolateHalf5(int v0, int v1) const override;
+    int InterpolateHalf6(int v0, int v1) const override;
+    constexpr bool isIdeal() noexcept override { return false; }
 };

 class InterpolatorAMD : public Interpolator {
   public:
-    virtual int Interpolate5(int v0, int v1) const;
-    virtual int Interpolate6(int v0, int v1) const;
-    virtual int InterpolateHalf5(int v0, int v1) const;
-    virtual int InterpolateHalf6(int v0, int v1) const;
+    int Interpolate5(int v0, int v1) const override;
+    int Interpolate6(int v0, int v1) const override;
+    int InterpolateHalf5(int v0, int v1) const override;
+    int InterpolateHalf6(int v0, int v1) const override;
+    constexpr bool isIdeal() noexcept override { return false; }

   private:
-    int Interpolate5or6(int v0, int v1) const;
-    int InterpolateHalf5or6(int v0, int v1) const;
+    int Interpolate8(int v0, int v1) const;
+    int InterpolateHalf8(int v0, int v1) const;
 };
 }  // namespace rgbcx
--- a/src/rgbcx.cpp
+++ b/src/rgbcx.cpp
@ -476,7 +476,7 @@ static inline bool compute_least_squares_endpoints4_rgb(const Color32 *pColors,
    uint32_t uq00_r = 0, uq00_g = 0, uq00_b = 0;
    uint32_t weight_accum = 0;
    for (uint32_t i = 0; i < 16; i++) {
-        const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2];
+        const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
        const uint8_t sel = pSelectors[i];

        weight_accum += g_weight_vals4[sel];
@ -547,7 +547,7 @@ static inline bool compute_least_squares_endpoints3_rgb(bool use_black, const Co
    uint32_t weight_accum = 0;
    int total_r = 0, total_g = 0, total_b = 0;
    for (uint32_t i = 0; i < 16; i++) {
-        const uint8_t r = pColors[i].C[0], g = pColors[i].C[1], b = pColors[i].C[2];
+        const uint8_t r = pColors[i].c[0], g = pColors[i].c[1], b = pColors[i].c[2];
        if (use_black) {
            if ((r | g | b) < 4) continue;
        }
@ -684,10 +684,10 @@ static inline void bc1_find_sels4_noerr(const Color32 *pSrc_pixels, uint32_t lr,
    static const uint8_t s_sels[4] = {3, 2, 1, 0};

    for (uint32_t i = 0; i < 16; i += 4) {
-        const int d0 = pSrc_pixels[i + 0].R * ar + pSrc_pixels[i + 0].G * ag + pSrc_pixels[i + 0].B * ab;
-        const int d1 = pSrc_pixels[i + 1].R * ar + pSrc_pixels[i + 1].G * ag + pSrc_pixels[i + 1].B * ab;
-        const int d2 = pSrc_pixels[i + 2].R * ar + pSrc_pixels[i + 2].G * ag + pSrc_pixels[i + 2].B * ab;
-        const int d3 = pSrc_pixels[i + 3].R * ar + pSrc_pixels[i + 3].G * ag + pSrc_pixels[i + 3].B * ab;
+        const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab;
+        const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab;
+        const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab;
+        const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab;

        sels[i + 0] = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
        sels[i + 1] = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
@ -717,10 +717,10 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32
    uint32_t total_err = 0;

    for (uint32_t i = 0; i < 16; i += 4) {
-        const int d0 = pSrc_pixels[i + 0].R * ar + pSrc_pixels[i + 0].G * ag + pSrc_pixels[i + 0].B * ab;
-        const int d1 = pSrc_pixels[i + 1].R * ar + pSrc_pixels[i + 1].G * ag + pSrc_pixels[i + 1].B * ab;
-        const int d2 = pSrc_pixels[i + 2].R * ar + pSrc_pixels[i + 2].G * ag + pSrc_pixels[i + 2].B * ab;
-        const int d3 = pSrc_pixels[i + 3].R * ar + pSrc_pixels[i + 3].G * ag + pSrc_pixels[i + 3].B * ab;
+        const int d0 = pSrc_pixels[i + 0].r * ar + pSrc_pixels[i + 0].g * ag + pSrc_pixels[i + 0].b * ab;
+        const int d1 = pSrc_pixels[i + 1].r * ar + pSrc_pixels[i + 1].g * ag + pSrc_pixels[i + 1].b * ab;
+        const int d2 = pSrc_pixels[i + 2].r * ar + pSrc_pixels[i + 2].g * ag + pSrc_pixels[i + 2].b * ab;
+        const int d3 = pSrc_pixels[i + 3].r * ar + pSrc_pixels[i + 3].g * ag + pSrc_pixels[i + 3].b * ab;

        uint8_t sel0 = s_sels[(d0 <= t0) + (d0 < t1) + (d0 < t2)];
        uint8_t sel1 = s_sels[(d1 <= t0) + (d1 < t1) + (d1 < t2)];
@ -733,13 +733,13 @@ static inline uint32_t bc1_find_sels4_fasterr(const Color32 *pSrc_pixels, uint32
        sels[i + 3] = sel3;

        total_err +=
-            squarei(pSrc_pixels[i + 0].R - block_r[sel0]) + squarei(pSrc_pixels[i + 0].G - block_g[sel0]) + squarei(pSrc_pixels[i + 0].B - block_b[sel0]);
+            squarei(pSrc_pixels[i + 0].r - block_r[sel0]) + squarei(pSrc_pixels[i + 0].g - block_g[sel0]) + squarei(pSrc_pixels[i + 0].b - block_b[sel0]);
        total_err +=
-            squarei(pSrc_pixels[i + 1].R - block_r[sel1]) + squarei(pSrc_pixels[i + 1].G - block_g[sel1]) + squarei(pSrc_pixels[i + 1].B - block_b[sel1]);
+            squarei(pSrc_pixels[i + 1].r - block_r[sel1]) + squarei(pSrc_pixels[i + 1].g - block_g[sel1]) + squarei(pSrc_pixels[i + 1].b - block_b[sel1]);
        total_err +=
-            squarei(pSrc_pixels[i + 2].R - block_r[sel2]) + squarei(pSrc_pixels[i + 2].G - block_g[sel2]) + squarei(pSrc_pixels[i + 2].B - block_b[sel2]);
+            squarei(pSrc_pixels[i + 2].r - block_r[sel2]) + squarei(pSrc_pixels[i + 2].g - block_g[sel2]) + squarei(pSrc_pixels[i + 2].b - block_b[sel2]);
        total_err +=
-            squarei(pSrc_pixels[i + 3].R - block_r[sel3]) + squarei(pSrc_pixels[i + 3].G - block_g[sel3]) + squarei(pSrc_pixels[i + 3].B - block_b[sel3]);
+            squarei(pSrc_pixels[i + 3].r - block_r[sel3]) + squarei(pSrc_pixels[i + 3].g - block_g[sel3]) + squarei(pSrc_pixels[i + 3].b - block_b[sel3]);

        if (total_err >= cur_err) break;
    }
@ -759,9 +759,9 @@ static inline uint32_t bc1_find_sels4_check2_err(const Color32 *pSrc_pixels, uin
    uint32_t total_err = 0;

    for (uint32_t i = 0; i < 16; i++) {
-        const int r = pSrc_pixels[i].R;
-        const int g = pSrc_pixels[i].G;
-        const int b = pSrc_pixels[i].B;
+        const int r = pSrc_pixels[i].r;
+        const int g = pSrc_pixels[i].g;
+        const int b = pSrc_pixels[i].b;

        int sel = (int)((float)((r - (int)block_r[0]) * dr + (g - (int)block_g[0]) * dg + (b - (int)block_b[0]) * db) * f + .5f);
        sel = clampi(sel, 1, 3);
@ -796,9 +796,9 @@ static inline uint32_t bc1_find_sels4_fullerr(const Color32 *pSrc_pixels, uint32
    uint32_t total_err = 0;

    for (uint32_t i = 0; i < 16; i++) {
-        const int r = pSrc_pixels[i].R;
-        const int g = pSrc_pixels[i].G;
-        const int b = pSrc_pixels[i].B;
+        const int r = pSrc_pixels[i].r;
+        const int g = pSrc_pixels[i].g;
+        const int b = pSrc_pixels[i].b;

        uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b);
        uint8_t best_sel = 0;
@ -842,9 +842,9 @@ static inline uint32_t bc1_find_sels3_fullerr(bool use_black, const Color32 *pSr
    uint32_t total_err = 0;

    for (uint32_t i = 0; i < 16; i++) {
-        const int r = pSrc_pixels[i].R;
-        const int g = pSrc_pixels[i].G;
-        const int b = pSrc_pixels[i].B;
+        const int r = pSrc_pixels[i].r;
+        const int g = pSrc_pixels[i].g;
+        const int b = pSrc_pixels[i].b;

        uint32_t best_err = squarei((int)block_r[0] - (int)r) + squarei((int)block_g[0] - (int)g) + squarei((int)block_b[0] - (int)b);
        uint32_t best_sel = 0;
@ -923,8 +923,8 @@ static inline void precise_round_565_noscale(vec3F xl, vec3F xh, int &trial_lr,
 }

 static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) {
-    uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb);
-    uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb);
+    uint16_t lc16 = Color32::Pack565Unscaled(lr, lg, lb);
+    uint16_t hc16 = Color32::Pack565Unscaled(hr, hg, hb);

    // Always forbid 3 color blocks
    if (lc16 == hc16) {
@ -974,8 +974,8 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int
 }

 static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) {
-    uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb);
-    uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb);
+    uint16_t lc16 = Color32::Pack565Unscaled(lr, lg, lb);
+    uint16_t hc16 = Color32::Pack565Unscaled(hr, hg, hb);

    bool invert_flag = false;
    if (lc16 > hc16) {
@ -1018,7 +1018,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
    int min_r = 255, min_g = 255, min_b = 255;
    int total_pixels = 0;
    for (uint32_t i = 0; i < 16; i++) {
-        const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
+        const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
        if ((r | g | b) < 4) continue;

        max_r = std::max(max_r, r);
@ -1045,9 +1045,9 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags

    int icov[6] = {0, 0, 0, 0, 0, 0};
    for (uint32_t i = 0; i < 16; i++) {
-        int r = (int)pSrc_pixels[i].R;
-        int g = (int)pSrc_pixels[i].G;
-        int b = (int)pSrc_pixels[i].B;
+        int r = (int)pSrc_pixels[i].r;
+        int g = (int)pSrc_pixels[i].g;
+        int b = (int)pSrc_pixels[i].b;

        if ((r | g | b) < 4) continue;

@ -1094,7 +1094,7 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags

    int low_dot = INT_MAX, high_dot = INT_MIN;
    for (uint32_t i = 0; i < 16; i++) {
-        int r = (int)pSrc_pixels[i].R, g = (int)pSrc_pixels[i].G, b = (int)pSrc_pixels[i].B;
+        int r = (int)pSrc_pixels[i].r, g = (int)pSrc_pixels[i].g, b = (int)pSrc_pixels[i].b;

        if ((r | g | b) < 4) continue;

@ -1109,13 +1109,13 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
        }
    }

-    int lr = scale8To5(pSrc_pixels[low_c].R);
-    int lg = scale8To6(pSrc_pixels[low_c].G);
-    int lb = scale8To5(pSrc_pixels[low_c].B);
+    int lr = scale8To5(pSrc_pixels[low_c].r);
+    int lg = scale8To6(pSrc_pixels[low_c].g);
+    int lb = scale8To5(pSrc_pixels[low_c].b);

-    int hr = scale8To5(pSrc_pixels[high_c].R);
-    int hg = scale8To6(pSrc_pixels[high_c].G);
-    int hb = scale8To5(pSrc_pixels[high_c].B);
+    int hr = scale8To5(pSrc_pixels[high_c].r);
+    int hg = scale8To6(pSrc_pixels[high_c].g);
+    int hb = scale8To5(pSrc_pixels[high_c].b);

    uint8_t trial_sels[16];
    uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX);
@ -1236,9 +1236,9 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_

        int dots[16];
        for (uint32_t i = 0; i < 16; i++) {
-            int r = pSrc_pixels[i].R;
-            int g = pSrc_pixels[i].G;
-            int b = pSrc_pixels[i].B;
+            int r = pSrc_pixels[i].r;
+            int g = pSrc_pixels[i].g;
+            int b = pSrc_pixels[i].b;
            int d = 0x1000000 + (r * ar + g * ag + b * ab);
            assert(d >= 0);
            dots[i] = (d << 4) + i;
@ -1255,9 +1255,9 @@ static bool try_3color_block(const Color32 *pSrc_pixels, uint32_t flags, uint32_
            g_sum[i] = g;
            b_sum[i] = b;

-            r += pSrc_pixels[p].R;
-            g += pSrc_pixels[p].G;
-            b += pSrc_pixels[p].B;
+            r += pSrc_pixels[p].r;
+            g += pSrc_pixels[p].g;
+            b += pSrc_pixels[p].b;
        }

        r_sum[16] = total_r;
@ -1338,7 +1338,7 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3
            flags = cEncodeBC1BoundingBoxInt;
            break;
        case 1:
-            // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0.
+            // Faster/higher quality than stb_dxt default. A bit higher average quality vs. mode 0.
            flags = cEncodeBC1Use2DLS;
            break;
        case 2:
@ -1452,11 +1452,12 @@ void encode_bc1(uint32_t level, void *pDst, const uint8_t *pPixels, bool allow_3
    encode_bc1(pDst, pPixels, flags, total_orderings4, total_orderings3);
 }

+// Finds low and high colors to begin with
 static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t flags, bool grayscale_flag, int min_r, int min_g, int min_b, int max_r,
                                           int max_g, int max_b, int avg_r, int avg_g, int avg_b, int total_r, int total_g, int total_b, int &lr, int &lg,
                                           int &lb, int &hr, int &hg, int &hb) {
    if (grayscale_flag) {
-        const int fr = pSrc_pixels[0].R;
+        const int fr = pSrc_pixels[0].r;

        // Grayscale blocks are a common enough case to specialize.
        if ((max_r - min_r) < 2) {
@ -1480,7 +1481,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
        vec3F l, h;
        if (big_chan == 0) {
            for (uint32_t i = 0; i < 16; i++) {
-                const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
+                const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
                sum_xy_r += r * r, sum_xy_g += r * g, sum_xy_b += r * b;
            }

@ -1521,7 +1522,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
            h.c[0] = fmax_chan_val;
        } else if (big_chan == 1) {
            for (uint32_t i = 0; i < 16; i++) {
-                const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
+                const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
                sum_xy_r += g * r, sum_xy_g += g * g, sum_xy_b += g * b;
            }

@ -1562,7 +1563,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
            h.c[1] = fmax_chan_val;
        } else {
            for (uint32_t i = 0; i < 16; i++) {
-                const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
+                const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;
                sum_xy_r += b * r, sum_xy_g += b * g, sum_xy_b += b * b;
            }

@ -1630,9 +1631,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t

        int icov_xz = 0, icov_yz = 0;
        for (uint32_t i = 0; i < 16; i++) {
-            int r = (int)pSrc_pixels[i].R - avg_r;
-            int g = (int)pSrc_pixels[i].G - avg_g;
-            int b = (int)pSrc_pixels[i].B - avg_b;
+            int r = (int)pSrc_pixels[i].r - avg_r;
+            int g = (int)pSrc_pixels[i].g - avg_g;
+            int b = (int)pSrc_pixels[i].b - avg_b;
            icov_xz += r * b;
            icov_yz += g * b;
        }
@ -1668,9 +1669,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t

        int icov_xz = 0, icov_yz = 0;
        for (uint32_t i = 0; i < 16; i++) {
-            int r = (int)pSrc_pixels[i].R - avg_r;
-            int g = (int)pSrc_pixels[i].G - avg_g;
-            int b = (int)pSrc_pixels[i].B - avg_b;
+            int r = (int)pSrc_pixels[i].r - avg_r;
+            int g = (int)pSrc_pixels[i].g - avg_g;
+            int b = (int)pSrc_pixels[i].b - avg_b;
            icov_xz += r * b;
            icov_yz += g * b;
        }
@ -1680,6 +1681,7 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
        int x1 = max_r;
        int y1 = max_g;

+        // swap r and g min and max to align principal axis
        if (icov_xz < 0) std::swap(x0, x1);

        if (icov_yz < 0) std::swap(y0, y1);
@ -1697,9 +1699,9 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t

        int icov[6] = {0, 0, 0, 0, 0, 0};
        for (uint32_t i = 0; i < 16; i++) {
-            int r = (int)pSrc_pixels[i].R - avg_r;
-            int g = (int)pSrc_pixels[i].G - avg_g;
-            int b = (int)pSrc_pixels[i].B - avg_b;
+            int r = (int)pSrc_pixels[i].r - avg_r;
+            int g = (int)pSrc_pixels[i].g - avg_g;
+            int b = (int)pSrc_pixels[i].b - avg_b;
            icov[0] += r * r;
            icov[1] += r * g;
            icov[2] += r * b;
@ -1746,10 +1748,10 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
        saxis_b = (int)((uint32_t)saxis_b << 4U);

        for (uint32_t i = 0; i < 16; i += 4) {
-            int dot0 = ((pSrc_pixels[i].R * saxis_r + pSrc_pixels[i].G * saxis_g + pSrc_pixels[i].B * saxis_b) & ~0xF) + i;
-            int dot1 = ((pSrc_pixels[i + 1].R * saxis_r + pSrc_pixels[i + 1].G * saxis_g + pSrc_pixels[i + 1].B * saxis_b) & ~0xF) + i + 1;
-            int dot2 = ((pSrc_pixels[i + 2].R * saxis_r + pSrc_pixels[i + 2].G * saxis_g + pSrc_pixels[i + 2].B * saxis_b) & ~0xF) + i + 2;
-            int dot3 = ((pSrc_pixels[i + 3].R * saxis_r + pSrc_pixels[i + 3].G * saxis_g + pSrc_pixels[i + 3].B * saxis_b) & ~0xF) + i + 3;
+            int dot0 = ((pSrc_pixels[i].r * saxis_r + pSrc_pixels[i].g * saxis_g + pSrc_pixels[i].b * saxis_b) & ~0xF) + i;
+            int dot1 = ((pSrc_pixels[i + 1].r * saxis_r + pSrc_pixels[i + 1].g * saxis_g + pSrc_pixels[i + 1].b * saxis_b) & ~0xF) + i + 1;
+            int dot2 = ((pSrc_pixels[i + 2].r * saxis_r + pSrc_pixels[i + 2].g * saxis_g + pSrc_pixels[i + 2].b * saxis_b) & ~0xF) + i + 2;
+            int dot3 = ((pSrc_pixels[i + 3].r * saxis_r + pSrc_pixels[i + 3].g * saxis_g + pSrc_pixels[i + 3].b * saxis_b) & ~0xF) + i + 3;

            int min_d01 = std::min(dot0, dot1);
            int max_d01 = std::max(dot0, dot1);
@ -1766,13 +1768,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
        low_c = low_dot & 15;
        high_c = high_dot & 15;

-        lr = scale8To5(pSrc_pixels[low_c].R);
-        lg = scale8To6(pSrc_pixels[low_c].G);
-        lb = scale8To5(pSrc_pixels[low_c].B);
+        lr = scale8To5(pSrc_pixels[low_c].r);
+        lg = scale8To6(pSrc_pixels[low_c].g);
+        lb = scale8To5(pSrc_pixels[low_c].b);

-        hr = scale8To5(pSrc_pixels[high_c].R);
-        hg = scale8To6(pSrc_pixels[high_c].G);
-        hb = scale8To5(pSrc_pixels[high_c].B);
+        hr = scale8To5(pSrc_pixels[high_c].r);
+        hg = scale8To6(pSrc_pixels[high_c].g);
+        hb = scale8To5(pSrc_pixels[high_c].b);
    }
 }

@ -1857,11 +1859,11 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot

    int avg_r, avg_g, avg_b, min_r, min_g, min_b, max_r, max_g, max_b;

-    const uint32_t fr = pSrc_pixels[0].R, fg = pSrc_pixels[0].G, fb = pSrc_pixels[0].B;
+    const uint32_t fr = pSrc_pixels[0].r, fg = pSrc_pixels[0].g, fb = pSrc_pixels[0].b;

    uint32_t j;
    for (j = 15; j >= 1; --j)
-        if ((pSrc_pixels[j].R != fr) || (pSrc_pixels[j].G != fg) || (pSrc_pixels[j].B != fb)) break;
+        if ((pSrc_pixels[j].r != fr) || (pSrc_pixels[j].g != fg) || (pSrc_pixels[j].b != fb)) break;

    if (j == 0) {
        encode_bc1_solid_block(pDst, fr, fg, fb, (flags & (cEncodeBC1Use3ColorBlocks | cEncodeBC1Use3ColorBlocksForBlackPixels)) != 0);
@ -1877,7 +1879,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
    uint32_t any_black_pixels = (fr | fg | fb) < 4;

    for (uint32_t i = 1; i < 16; i++) {
-        const int r = pSrc_pixels[i].R, g = pSrc_pixels[i].G, b = pSrc_pixels[i].B;
+        const int r = pSrc_pixels[i].r, g = pSrc_pixels[i].g, b = pSrc_pixels[i].b;

        grayscale_flag &= ((r == g) && (r == b));
        any_black_pixels |= ((r | g | b) < 4);
@ -1953,6 +1955,7 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot

        }  // ls_pass
    } else {
+        // calculate block error from naïve approach
        const uint32_t total_rounds = (flags & cEncodeBC1TryAllInitialEndponts) ? 2 : 1;
        for (uint32_t round = 0; round < total_rounds; round++) {
            uint32_t modified_flags = flags;
@ -2065,9 +2068,9 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot

            int dots[16];
            for (uint32_t i = 0; i < 16; i++) {
-                int r = pSrc_pixels[i].R;
-                int g = pSrc_pixels[i].G;
-                int b = pSrc_pixels[i].B;
+                int r = pSrc_pixels[i].r;
+                int g = pSrc_pixels[i].g;
+                int b = pSrc_pixels[i].b;
                int d = 0x1000000 + (r * ar + g * ag + b * ab);
                assert(d >= 0);
                dots[i] = (d << 4) + i;
@ -2084,9 +2087,9 @@ void encode_bc1(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t tot
                g_sum[i] = g;
                b_sum[i] = b;

-                r += pSrc_pixels[p].R;
-                g += pSrc_pixels[p].G;
-                b += pSrc_pixels[p].B;
+                r += pSrc_pixels[p].r;
+                g += pSrc_pixels[p].g;
+                b += pSrc_pixels[p].b;
            }

            r_sum[16] = total_r;
@ -2368,43 +2371,43 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
    bool used_punchthrough = false;

    if (l > h) {
-        c[0].set(r0, g0, b0, 255);
-        c[1].set(r1, g1, b1, 255);
+        c[0].Set(r0, g0, b0, 255);
+        c[1].Set(r1, g1, b1, 255);
        switch (mode) {
            case bc1_approx_mode::cBC1Ideal:
-                c[2].set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
-                c[3].set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
+                c[2].Set((r0 * 2 + r1) / 3, (g0 * 2 + g1) / 3, (b0 * 2 + b1) / 3, 255);
+                c[3].Set((r1 * 2 + r0) / 3, (g1 * 2 + g0) / 3, (b1 * 2 + b0) / 3, 255);
                break;
            case bc1_approx_mode::cBC1IdealRound4:
-                c[2].set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255);
-                c[3].set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255);
+                c[2].Set((r0 * 2 + r1 + 1) / 3, (g0 * 2 + g1 + 1) / 3, (b0 * 2 + b1 + 1) / 3, 255);
+                c[3].Set((r1 * 2 + r0 + 1) / 3, (g1 * 2 + g0 + 1) / 3, (b1 * 2 + b0 + 1) / 3, 255);
                break;
            case bc1_approx_mode::cBC1NVidia:
-                c[2].set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255);
-                c[3].set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255);
+                c[2].Set(interp_5_nv(cr0, cr1), interp_6_nv(g0, g1), interp_5_nv(cb0, cb1), 255);
+                c[3].Set(interp_5_nv(cr1, cr0), interp_6_nv(g1, g0), interp_5_nv(cb1, cb0), 255);
                break;
            case bc1_approx_mode::cBC1AMD:
-                c[2].set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
-                c[3].set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
+                c[2].Set(interp_5_6_amd(r0, r1), interp_5_6_amd(g0, g1), interp_5_6_amd(b0, b1), 255);
+                c[3].Set(interp_5_6_amd(r1, r0), interp_5_6_amd(g1, g0), interp_5_6_amd(b1, b0), 255);
                break;
        }
    } else {
-        c[0].set(r0, g0, b0, 255);
-        c[1].set(r1, g1, b1, 255);
+        c[0].Set(r0, g0, b0, 255);
+        c[1].Set(r1, g1, b1, 255);
        switch (mode) {
            case bc1_approx_mode::cBC1Ideal:
            case bc1_approx_mode::cBC1IdealRound4:
-                c[2].set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
+                c[2].Set((r0 + r1) / 2, (g0 + g1) / 2, (b0 + b1) / 2, 255);
                break;
            case bc1_approx_mode::cBC1NVidia:
-                c[2].set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255);
+                c[2].Set(interp_half_5_nv(cr0, cr1), interp_half_6_nv(g0, g1), interp_half_5_nv(cb0, cb1), 255);
                break;
            case bc1_approx_mode::cBC1AMD:
-                c[2].set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
+                c[2].Set(interp_half_5_6_amd(r0, r1), interp_half_5_6_amd(g0, g1), interp_half_5_6_amd(b0, b1), 255);
                break;
        }

-        c[3].set(0, 0, 0, 0);
+        c[3].Set(0, 0, 0, 0);
        used_punchthrough = true;
    }

@ -2417,10 +2420,10 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
        }
    } else {
        for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) {
-            pDst_pixels[0].set(c[pBlock->GetSelector(0, y)]);
-            pDst_pixels[1].set(c[pBlock->GetSelector(1, y)]);
-            pDst_pixels[2].set(c[pBlock->GetSelector(2, y)]);
-            pDst_pixels[3].set(c[pBlock->GetSelector(3, y)]);
+            pDst_pixels[0].Set(c[pBlock->GetSelector(0, y)]);
+            pDst_pixels[1].Set(c[pBlock->GetSelector(1, y)]);
+            pDst_pixels[2].Set(c[pBlock->GetSelector(2, y)]);
+            pDst_pixels[3].Set(c[pBlock->GetSelector(3, y)]);
        }
    }

@ -2452,7 +2455,7 @@ bool unpack_bc3(const void *pBlock_bits, void *pPixels, bc1_approx_mode mode) {

    if (unpack_bc1((const uint8_t *)pBlock_bits + sizeof(BC4Block), pDst_pixels, true, mode)) success = false;

-    unpack_bc4(pBlock_bits, &pDst_pixels[0].A, sizeof(Color32));
+    unpack_bc4(pBlock_bits, &pDst_pixels[0].a, sizeof(Color32));

    return success;
 }
--- a/src/rgbcx.h
+++ b/src/rgbcx.h
@ -56,6 +56,7 @@
 #include <cstdint>

 #include "blocks.h"
+#include "interpolator.h"

 // By default, the table used to accelerate cluster fit on 4 color blocks uses a 969x128 entry table.
 // To reduce the executable size, set RGBCX_USE_SMALLER_TABLES to 1, which selects the smaller 969x32 entry table.
--- a/src/util.h
+++ b/src/util.h
@ -32,8 +32,8 @@ static inline uint8_t scale8To6(uint32_t v) {
    return (uint8_t)((v + (v >> 8)) >> 8);
 }

-static inline int scale5To8(int v) { return (v << 3) | (v >> 2); }
-static inline int scale6To8(int v) { return (v << 2) | (v >> 4); }
+static constexpr int scale5To8(int v) { return (v << 3) | (v >> 2); }  // widen to 8 bits: v shifted up by 3, low bits filled from v >> 2 (assumes v is a 5-bit value, 0..31 - TODO confirm)
+static constexpr int scale6To8(int v) { return (v << 2) | (v >> 4); }  // widen to 8 bits: v shifted up by 2, low bits filled from v >> 4 (assumes v is a 6-bit value, 0..63 - TODO confirm)

 template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
 template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }