diff --git a/.clang-format b/.clang-format
index ee1cf84..ed7b74d 100644
--- a/.clang-format
+++ b/.clang-format
@@ -1,6 +1,10 @@
 ---
-BasedOnStyle: LLVM
+BasedOnStyle: google
 IndentWidth: 4
 ColumnLimit: 160
+AllowShortBlocksOnASingleLine: Always
+AllowShortFunctionsOnASingleLine: All
+AlwaysBreakTemplateDeclarations: MultiLine
+#AlignConsecutiveDeclarations: true
 ---
 
diff --git a/.clang-tidy b/.clang-tidy
index 77c0cca..113d779 100644
--- a/.clang-tidy
+++ b/.clang-tidy
@@ -1,21 +1,18 @@
----
-FormatStyle: LLVM
-Checks: 'clang-diagnostic-*,clang-analyzer-*,readability-*'
+FormatStyle: google
 
+Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming,cppcoreguidelines-narrowing-conversions'
 CheckOptions:
-  - key: readability-identifier-naming.AbstractClassCase
-    value: 'CamelCase'
-  - key: readability-identifier-naming.ClassCase
-    value: 'CamelCase'
-  - key: readability-identifier-naming.EnumCase
-    value: 'CamelCase'
-  - key: readability-identifier-naming.StructCase
-    value: 'CamelCase'
-  - key: readability-identifier-naming.TypedefCase
-    value: 'CamelCase'
-  - key: readability-identifier-naming.MemberCase
-    value: 'CamelCase'
-  - key: readability-identifier-naming.FunctionCase
-    value: 'camelBack'
-  - key: readability-identifier-naming.ClassMethodCase
-    value: 'camelBack'
\ No newline at end of file
+  - { key: readability-identifier-naming.NamespaceCase,          value: lower_case }
+  - { key: readability-identifier-naming.ClassCase,              value: CamelCase }
+  - { key: readability-identifier-naming.StructCase,             value: CamelCase }
+  - { key: readability-identifier-naming.TemplateParameterCase,  value: CamelCase }
+  - { key: readability-identifier-naming.FunctionCase,           value: aNy_CasE }
+  - { key: readability-identifier-naming.VariableCase,           value: lower_case }
+  - { key: readability-identifier-naming.MemberCase,        value: lower_case }
+  - { key: readability-identifier-naming.PrivateMemberPrefix,    value: _ }
+  - { key: readability-identifier-naming.ProtectedMemberPrefix,  value: _ }
+  - { key: readability-identifier-naming.EnumConstantCase,         value: CamelCase }
+  - { key: readability-identifier-naming.ConstexprVariableCase,    value: CamelCase }
+  - { key: readability-identifier-naming.GlobalConstantCase,       value: CamelCase }
+  - { key: readability-identifier-naming.MemberConstantCase,       value: CamelCase }
+  - { key: readability-identifier-naming.StaticConstantCase,       value: CamelCase }
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index f51f421..fb19e63 100644
--- a/.gitignore
+++ b/.gitignore
@@ -17,4 +17,4 @@ install_manifest.txt
 compile_commands.json
 CTestTestfile.cmake
 _deps
-cmake-build-debug
+cmake-build-*
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4cafb2a..cbaab56 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -18,8 +18,7 @@ source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCE_FILES} ${HEADER_FIL
 pybind11_add_module(python_rgbcx
         ${SOURCE_FILES}
         ${HEADER_FILES}
-        ${PYTHON_FILES}
-        )
+        ${PYTHON_FILES})
 
 add_executable(test_rgbcx
         ${SOURCE_FILES}
@@ -27,5 +26,8 @@ add_executable(test_rgbcx
         ${TEST_FILES})
 
 # Set module features, like C/C++ standards
-target_compile_features(python_rgbcx PUBLIC cxx_std_17 c_std_11)
-target_compile_features(test_rgbcx PUBLIC cxx_std_17 c_std_11)
\ No newline at end of file
+target_compile_features(python_rgbcx PUBLIC cxx_std_20 c_std_11)
+target_compile_features(test_rgbcx PUBLIC cxx_std_20 c_std_11)
+
+set_property(TARGET python_rgbcx test_rgbcx PROPERTY INTERPROCEDURAL_OPTIMIZATION True) # enable link-time optimization (LTO) if available
+set_property(TARGET python_rgbcx test_rgbcx PROPERTY OSX_ARCHITECTURES_RELEASE x86_64 arm64) # Release config: Mach-O fat binary for x86_64 and arm64
diff --git a/src/blocks.cpp b/src/blocks.cpp
index 791c8a9..78bfad7 100644
--- a/src/blocks.cpp
+++ b/src/blocks.cpp
@@ -18,38 +18,8 @@
  */
 
 #include "blocks.h"
+#include "color.h"
 #include <algorithm>
 #include <cassert>
 
-// region Color32 implementation
-Color32::Color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va) { set(vr, vg, vb, va); }
-
-uint8_t Color32::operator[](uint32_t idx) const {
-    assert(idx < 4);
-    return C[idx];
-}
-
-uint8_t &Color32::operator[](uint32_t idx) {
-    assert(idx < 4);
-    return C[idx];
-}
-
-void Color32::set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va) {
-    C[0] = vr;
-    C[1] = vg;
-    C[2] = vb;
-    C[3] = va;
-}
-
-void Color32::set(const Color32 &other) {
-    C[0] = other.C[0];
-    C[1] = other.C[1];
-    C[2] = other.C[2];
-}
-Color32 Color32::min(const Color32 &a, const Color32 &b) {
-    return Color32(std::min(a[0], b[0]), std::min(a[1], b[1]), std::min(a[2], b[2]), std::min(a[3], b[3]));
-}
-Color32 Color32::max(const Color32 &a, const Color32 &b) {
-    return Color32(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3]));
-}
 // endregion
diff --git a/src/blocks.h b/src/blocks.h
index 0492e7d..9223d3b 100644
--- a/src/blocks.h
+++ b/src/blocks.h
@@ -19,172 +19,120 @@
 
 #pragma once
 
-#include "util.h"
+#include <array>
 #include <cassert>
 #include <cstdint>
 #include <cstdlib>
 
-constexpr inline uint8_t DXT1SelectorBits = 2U;
+#include "color.h"
+#include "util.h"
 
 #pragma pack(push, 1)
-struct Color32 {
-    union {
-        struct {
-            uint8_t R;
-            uint8_t G;
-            uint8_t B;
-            uint8_t A;
-        };
-
-        uint8_t C[4];
-    };
-
-    Color32() {}
-
-    Color32(uint32_t vr, uint32_t vg, uint32_t vb, uint32_t va);
-
-    void set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
-
-    void set(const Color32 &other);
-
-    uint8_t operator[](uint32_t idx) const;
-    uint8_t &operator[](uint32_t idx);
-
-    bool operator==(const Color32 &rhs) const {
-        return R == rhs.R && G == rhs.G && B == rhs.B && A == rhs.A;
+class BC1Block {
+   public:
+    uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; }  // low endpoint as 16 bits; byte 0 is the least significant
+    uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; }  // high endpoint as 16 bits; byte 0 is the least significant
+    Color32 GetLowColor32() const { return Color32::unpack565(GetLowColor()); }  // low endpoint passed through Color32::unpack565
+    Color32 GetHighColor32() const { return Color32::unpack565(GetHighColor()); }  // high endpoint passed through Color32::unpack565
+
+    bool Is3Color() const { return GetLowColor() <= GetHighColor(); }  // true when the packed low endpoint is <= the packed high endpoint
+    void SetLowColor(uint16_t c) {  // stores the 16-bit low endpoint as two bytes
+        _low_color[0] = c & 0xFF;         // least-significant byte first
+        _low_color[1] = (c >> 8) & 0xFF;  // then the most-significant byte
     }
-
-    static Color32 min(const Color32 &a, const Color32 &b);
-    static Color32 max(const Color32 &a, const Color32 &b);
-};
-
-struct BC1Block {
-    constexpr static inline size_t EndpointSize = 2;
-    constexpr static inline size_t SelectorSize = 4;
-    constexpr static inline uint8_t SelectorBits = 2;
-    constexpr static inline uint8_t SelectorValues = 1 << SelectorBits;
-    constexpr static inline uint8_t SelectorMask = SelectorValues - 1;
-
-    uint8_t LowColor[EndpointSize];
-    uint8_t HighColor[EndpointSize];
-    uint8_t Selectors[SelectorSize];
-
-    inline uint32_t get_low_color() const { return LowColor[0] | (LowColor[1] << 8U); }
-    inline uint32_t get_high_color() const { return HighColor[0] | (HighColor[1] << 8U); }
-    inline bool is_3color() const { return get_low_color() <= get_high_color(); }
-    inline void set_low_color(uint16_t c) {
-        LowColor[0] = static_cast<uint8_t>(c & 0xFF);
-        LowColor[1] = static_cast<uint8_t>((c >> 8) & 0xFF);
-    }
-    inline void set_high_color(uint16_t c) {
-        HighColor[0] = static_cast<uint8_t>(c & 0xFF);
-        HighColor[1] = static_cast<uint8_t>((c >> 8) & 0xFF);
+    void SetHighColor(uint16_t c) {  // stores the 16-bit high endpoint as two bytes
+        _high_color[0] = c & 0xFF;         // least-significant byte first
+        _high_color[1] = (c >> 8) & 0xFF;  // then the most-significant byte
     }
-    inline uint32_t get_selector(uint32_t x, uint32_t y) const {
+    uint32_t GetSelector(uint32_t x, uint32_t y) const {  // selector of pixel (x, y); both coordinates must be < 4
         assert((x < 4U) && (y < 4U));
-        return (Selectors[y] >> (x * SelectorBits)) & SelectorMask;
+        return (selectors[y] >> (x * SelectorBits)) & SelectorMask;  // row y is one byte; pixel x sits SelectorBits * x bits up
     }
-    inline void set_selector(uint32_t x, uint32_t y, uint32_t val) {
+    void SetSelector(uint32_t x, uint32_t y, uint32_t val) {  // overwrites the selector of pixel (x, y) with val; all three must be < 4
         assert((x < 4U) && (y < 4U) && (val < 4U));
-        Selectors[y] &= (~(SelectorMask << (x * SelectorBits)));
-        Selectors[y] |= (val << (x * DXT1SelectorBits));
+        selectors[y] &= (~(SelectorMask << (x * SelectorBits)));  // clear the pixel's existing bits
+        selectors[y] |= (val << (x * SelectorBits));              // then write the new value into the same position
     }
 
-    static inline uint16_t pack_color(const Color32 &color, bool scaled, uint32_t bias = 127U) {
-        uint32_t r = color.R, g = color.G, b = color.B;
-        if (scaled) {
-            r = (r * 31U + bias) / 255U;
-            g = (g * 63U + bias) / 255U;
-            b = (b * 31U + bias) / 255U;
-        }
-        return static_cast<uint16_t>(minimum(b, 31U) | (minimum(g, 63U) << 5U) | (minimum(r, 31U) << 11U));
-    }
-
-    static inline uint16_t pack_unscaled_color(uint32_t r, uint32_t g, uint32_t b) { return static_cast<uint16_t>(b | (g << 5U) | (r << 11U)); }
-
-    static inline void unpack_color(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) {
-        r = (c >> 11) & 31;
-        g = (c >> 5) & 63;
-        b = c & 31;
-
-        r = (r << 3) | (r >> 2);
-        g = (g << 2) | (g >> 4);
-        b = (b << 3) | (b >> 2);
-    }
-
-    static inline void unpack_color_unscaled(uint32_t c, uint32_t &r, uint32_t &g, uint32_t &b) {
-        r = (c >> 11) & 31;
-        g = (c >> 5) & 63;
-        b = c & 31;
-    }
-};
-
-struct BC4Block {
-    constexpr static inline size_t EndpointSize = 1;
-    constexpr static inline size_t SelectorSize = 6;
-    constexpr static inline uint8_t SelectorBits = 3;
+    constexpr static inline size_t EndpointSize = 2;
+    constexpr static inline size_t SelectorSize = 4;
+    constexpr static inline uint8_t SelectorBits = 2;
     constexpr static inline uint8_t SelectorValues = 1 << SelectorBits;
     constexpr static inline uint8_t SelectorMask = SelectorValues - 1;
 
-    uint8_t LowAlpha;
-    uint8_t HighAlpha;
-    uint8_t Selectors[SelectorSize];
+   private:
+    std::array<uint8_t, EndpointSize> _low_color;   // low endpoint bytes, least-significant byte first
+    std::array<uint8_t, EndpointSize> _high_color;  // high endpoint bytes, least-significant byte first
+
+   public:
+    std::array<uint8_t, SelectorSize> selectors;  // one byte per row (indexed by y), SelectorBits bits per pixel; sized by SelectorSize instead of a literal 4
+};
 
-    inline uint32_t get_low_alpha() const { return LowAlpha; }
-    inline uint32_t get_high_alpha() const { return HighAlpha; }
-    inline bool is_alpha6_block() const { return get_low_alpha() <= get_high_alpha(); }
+class BC4Block {
+   public:
+    inline uint32_t GetLowAlpha() const { return low_alpha; }  // low endpoint byte, widened to 32 bits
+    inline uint32_t GetHighAlpha() const { return high_alpha; }  // high endpoint byte, widened to 32 bits
+    inline bool Is6Alpha() const { return GetLowAlpha() <= GetHighAlpha(); }  // true when low <= high, the case GetBlockValues routes to GetBlockValues6
 
-    inline uint64_t get_selector_bits() const {
-        return ((uint64_t)((uint32_t)Selectors[0] | ((uint32_t)Selectors[1] << 8U) | ((uint32_t)Selectors[2] << 16U) |
-                           ((uint32_t)Selectors[3] << 24U))) |
-               (((uint64_t)Selectors[4]) << 32U) | (((uint64_t)Selectors[5]) << 40U);
+    inline uint64_t GetSelectorBits() const {  // concatenates the six selector bytes into one integer, selectors[0] in the lowest 8 bits
+        return ((uint64_t)((uint32_t)selectors[0] | ((uint32_t)selectors[1] << 8U) | ((uint32_t)selectors[2] << 16U) | ((uint32_t)selectors[3] << 24U))) |
+               (((uint64_t)selectors[4]) << 32U) | (((uint64_t)selectors[5]) << 40U);  // bytes 4 and 5 are widened to 64 bits before shifting past bit 31
     }
 
-    inline uint32_t get_selector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
+    inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const {  // extracts pixel (x, y) from a caller-supplied selector word; x, y < 4
         assert((x < 4U) && (y < 4U));
         return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);
     }
 
-    static inline uint32_t get_block_values6(uint8_t *pDst, uint32_t l, uint32_t h) {
-        pDst[0] = static_cast<uint8_t>(l);
-        pDst[1] = static_cast<uint8_t>(h);
-        pDst[2] = static_cast<uint8_t>((l * 4 + h) / 5);
-        pDst[3] = static_cast<uint8_t>((l * 3 + h * 2) / 5);
-        pDst[4] = static_cast<uint8_t>((l * 2 + h * 3) / 5);
-        pDst[5] = static_cast<uint8_t>((l + h * 4) / 5);
-        pDst[6] = 0;
-        pDst[7] = 255;
-        return 6;
+    static inline std::array<uint8_t, 8> GetBlockValues6(uint32_t l, uint32_t h) {  // 8-entry table: 2 endpoints, 4 interpolated values, then 0 and 255
+        return {static_cast<uint8_t>(l),                    // [0] low endpoint
+                static_cast<uint8_t>(h),                    // [1] high endpoint
+                static_cast<uint8_t>((l * 4 + h) / 5),      // [2] weights 4:1, integer division
+                static_cast<uint8_t>((l * 3 + h * 2) / 5),  // [3] weights 3:2
+                static_cast<uint8_t>((l * 2 + h * 3) / 5),  // [4] weights 2:3
+                static_cast<uint8_t>((l + h * 4) / 5),      // [5] weights 1:4
+                0,                                          // [6] fixed minimum
+                255};                                       // [7] fixed maximum
     }
 
-    static inline uint32_t get_block_values8(uint8_t *pDst, uint32_t l, uint32_t h) {
-        pDst[0] = static_cast<uint8_t>(l);
-        pDst[1] = static_cast<uint8_t>(h);
-        pDst[2] = static_cast<uint8_t>((l * 6 + h) / 7);
-        pDst[3] = static_cast<uint8_t>((l * 5 + h * 2) / 7);
-        pDst[4] = static_cast<uint8_t>((l * 4 + h * 3) / 7);
-        pDst[5] = static_cast<uint8_t>((l * 3 + h * 4) / 7);
-        pDst[6] = static_cast<uint8_t>((l * 2 + h * 5) / 7);
-        pDst[7] = static_cast<uint8_t>((l + h * 6) / 7);
-        return 8;
+    static inline std::array<uint8_t, 8> GetBlockValues8(uint32_t l, uint32_t h) {  // 8-entry table: 2 endpoints followed by 6 interpolated values
+        return {static_cast<uint8_t>(l),                    // [0] low endpoint
+                static_cast<uint8_t>(h),                    // [1] high endpoint
+                static_cast<uint8_t>((l * 6 + h) / 7),      // [2] weights 6:1, integer division
+                static_cast<uint8_t>((l * 5 + h * 2) / 7),  // [3] weights 5:2
+                static_cast<uint8_t>((l * 4 + h * 3) / 7),  // [4] weights 4:3
+                static_cast<uint8_t>((l * 3 + h * 4) / 7),  // [5] weights 3:4
+                static_cast<uint8_t>((l * 2 + h * 5) / 7),  // [6] weights 2:5
+                static_cast<uint8_t>((l + h * 6) / 7)};     // [7] weights 1:6
     }
 
-    static inline uint32_t get_block_values(uint8_t *pDst, uint32_t l, uint32_t h) {
+    static inline std::array<uint8_t, 8> GetBlockValues(uint32_t l, uint32_t h) {  // returns the value table by value; the endpoint order selects the variant
         if (l > h)
-            return get_block_values8(pDst, l, h);
+            return GetBlockValues8(l, h);  // l > h: six interpolated values
         else
-            return get_block_values6(pDst, l, h);
+            return GetBlockValues6(l, h);  // l <= h: four interpolated values plus 0 and 255
     }
-};
 
-struct BC3Block {
-    BC4Block AlphaBlock;
-    BC1Block ColorBlock;
+    constexpr static inline size_t EndpointSize = 1;
+    constexpr static inline size_t SelectorSize = 6;
+    constexpr static inline uint8_t SelectorBits = 3;
+    constexpr static inline uint8_t SelectorValues = 1 << SelectorBits;
+    constexpr static inline uint8_t SelectorMask = SelectorValues - 1;
+
+    uint8_t low_alpha;                // first endpoint, returned by GetLowAlpha()
+    uint8_t high_alpha;               // second endpoint, returned by GetHighAlpha()
+    uint8_t selectors[SelectorSize];  // selector bytes; GetSelectorBits() joins them with selectors[0] lowest
 };
 
-struct BC5Block {
-    BC4Block RBlock;
-    BC4Block GBlock;
+class BC3Block {  // a BC4Block holding alpha followed by a BC1Block holding color
+   public:
+    BC4Block alpha_block;  // written by encode_bc3 via encode_bc4
+    BC1Block color_block;  // written by encode_bc3 via encode_bc1
 };
 
+class BC5Block {  // two BC4Blocks, one per encoded channel
+   public:
+    BC4Block r_block;  // written by encode_bc5 from the channel at offset chan0
+    BC4Block g_block;  // written by encode_bc5 from the channel at offset chan1
+};
 #pragma pack(pop)
\ No newline at end of file
diff --git a/src/color.cpp b/src/color.cpp
new file mode 100644
index 0000000..694516d
--- /dev/null
+++ b/src/color.cpp
@@ -0,0 +1,76 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "color.h"
+#include <algorithm>
+#include <cassert>
+
+// region Color32 implementation
+Color32::Color32() { set(0, 0, 0, 0xFF); }  // default color: R = G = B = 0, A = 0xFF
+
+Color32::Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A) { set(R, G, B, A); }  // stores the four channels exactly as given
+
+uint16_t Color32::pack565Unscaled(uint16_t R, uint16_t G, uint16_t B) { return B | (G << 5) | (R << 11); }  // B at bit 0, G at bit 5, R at bit 11; inputs are neither scaled nor masked
+
+uint16_t Color32::pack565(uint16_t R, uint16_t G, uint16_t B) { return pack565Unscaled(scale8To5(R), scale8To6(G), scale8To5(B)); }  // runs R/B through scale8To5 and G through scale8To6, then packs
+
+Color32 Color32::unpack565(uint16_t Packed) {  // splits a packed 5:6:5 value into channels; A takes the constructor's default
+    const uint8_t red = scale5To8((Packed >> 11) & 0x1F);   // bits 11-15
+    const uint8_t green = scale6To8((Packed >> 5) & 0x3F);  // bits 5-10
+    const uint8_t blue = scale5To8(Packed & 0x1F);          // bits 0-4
+
+    return Color32(red, green, blue);
+}
+
+uint8_t Color32::operator[](uint32_t Index) const {  // read-only channel access by index into C; returns a copy
+    assert(Index < 4);  // only indices 0..3 are valid
+    return C[Index];
+}
+
+uint8_t &Color32::operator[](uint32_t Index) {  // mutable channel access by index into C; returns a reference
+    assert(Index < 4);  // only indices 0..3 are valid
+    return C[Index];
+}
+
+void Color32::set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {  // overwrites all four channels
+    this->R = R;  // this-> is required: the parameters shadow the members
+    this->G = G;
+    this->B = B;
+    this->A = A;
+}
+
+void Color32::set(const Color32 &Other) {  // copies R, G and B from Other; this color's A keeps its current value
+    this->R = Other.R;
+    this->G = Other.G;
+    this->B = Other.B;
+    // A is deliberately not copied: unpack_bc1 calls set() when set_alpha is false so that the destination alpha survives
+}
+
+Color32 Color32::min(const Color32 &a, const Color32 &b) {  // channel-wise minimum of a and b, alpha included
+    return {std::min(a.R, b.R), std::min(a.G, b.G), std::min(a.B, b.B), std::min(a.A, b.A)};
+}
+
+Color32 Color32::max(const Color32 &a, const Color32 &b) {  // channel-wise maximum of a and b, alpha included
+    return {std::max(a.R, b.R), std::max(a.G, b.G), std::max(a.B, b.B), std::max(a.A, b.A)};
+}
+
+uint16_t Color32::pack565() { return pack565(R, G, B); }  // packs this color's R, G, B via the static overload; A is not used
+
+uint16_t Color32::pack565Unscaled() { return pack565Unscaled(R, G, B); }  // packs this color's R, G, B without scaling; A is not used
+// endregion
\ No newline at end of file
diff --git a/src/color.h b/src/color.h
new file mode 100644
index 0000000..864b7b9
--- /dev/null
+++ b/src/color.h
@@ -0,0 +1,64 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Affero General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Affero General Public License for more details.
+
+    You should have received a copy of the GNU Affero General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include "util.h"
+#include <cassert>
+#include <cstdint>
+#include <cstdlib>
+
+#pragma pack(push, 1)
+class Color32 {  // four 8-bit channels, addressable by name (R, G, B, A) or by index (C[0..3])
+  public:
+    union {  // both views share the same four bytes
+        struct {
+            uint8_t R;
+            uint8_t G;
+            uint8_t B;
+            uint8_t A;
+        };
+
+        uint8_t C[4];  // same storage as R, G, B, A, in that order
+    };
+
+    Color32();  // defined in color.cpp as (0, 0, 0, 0xFF)
+
+    Color32(uint8_t R, uint8_t G, uint8_t B, uint8_t A = 0xFF);  // A defaults to 0xFF when omitted
+
+    static uint16_t pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);  // packs the given values as-is: B | G << 5 | R << 11
+    static uint16_t pack565(uint16_t R, uint16_t G, uint16_t B);          // scales each channel first, then packs
+
+    static Color32 unpack565(uint16_t Packed);  // inverse direction: packed 5:6:5 value to a Color32
+
+    bool operator==(const Color32 &Rhs) const { return R == Rhs.R && G == Rhs.G && B == Rhs.B && A == Rhs.A; }  // all four channels must match
+
+    uint8_t operator[](uint32_t Index) const;  // channel by index, asserted < 4
+    uint8_t &operator[](uint32_t Index);       // writable channel by index, asserted < 4
+
+    uint16_t pack565();          // packs this color's R, G, B (scaled)
+    uint16_t pack565Unscaled();  // packs this color's R, G, B (unscaled)
+
+    static Color32 min(const Color32 &A, const Color32 &B);  // channel-wise minimum
+    static Color32 max(const Color32 &A, const Color32 &B);  // channel-wise maximum
+
+    void set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);  // overwrites all four channels
+
+    void set(const Color32 &Other);
+};
+#pragma pack(pop)
\ No newline at end of file
diff --git a/src/rgbcx.cpp b/src/rgbcx.cpp
index 22d5229..cd5291f 100644
--- a/src/rgbcx.cpp
+++ b/src/rgbcx.cpp
@@ -1,16 +1,16 @@
 // rgbcx.h v1.12
 // High-performance scalar BC1-5 encoders. Public Domain or MIT license (you choose - see below), written by Richard Geldreich 2020 <richgel99@gmail.com>.
 
+#include "rgbcx.h"
+#include "blocks.h"
+#include "color.h"
+#include "tables.h"
+#include "util.h"
 #include <algorithm>
 #include <cassert>
 #include <climits>
 #include <cstdlib>
 #include <cstring>
-#include <cmath>
-#include "util.h"
-#include "tables.h"
-#include "blocks.h"
-#include "rgbcx.h"
 
 namespace rgbcx {
 
@@ -159,7 +159,7 @@ static inline int interp_half_5_6_amd(int c0, int c1) {
 }
 
 static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) {
-    // assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1);
+    // assert(scale5To8(v0) == c0 && scale5To8(v1) == c1);
     switch (mode) {
     case bc1_approx_mode::cBC1NVidia:
         return interp_5_nv(v0, v1);
@@ -176,7 +176,7 @@ static inline int interp_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode)
 static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) {
     (void)v0;
     (void)v1;
-    // assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1);
+    // assert(scale6To8(v0) == c0 && scale6To8(v1) == c1);
     switch (mode) {
     case bc1_approx_mode::cBC1NVidia:
         return interp_6_nv(c0, c1);
@@ -191,7 +191,7 @@ static inline int interp_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode)
 }
 
 static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode mode) {
-    assert(scale_5_to_8(v0) == c0 && scale_5_to_8(v1) == c1);
+    assert(scale5To8(v0) == c0 && scale5To8(v1) == c1);
     switch (mode) {
     case bc1_approx_mode::cBC1NVidia:
         return interp_half_5_nv(v0, v1);
@@ -207,7 +207,7 @@ static inline int interp_half_5(int v0, int v1, int c0, int c1, bc1_approx_mode
 static inline int interp_half_6(int v0, int v1, int c0, int c1, bc1_approx_mode mode) {
     (void)v0;
     (void)v1;
-    assert(scale_6_to_8(v0) == c0 && scale_6_to_8(v1) == c1);
+    assert(scale6To8(v0) == c0 && scale6To8(v1) == c1);
     switch (mode) {
     case bc1_approx_mode::cBC1NVidia:
         return interp_half_6_nv(c0, c1);
@@ -434,12 +434,12 @@ void encode_bc1_solid_block(void *pDst, uint32_t fr, uint32_t fg, uint32_t fb, b
         }
     }
 
-    pDst_block->set_low_color(static_cast<uint16_t>(max16));
-    pDst_block->set_high_color(static_cast<uint16_t>(min16));
-    pDst_block->Selectors[0] = static_cast<uint8_t>(mask);
-    pDst_block->Selectors[1] = static_cast<uint8_t>(mask);
-    pDst_block->Selectors[2] = static_cast<uint8_t>(mask);
-    pDst_block->Selectors[3] = static_cast<uint8_t>(mask);
+    pDst_block->SetLowColor(static_cast<uint16_t>(max16));
+    pDst_block->SetHighColor(static_cast<uint16_t>(min16));
+    pDst_block->selectors[0] = static_cast<uint8_t>(mask);
+    pDst_block->selectors[1] = static_cast<uint8_t>(mask);
+    pDst_block->selectors[2] = static_cast<uint8_t>(mask);
+    pDst_block->selectors[3] = static_cast<uint8_t>(mask);
 }
 
 static const float g_midpoint5[32] = {.015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f,
@@ -944,8 +944,8 @@ static inline void precise_round_565_noscale(vec3F xl, vec3F xh, int &trial_lr,
 }
 
 static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) {
-    uint32_t lc16 = BC1Block::pack_unscaled_color(lr, lg, lb);
-    uint32_t hc16 = BC1Block::pack_unscaled_color(hr, hg, hb);
+    uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb);
+    uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb);
 
     // Always forbid 3 color blocks
     if (lc16 == hc16) {
@@ -964,13 +964,13 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int
         }
 
         assert(lc16 > hc16);
-        pDst_block->set_low_color(static_cast<uint16_t>(lc16));
-        pDst_block->set_high_color(static_cast<uint16_t>(hc16));
+        pDst_block->SetLowColor(static_cast<uint16_t>(lc16));
+        pDst_block->SetHighColor(static_cast<uint16_t>(hc16));
 
-        pDst_block->Selectors[0] = mask;
-        pDst_block->Selectors[1] = mask;
-        pDst_block->Selectors[2] = mask;
-        pDst_block->Selectors[3] = mask;
+        pDst_block->selectors[0] = mask;
+        pDst_block->selectors[1] = mask;
+        pDst_block->selectors[2] = mask;
+        pDst_block->selectors[3] = mask;
     } else {
         uint8_t invert_mask = 0;
         if (lc16 < hc16) {
@@ -979,24 +979,25 @@ static inline void bc1_encode4(BC1Block *pDst_block, int lr, int lg, int lb, int
         }
 
         assert(lc16 > hc16);
-        pDst_block->set_low_color((uint16_t)lc16);
-        pDst_block->set_high_color((uint16_t)hc16);
+        pDst_block->SetLowColor((uint16_t)lc16);
+        pDst_block->SetHighColor((uint16_t)hc16);
 
         uint32_t packed_sels = 0;
         static const uint8_t s_sel_trans[4] = {0, 2, 3, 1};
         for (uint32_t i = 0; i < 16; i++)
             packed_sels |= ((uint32_t)s_sel_trans[sels[i]] << (i * 2));
 
-        pDst_block->Selectors[0] = (uint8_t)packed_sels ^ invert_mask;
-        pDst_block->Selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
-        pDst_block->Selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
-        pDst_block->Selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
+        // todo: make this less silly to prevent packing and unpacking
+        pDst_block->selectors[0] = (uint8_t)packed_sels ^ invert_mask;
+        pDst_block->selectors[1] = (uint8_t)(packed_sels >> 8) ^ invert_mask;
+        pDst_block->selectors[2] = (uint8_t)(packed_sels >> 16) ^ invert_mask;
+        pDst_block->selectors[3] = (uint8_t)(packed_sels >> 24) ^ invert_mask;
     }
 }
 
 static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int hr, int hg, int hb, const uint8_t sels[16]) {
-    uint32_t lc16 = BC1Block::pack_unscaled_color(lr, lg, lb);
-    uint32_t hc16 = BC1Block::pack_unscaled_color(hr, hg, hb);
+    uint16_t lc16 = Color32::pack565Unscaled(lr, lg, lb);
+    uint16_t hc16 = Color32::pack565Unscaled(hr, hg, hb);
 
     bool invert_flag = false;
     if (lc16 > hc16) {
@@ -1006,8 +1007,8 @@ static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int
 
     assert(lc16 <= hc16);
 
-    pDst_block->set_low_color((uint16_t)lc16);
-    pDst_block->set_high_color((uint16_t)hc16);
+    pDst_block->SetLowColor((uint16_t)lc16);
+    pDst_block->SetHighColor((uint16_t)hc16);
 
     uint32_t packed_sels = 0;
 
@@ -1021,10 +1022,11 @@ static inline void bc1_encode3(BC1Block *pDst_block, int lr, int lg, int lb, int
             packed_sels |= ((uint32_t)sels[i] << (i * 2));
     }
 
-    pDst_block->Selectors[0] = (uint8_t)packed_sels;
-    pDst_block->Selectors[1] = (uint8_t)(packed_sels >> 8);
-    pDst_block->Selectors[2] = (uint8_t)(packed_sels >> 16);
-    pDst_block->Selectors[3] = (uint8_t)(packed_sels >> 24);
+    // todo: make this less silly to prevent packing and unpacking
+    pDst_block->selectors[0] = (uint8_t)packed_sels;
+    pDst_block->selectors[1] = (uint8_t)(packed_sels >> 8);
+    pDst_block->selectors[2] = (uint8_t)(packed_sels >> 16);
+    pDst_block->selectors[3] = (uint8_t)(packed_sels >> 24);
 }
 
 struct bc1_encode_results {
@@ -1138,13 +1140,13 @@ static bool try_3color_block_useblack(const Color32 *pSrc_pixels, uint32_t flags
         }
     }
 
-    int lr = scale_8_to_5(pSrc_pixels[low_c].R);
-    int lg = scale_8_to_6(pSrc_pixels[low_c].G);
-    int lb = scale_8_to_5(pSrc_pixels[low_c].B);
+    int lr = scale8To5(pSrc_pixels[low_c].R);
+    int lg = scale8To6(pSrc_pixels[low_c].G);
+    int lb = scale8To5(pSrc_pixels[low_c].B);
 
-    int hr = scale_8_to_5(pSrc_pixels[high_c].R);
-    int hg = scale_8_to_6(pSrc_pixels[high_c].G);
-    int hb = scale_8_to_5(pSrc_pixels[high_c].B);
+    int hr = scale8To5(pSrc_pixels[high_c].R);
+    int hg = scale8To6(pSrc_pixels[high_c].G);
+    int hb = scale8To5(pSrc_pixels[high_c].B);
 
     uint8_t trial_sels[16];
     uint32_t trial_err = bc1_find_sels3_fullerr(true, pSrc_pixels, lr, lg, lb, hr, hg, hb, trial_sels, UINT32_MAX);
@@ -1491,14 +1493,14 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
 
         // Grayscale blocks are a common enough case to specialize.
         if ((max_r - min_r) < 2) {
-            lr = lb = hr = hb = scale_8_to_5(fr);
-            lg = hg = scale_8_to_6(fr);
+            lr = lb = hr = hb = scale8To5(fr);
+            lg = hg = scale8To6(fr);
         } else {
-            lr = lb = scale_8_to_5(min_r);
-            lg = scale_8_to_6(min_r);
+            lr = lb = scale8To5(min_r);
+            lg = scale8To6(min_r);
 
-            hr = hb = scale_8_to_5(max_r);
-            hg = scale_8_to_6(max_r);
+            hr = hb = scale8To5(max_r);
+            hg = scale8To6(max_r);
         }
     } else if (flags & cEncodeBC1Use2DLS) {
         //  2D Least Squares approach from Humus's example, with added inset and optimal rounding.
@@ -1721,13 +1723,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
         if (icov_yz < 0)
             std::swap(y0, y1);
 
-        lr = scale_8_to_5(x0);
-        lg = scale_8_to_6(y0);
-        lb = scale_8_to_5(min_b);
+        lr = scale8To5(x0);
+        lg = scale8To6(y0);
+        lb = scale8To5(min_b);
 
-        hr = scale_8_to_5(x1);
-        hg = scale_8_to_6(y1);
-        hb = scale_8_to_5(max_b);
+        hr = scale8To5(x1);
+        hg = scale8To6(y1);
+        hb = scale8To5(max_b);
     } else {
         // Select 2 colors along the principle axis. (There must be a faster/simpler way.)
         uint32_t low_c = 0, high_c = 0;
@@ -1806,13 +1808,13 @@ static inline void encode_bc1_pick_initial(const Color32 *pSrc_pixels, uint32_t
         low_c = low_dot & 15;
         high_c = high_dot & 15;
 
-        lr = scale_8_to_5(pSrc_pixels[low_c].R);
-        lg = scale_8_to_6(pSrc_pixels[low_c].G);
-        lb = scale_8_to_5(pSrc_pixels[low_c].B);
+        lr = scale8To5(pSrc_pixels[low_c].R);
+        lg = scale8To6(pSrc_pixels[low_c].G);
+        lb = scale8To5(pSrc_pixels[low_c].B);
 
-        hr = scale_8_to_5(pSrc_pixels[high_c].R);
-        hg = scale_8_to_6(pSrc_pixels[high_c].G);
-        hb = scale_8_to_5(pSrc_pixels[high_c].B);
+        hr = scale8To5(pSrc_pixels[high_c].R);
+        hg = scale8To6(pSrc_pixels[high_c].G);
+        hb = scale8To5(pSrc_pixels[high_c].B);
     }
 }
 
@@ -2349,6 +2351,7 @@ void encode_bc4(void *pDst, const uint8_t *pPixels, uint32_t stride) {
 
     const uint64_t f = a0 | a1 | a2 | a3;
 
+    // TODO: make this less silly by using the BC4Block class
     pDst_bytes[2] = (uint8_t)f;
     pDst_bytes[3] = (uint8_t)(f >> 8U);
     pDst_bytes[4] = (uint8_t)(f >> 16U);
@@ -2357,28 +2360,28 @@ void encode_bc4(void *pDst, const uint8_t *pPixels, uint32_t stride) {
     pDst_bytes[7] = (uint8_t)(f >> 40U);
 }
 
-void encode_bc3(void *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t total_orderings_to_try) {
+void encode_bc3(BC3Block *pDst, const uint8_t *pPixels, uint32_t flags, uint32_t total_orderings_to_try) {  // fills both halves of *pDst from pPixels
     assert(g_initialized);
 
     // 3-color blocks are not allowed with BC3 (on most GPU's).
     flags &= ~(cEncodeBC1Use3ColorBlocksForBlackPixels | cEncodeBC1Use3ColorBlocks);
 
-    encode_bc4(pDst, pPixels + 3, 4);
-    encode_bc1(static_cast<uint8_t *>(pDst) + 8, pPixels, flags, total_orderings_to_try);
+    encode_bc4(&pDst->alpha_block, pPixels + 3, 4);                          // alpha source: byte offset 3, stride 4
+    encode_bc1(&pDst->color_block, pPixels, flags, total_orderings_to_try);  // color uses the flags with the 3-color options cleared
 }
 
-void encode_bc3(uint32_t level, void *pDst, const uint8_t *pPixels) {
+void encode_bc3(uint32_t level, BC3Block *pDst, const uint8_t *pPixels) {  // level-based overload; fills both halves of *pDst from pPixels
     assert(g_initialized);
 
-    encode_bc4(pDst, pPixels + 3, 4);
-    encode_bc1(level, static_cast<uint8_t *>(pDst) + 8, pPixels, false, false);
+    encode_bc4(&pDst->alpha_block, pPixels + 3, 4);                // alpha source: byte offset 3, stride 4
+    encode_bc1(level, &pDst->color_block, pPixels, false, false);  // both boolean options of encode_bc1 are passed as false
 }
 
-void encode_bc5(void *pDst, const uint8_t *pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) {
+void encode_bc5(BC5Block *pDst, const uint8_t *pPixels, uint32_t chan0, uint32_t chan1, uint32_t stride) {  // encodes two channels of pPixels into *pDst
     assert(g_initialized);
 
-    encode_bc4(pDst, pPixels + chan0, stride);
-    encode_bc4(static_cast<uint8_t *>(pDst) + 8, pPixels + chan1, stride);
+    encode_bc4(&pDst->r_block, pPixels + chan0, stride);  // chan0 is a byte offset into pPixels; stride is forwarded unchanged
+    encode_bc4(&pDst->g_block, pPixels + chan1, stride);  // chan1 likewise, into the second block
 }
 
 // Returns true if the block uses 3 color punchthrough alpha mode.
@@ -2390,8 +2393,8 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
 
     const BC1Block *pBlock = static_cast<const BC1Block *>(pBlock_bits);
 
-    const uint32_t l = pBlock->get_low_color();
-    const uint32_t h = pBlock->get_high_color();
+    const uint32_t l = pBlock->GetLowColor();
+    const uint32_t h = pBlock->GetHighColor();
 
     Color32 c[4];
 
@@ -2454,17 +2457,17 @@ bool unpack_bc1(const void *pBlock_bits, void *pPixels, bool set_alpha, bc1_appr
 
     if (set_alpha) {
         for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) {
-            pDst_pixels[0] = c[pBlock->get_selector(0, y)];
-            pDst_pixels[1] = c[pBlock->get_selector(1, y)];
-            pDst_pixels[2] = c[pBlock->get_selector(2, y)];
-            pDst_pixels[3] = c[pBlock->get_selector(3, y)];
+            pDst_pixels[0] = c[pBlock->GetSelector(0, y)];
+            pDst_pixels[1] = c[pBlock->GetSelector(1, y)];
+            pDst_pixels[2] = c[pBlock->GetSelector(2, y)];
+            pDst_pixels[3] = c[pBlock->GetSelector(3, y)];
         }
     } else {
         for (uint32_t y = 0; y < 4; y++, pDst_pixels += 4) {
-            pDst_pixels[0].set(c[pBlock->get_selector(0, y)]);
-            pDst_pixels[1].set(c[pBlock->get_selector(1, y)]);
-            pDst_pixels[2].set(c[pBlock->get_selector(2, y)]);
-            pDst_pixels[3].set(c[pBlock->get_selector(3, y)]);
+            pDst_pixels[0].set(c[pBlock->GetSelector(0, y)]);
+            pDst_pixels[1].set(c[pBlock->GetSelector(1, y)]);
+            pDst_pixels[2].set(c[pBlock->GetSelector(2, y)]);
+            pDst_pixels[3].set(c[pBlock->GetSelector(3, y)]);
         }
     }
 
@@ -2476,16 +2479,15 @@ void unpack_bc4(const void *pBlock_bits, uint8_t *pPixels, uint32_t stride) {
 
     const BC4Block *pBlock = static_cast<const BC4Block *>(pBlock_bits);
 
-    uint8_t sel_values[8];
-    BC4Block::get_block_values(sel_values, pBlock->get_low_alpha(), pBlock->get_high_alpha());
+    auto sel_values = BC4Block::GetBlockValues(pBlock->GetLowAlpha(), pBlock->GetHighAlpha());
 
-    const uint64_t selector_bits = pBlock->get_selector_bits();
+    const uint64_t selector_bits = pBlock->GetSelectorBits();
 
     for (uint32_t y = 0; y < 4; y++, pPixels += (stride * 4U)) {
-        pPixels[0] = sel_values[pBlock->get_selector(0, y, selector_bits)];
-        pPixels[stride * 1] = sel_values[pBlock->get_selector(1, y, selector_bits)];
-        pPixels[stride * 2] = sel_values[pBlock->get_selector(2, y, selector_bits)];
-        pPixels[stride * 3] = sel_values[pBlock->get_selector(3, y, selector_bits)];
+        pPixels[0] = sel_values[pBlock->GetSelector(0, y, selector_bits)];
+        pPixels[stride * 1] = sel_values[pBlock->GetSelector(1, y, selector_bits)];
+        pPixels[stride * 2] = sel_values[pBlock->GetSelector(2, y, selector_bits)];
+        pPixels[stride * 3] = sel_values[pBlock->GetSelector(3, y, selector_bits)];
     }
 }
 
diff --git a/src/util.h b/src/util.h
index 8d02427..55d21fb 100644
--- a/src/util.h
+++ b/src/util.h
@@ -23,17 +23,17 @@
 static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast<uint32_t>(-i) : static_cast<uint32_t>(i); }
 static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast<uint64_t>(-i) : static_cast<uint64_t>(i); }
 
-static inline uint8_t scale_8_to_5(uint32_t v) {
+static inline uint8_t scale8To5(uint32_t v) {
     v = v * 31 + 128;
     return (uint8_t)((v + (v >> 8)) >> 8);
 }
-static inline uint8_t scale_8_to_6(uint32_t v) {
+static inline uint8_t scale8To6(uint32_t v) {
     v = v * 63 + 128;
     return (uint8_t)((v + (v >> 8)) >> 8);
 }
 
-static inline int scale_5_to_8(int v) { return (v << 3) | (v >> 2); }
-static inline int scale_6_to_8(int v) { return (v << 2) | (v >> 4); }
+static inline int scale5To8(int v) { return (v << 3) | (v >> 2); }
+static inline int scale6To8(int v) { return (v << 2) | (v >> 4); }
 
 template <typename S> inline S maximum(S a, S b) { return (a > b) ? a : b; }
 template <typename S> inline S maximum(S a, S b, S c) { return maximum(maximum(a, b), c); }
@@ -55,7 +55,7 @@ static inline float clampf(float value, float low, float high) {
 static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); }
 
 template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); }
-static inline int32_t clampi(int32_t value, int32_t low, int32_t high) {
+static inline int32_t          clampi(int32_t value, int32_t low, int32_t high) {
     if (value < low)
         value = low;
     else if (value > high)
@@ -67,8 +67,3 @@ static inline int squarei(int a) { return a * a; }
 static inline int absi(int a) { return (a < 0) ? -a : a; }
 
 template <typename F> inline F lerp(F a, F b, F s) { return a + (b - a) * s; }
-
-
-
-
-