Implement BC1 Decoder

2024-09-13 06:37:34 +00:00 · 2021-02-10 13:34:11 -08:00
parent 1cf01f6f01
commit 76d39d7ef8
12 changed files with 176 additions and 67 deletions
--- a/src/BlockEncoder.h
+++ b/src/BlockEncoder.h
@@ -27,8 +27,8 @@ namespace rgbcx {

 template <class B, size_t M, size_t N> class BlockEncoder {
   public:
-    using DecodedBlock = ColorBlock<M, N, Color32>;
+    using DecodedBlock = ColorBlock<Color32, M, N>;
    using EncodedBlock = B;
-    virtual void EncodeBlock(const EncodedBlock *dest, const DecodedBlock *pixels) = 0;
+    virtual void EncodeBlock(EncodedBlock *dest, DecodedBlock *const pixels) const = 0;
 };
 }  // namespace rgbcx
--- a/src/ColorBlock.h
+++ b/src/ColorBlock.h
@@ -26,14 +26,14 @@

 #include "blocks.h"

-template <size_t M, size_t N, class T> class ColorBlock {
+template <typename T, size_t M, size_t N> class ColorBlock {
   public:
-    using row = std::span<T, N>;
+    using Row = std::span<T, N>;

    ColorBlock(const std::array<T *, N> &rows) {
-        for (int i = 0; i < height(); i++) { this[i] = row(rows[i], rows[i] * N * sizeof(T)); }
+        for (int i = 0; i < height(); i++) { this[i] = Row(rows[i], rows[i] * N * sizeof(T)); }
    }
-    ColorBlock(const std::array<row, N> &rows) {
+    ColorBlock(const std::array<Row, N> &rows) {
        for (int i = 0; i < height(); i++) { this[i] = rows[i]; }
    }

@@ -53,14 +53,15 @@ template <size_t M, size_t N, class T> class ColorBlock {
        for (int i = 0; i < height(); i++) { _rows[i] = std::span(start[i * imageWidth]); }
    }

-    constexpr T &operator[](size_t n) noexcept { return _rows[n]; }
+    constexpr Row &operator[](size_t n) noexcept { return _rows[n]; }
+    constexpr const Row &operator[](size_t n) const noexcept { return _rows[n]; }

    constexpr int width() noexcept { return N; }
    constexpr int height() noexcept { return M; }
    constexpr int size() noexcept { return N * M; }

   private:
-    std::array<row, M> _rows;
+    std::array<Row, M> _rows;
 };

-using Color4x4= ColorBlock<4, 4, Color32>;
+using Color4x4= ColorBlock<Color32, 4, 4>;
--- a/src/Decoders/BC1Decoder.cpp
+++ b/src/Decoders/BC1Decoder.cpp
@@ -19,26 +19,25 @@

 #include "BC1Decoder.h"

-#include <array>
-
-#include "ColorBlock.h"
-
-void rgbcx::BC1Decoder::DecodeBlock(const Color4x4 *dest, const BC1Block *block) {
+namespace rgbcx {
+void BC1Decoder::DecodeBlock(Color4x4 *dest, BC1Block *const block) const {
    const unsigned l = block->GetLowColor();
    const unsigned h = block->GetHighColor();
+    const auto selectors = block->UnpackSelectors();
+    const auto colors = _interpolator.InterpolateBC1(l, h);

-    const auto l_color = Color32::Unpack565(l);
-    const auto h_color = Color32::Unpack565(h);
-
-    std::array<Color32, 4> colors;
-    colors[0] = l_color;
-    colors[1] = h_color;
-
-    bool three_color = (h >= l);
-    if (three_color) {
-        colors[2] = _interpolator.InterpolateHalfColor(l_color, h_color);
-        colors[3] = Color32(0,0,0);
-    } else {
-        colors[2] = _interpolator.InterpolateColor()
+    for (int y = 0; y < 4; y++) {
+        for (int x = 0; x < 4; x++) {
+            const auto selector = selectors[y][x];
+            const auto color = colors[selector];
+            assert(selector < 4);
+            assert((color.a == 0 && selector == 3 && l <= h) || color.a == UINT8_MAX);
+            if (_write_alpha) {
+                (*dest)[y][x].Set(color);
+            } else {
+                (*dest)[y][x].SetRGB(color);
+            }
+        }
    }
 }
+}  // namespace rgbcx
--- a/src/Decoders/BC1Decoder.h
+++ b/src/Decoders/BC1Decoder.h
@@ -19,19 +19,23 @@

 #pragma once

+#include "../blocks.h"
+#include "../interpolator.h"
+#include "../ndebug.h"
 #include "BlockDecoder.h"
-#include "blocks.h"
-#include "interpolator.h"
 namespace rgbcx {
 class BC1Decoder : public BlockDecoder<BC1Block, 4, 4> {
   public:
-    BC1Decoder(const Interpolator &interpolator) : _interpolator(interpolator) {}
-
+    BC1Decoder(const Interpolator &interpolator, bool write_alpha = false) : _interpolator(interpolator), _write_alpha(write_alpha) {}
    BC1Decoder() : BC1Decoder(Interpolator()) {}

-    void DecodeBlock(const Color4x4 *dest, const BC1Block *block) override;
+    void DecodeBlock(Color4x4 *dest, BC1Block *const block) const noexcept(ndebug) override;
+
+    constexpr const Interpolator &GetInterpolator() const { return _interpolator; }
+    constexpr bool WritesAlpha() const { return _write_alpha; }

   private:
    const Interpolator &_interpolator;
+    const bool _write_alpha;
 };
 }  // namespace rgbcx
--- a/src/Decoders/BlockDecoder.h
+++ b/src/Decoders/BlockDecoder.h
@@ -21,14 +21,14 @@

 #include <cstdint>

-#include "ColorBlock.h"
+#include "../ColorBlock.h"

 namespace rgbcx {

 template <class B, size_t M, size_t N> class BlockDecoder {
   public:
-    using DecodedBlock = ColorBlock<M, N, Color32>;
+    using DecodedBlock = ColorBlock<Color32, M, N>;
    using EncodedBlock = B;
-    virtual void DecodeBlock(const DecodedBlock *dest, const EncodedBlock *block) = 0;
+    virtual void DecodeBlock(DecodedBlock *dest, EncodedBlock *const block) const = 0;
 };
 }  // namespace rgbcx
--- a/src/blocks.h
+++ b/src/blocks.h
@@ -25,10 +25,13 @@
 #include <cstdlib>

 #include "color.h"
+#include "util.h"

 #pragma pack(push, 1)
 class BC1Block {
   public:
+    using UnpackedSelectors = std::array<std::array<uint8_t, 4>, 4>;
+
    uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; }
    uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; }
    Color32 GetLowColor32() const { return Color32::Unpack565(GetLowColor()); }
@@ -53,6 +56,20 @@ class BC1Block {
        selectors[y] |= (val << (x * SelectorBits));
    }

+    UnpackedSelectors UnpackSelectors() const {
+        UnpackedSelectors unpacked;
+        for (int i = 0; i < 4; i++) {
+            unpacked[i] = Unpack<uint8_t, uint8_t, 2, 4>(selectors[i]);
+        }
+        return unpacked;
+    }
+
+    void PackSelectors(const UnpackedSelectors& unpacked) {
+        for (int i = 0; i < 4; i++) {
+            selectors[i] = Pack<uint8_t, uint8_t, 2, 4>(unpacked[i]);
+        }
+    }
+
    constexpr static inline size_t EndpointSize = 2;
    constexpr static inline size_t SelectorSize = 4;
    constexpr static inline uint8_t SelectorBits = 2;
--- a/src/color.cpp
+++ b/src/color.cpp
@@ -41,6 +41,14 @@ Color32 Color32::Unpack565(uint16_t Packed) {
    return Color32(R, G, B);
 }

+Color32 Color32::Unpack565Unscaled(uint16_t Packed) {
+    uint8_t R = (Packed >> 11) & 0x1F;
+    uint8_t G = (Packed >> 5) & 0x3F;
+    uint8_t B = Packed & 0x1F;
+
+    return Color32(R, G, B);
+}
+
 uint8_t Color32::operator[](uint32_t Index) const {
    assert(Index < 4);
    return c[Index];
@@ -51,18 +59,17 @@ uint8_t &Color32::operator[](uint32_t Index) {
    return c[Index];
 }

-void Color32::Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {
-    this->r = R;
-    this->g = G;
-    this->b = B;
-    this->a = A;
+void Color32::Set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va = 0xFF) {
+    r = vr;
+    g = vg;
+    b = vb;
+    a = va;
 }

-void Color32::Set(const Color32 &Other) {
-    this->r = Other.r;
-    this->g = Other.g;
-    this->b = Other.b;
-    this->a = Other.a;
+void Color32::SetRGB(uint8_t vr, uint8_t vg, uint8_t vb) {
+    r = vr;
+    g = vg;
+    b = vb;
 }

 Color32 Color32::min(const Color32 &a, const Color32 &b) {
@@ -76,4 +83,8 @@ Color32 Color32::max(const Color32 &a, const Color32 &b) {
 uint16_t Color32::pack565() { return Pack565(r, g, b); }

 uint16_t Color32::pack565Unscaled() { return Pack565Unscaled(r, g, b); }
+
+Color32 Color32::ScaleTo565() const { return Color32(scale8To5(r), scale8To6(g), scale8To5(b)); }
+Color32 Color32::ScaleFrom565() const { return Color32(scale5To8(r), scale6To8(g), scale5To8(b)); }
+
 // endregion
--- a/src/color.h
+++ b/src/color.h
@@ -42,6 +42,7 @@ class Color32 {
    static uint16_t Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);
    static uint16_t Pack565(uint16_t R, uint16_t G, uint16_t B);

+    static Color32 Unpack565Unscaled(uint16_t Packed);
    static Color32 Unpack565(uint16_t Packed);

    bool operator==(const Color32 &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }
@@ -52,11 +53,16 @@ class Color32 {
    uint16_t pack565();
    uint16_t pack565Unscaled();

+    Color32 ScaleTo565() const;
+    Color32 ScaleFrom565() const;
+
    static Color32 min(const Color32 &A, const Color32 &B);
    static Color32 max(const Color32 &A, const Color32 &B);

-    void Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);
+    void Set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
+    void Set(const Color32 &other) { Set(other.r, other.g, other.b, other.a); }

-    void Set(const Color32 &Other);
+    void SetRGB(uint8_t vr, uint8_t vg, uint8_t vb);
+    void SetRGB(const Color32 &other) { SetRGB(other.r, other.g, other.b); }
 };
 #pragma pack(pop)
--- a/src/interpolator.cpp
+++ b/src/interpolator.cpp
@@ -36,7 +36,7 @@ Interpolator::Interpolator() {
 void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len) {
    int size = 1 << len;

-    assert((len == 5 && size == size5) || (len == 6 && size == size6));
+    assert((len == 5 && size == Size5) || (len == 6 && size == size6));

    const uint8_t *expand = (len == 5) ? &Expand5[0] : &Expand6[0];

@@ -93,7 +93,7 @@ int Interpolator::Interpolate6(int v0, int v1) const { return Interpolate8(scale
 int Interpolator::InterpolateHalf5(int v0, int v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
 int Interpolator::InterpolateHalf6(int v0, int v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }

-std::array<Color32, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) {
+std::array<Color32, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) const {
    auto colors = std::array<Color32, 4>();
    colors[0] = Color32::Unpack565(low);
    colors[1] = Color32::Unpack565(high);
@@ -154,6 +154,27 @@ int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
    const int gdiff = v1 - v0;
    return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
 }
+
+std::array<Color32, 4> InterpolatorNvidia::InterpolateBC1(uint16_t low, uint16_t high) const {
+    // Nvidia is special: interpolation can't be done with 8-bit values, so we need to override the default behavior
+    auto colors = std::array<Color32, 4>();
+    auto low565 = Color32::Unpack565Unscaled(low);
+    auto high565 = Color32::Unpack565Unscaled(high);
+    colors[0] = low565.ScaleFrom565();
+    colors[1] = high565.ScaleFrom565();
+
+    if (low > high) {
+        // 4-color mode
+        colors[2] = InterpolateColor565(low565, high565);
+        colors[3] = InterpolateColor565(high565, low565);
+    } else {
+        // 3-color mode
+        colors[2] = InterpolateHalfColor565(low565, high565);
+        colors[3] = Color32(0, 0, 0, 0);  // transparent black
+    }
+
+    return colors;
+}
 // endregion

 // region InterpolatorAMD implementation
--- a/src/interpolator.h
+++ b/src/interpolator.h
@@ -24,19 +24,14 @@
 #include <memory>

 #include "color.h"
+#include "ndebug.h"
 #include "util.h"

-#ifdef NDEBUG  // asserts disabled
-static constexpr bool ndebug = true;
-#else  // asserts enabled
-static constexpr bool ndebug = false;
-#endif
-
 namespace rgbcx {

-template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> ExpandArray() {
-    std::array<uint8_t, size> res;
-    for (int i = 0; i < size; i++) { res[i] = op(i); }
+template <size_t Size, int Op(int)> static constexpr std::array<uint8_t, Size> ExpandArray() {
+    std::array<uint8_t, Size> res;
+    for (int i = 0; i < Size; i++) { res[i] = Op(i); }
    return res;
 }

@@ -48,9 +43,6 @@ class Interpolator {
    //        uint8_t error;
    //    };

-    constexpr static inline size_t size5 = 32;
-    constexpr static inline size_t size6 = 64;
-
    virtual ~Interpolator() noexcept = default;

    /**
@@ -95,13 +87,13 @@ class Interpolator {
     * @param high second 5:6:5 color for the block
     * @return and array of 4 Color32 values, with indices matching BC1 selectors
     */
-    std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high);
+    virtual std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high) const;

   private:
    virtual int Interpolate8(int v0, int v1) const;
    virtual int InterpolateHalf8(int v0, int v1) const;

-    //    constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
+    //    constexpr static auto Expand5 = ExpandArray<Size5, scale5To8>();
    //    constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
    //
    //    // match tables used for single-color blocks
@@ -136,7 +128,7 @@ class InterpolatorRound : public Interpolator {
    int Interpolate6(int v0, int v1) const override;

   private:
-    int Interpolate8(int v0, int v1) const;
+    int Interpolate8(int v0, int v1) const override;
 };

 class InterpolatorNvidia : public Interpolator {
@@ -145,7 +137,17 @@ class InterpolatorNvidia : public Interpolator {
    int Interpolate6(int v0, int v1) const override;
    int InterpolateHalf5(int v0, int v1) const override;
    int InterpolateHalf6(int v0, int v1) const override;
+    std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high) const override;
    constexpr bool isIdeal() noexcept override { return false; }
+
+   private:
+    Color32 InterpolateColor565(const Color32 &c0, const Color32 &c1) const {
+        return Color32(Interpolate5(c0.r, c1.r), Interpolate6(c0.g, c1.g), Interpolate5(c0.b, c1.b));
+    }
+
+    Color32 InterpolateHalfColor565(const Color32 &c0, const Color32 &c1) const {
+        return Color32(InterpolateHalf5(c0.r, c1.r), InterpolateHalf6(c0.g, c1.g), InterpolateHalf5(c0.b, c1.b));
+    }
 };

 class InterpolatorAMD : public Interpolator {
@@ -157,7 +159,7 @@ class InterpolatorAMD : public Interpolator {
    constexpr bool isIdeal() noexcept override { return false; }

   private:
-    int Interpolate8(int v0, int v1) const;
-    int InterpolateHalf8(int v0, int v1) const;
+    int Interpolate8(int v0, int v1) const override;
+    int InterpolateHalf8(int v0, int v1) const override;
 };
 }  // namespace rgbcx
--- a/src/ndebug.h
+++ b/src/ndebug.h
@@ -0,0 +1,26 @@
+/*  Python-rgbcx Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
+    Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#ifdef NDEBUG  // asserts disabled
+constexpr bool ndebug = true;
+#else  // asserts enabled
+constexpr bool ndebug = false;
+#endif
--- a/src/util.h
+++ b/src/util.h
@@ -20,9 +20,31 @@
 #pragma once
 #include <cstdint>

+#include "ndebug.h"
+
 static inline uint32_t iabs(int32_t i) { return (i < 0) ? static_cast<uint32_t>(-i) : static_cast<uint32_t>(i); }
 static inline uint64_t iabs(int64_t i) { return (i < 0) ? static_cast<uint64_t>(-i) : static_cast<uint64_t>(i); }

+template <typename I, typename O, size_t S, size_t C> constexpr auto Unpack(I packed) noexcept(ndebug) {
+    std::array<O, C> vals;
+    I mask = (1 << S) - 1;
+    for (int i = 0; i < C; i++) {
+        vals[i] = (packed >> (i * S)) & mask;
+        assert(vals[i] < 1 << S);
+    }
+
+    return vals;
+}
+
+template <typename I, typename O, size_t S, size_t C> constexpr auto Pack(const std::array<I, C> &vals) noexcept(ndebug) {
+    O packed = 0;
+    for (int i = 0; i < C; i++) {
+        packed |= vals[i] << (i * S);
+        assert(vals[i] < 1 << S);
+    }
+    return packed;
+}
+
 static inline uint8_t scale8To5(uint32_t v) {
    v = v * 31 + 128;
    return (uint8_t)((v + (v >> 8)) >> 8);
@@ -55,7 +77,7 @@ static inline float clampf(float value, float low, float high) {
 static inline uint8_t clamp255(int32_t i) { return (uint8_t)((i & 0xFFFFFF00U) ? (~(i >> 31)) : i); }

 template <typename S> inline S clamp(S value, S low, S high) { return (value < low) ? low : ((value > high) ? high : value); }
-static inline int32_t          clampi(int32_t value, int32_t low, int32_t high) {
+static inline int32_t clampi(int32_t value, int32_t low, int32_t high) {
    if (value < low)
        value = low;
    else if (value > high)