Implement BC1 Decoder

faster-single-tables
Andrew Cassidy 3 years ago
parent 1cf01f6f01
commit 76d39d7ef8

@ -27,8 +27,8 @@ namespace rgbcx {
template <class B, size_t M, size_t N> class BlockEncoder {
public:
using DecodedBlock = ColorBlock<M, N, Color32>;
using DecodedBlock = ColorBlock<Color32, M, N>;
using EncodedBlock = B;
virtual void EncodeBlock(const EncodedBlock *dest, const DecodedBlock *pixels) = 0;
virtual void EncodeBlock(EncodedBlock *dest, DecodedBlock *const pixels) const = 0;
};
} // namespace rgbcx

@ -26,14 +26,14 @@
#include "blocks.h"
template <size_t M, size_t N, class T> class ColorBlock {
template <typename T, size_t M, size_t N> class ColorBlock {
public:
using row = std::span<T, N>;
using Row = std::span<T, N>;
ColorBlock(const std::array<T *, N> &rows) {
for (int i = 0; i < height(); i++) { this[i] = row(rows[i], rows[i] * N * sizeof(T)); }
for (int i = 0; i < height(); i++) { this[i] = Row(rows[i], rows[i] * N * sizeof(T)); }
}
ColorBlock(const std::array<row, N> &rows) {
ColorBlock(const std::array<Row, N> &rows) {
for (int i = 0; i < height(); i++) { this[i] = rows[i]; }
}
@ -53,14 +53,15 @@ template <size_t M, size_t N, class T> class ColorBlock {
for (int i = 0; i < height(); i++) { _rows[i] = std::span(start[i * imageWidth]); }
}
constexpr T &operator[](size_t n) noexcept { return _rows[n]; }
constexpr Row &operator[](size_t n) noexcept { return _rows[n]; }
constexpr const Row &operator[](size_t n) const noexcept { return _rows[n]; }
constexpr int width() noexcept { return N; }
constexpr int height() noexcept { return M; }
constexpr int size() noexcept { return N * M; }
private:
std::array<row, M> _rows;
std::array<Row, M> _rows;
};
using Color4x4= ColorBlock<4, 4, Color32>;
using Color4x4= ColorBlock<Color32, 4, 4>;

@ -19,26 +19,25 @@
#include "BC1Decoder.h"
#include <array>
#include "ColorBlock.h"
void rgbcx::BC1Decoder::DecodeBlock(const Color4x4 *dest, const BC1Block *block) {
namespace rgbcx {
void BC1Decoder::DecodeBlock(Color4x4 *dest, BC1Block *const block) const {
const unsigned l = block->GetLowColor();
const unsigned h = block->GetHighColor();
const auto l_color = Color32::Unpack565(l);
const auto h_color = Color32::Unpack565(h);
std::array<Color32, 4> colors;
colors[0] = l_color;
colors[1] = h_color;
bool three_color = (h >= l);
if (three_color) {
colors[2] = _interpolator.InterpolateHalfColor(l_color, h_color);
colors[3] = Color32(0,0,0);
} else {
colors[2] = _interpolator.InterpolateColor()
const auto selectors = block->UnpackSelectors();
const auto colors = _interpolator.InterpolateBC1(l, h);
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
const auto selector = selectors[y][x];
const auto color = colors[selector];
assert(selector < 4);
assert((color.a == 0 && selector == 3 && l <= h) || color.a == UINT8_MAX);
if (_write_alpha) {
(*dest)[y][x].Set(color);
} else {
(*dest)[y][x].SetRGB(color);
}
}
}
}
} // namespace rgbcx

@ -19,19 +19,23 @@
#pragma once
#include "../blocks.h"
#include "../interpolator.h"
#include "../ndebug.h"
#include "BlockDecoder.h"
#include "blocks.h"
#include "interpolator.h"
namespace rgbcx {
class BC1Decoder : public BlockDecoder<BC1Block, 4, 4> {
public:
BC1Decoder(const Interpolator &interpolator) : _interpolator(interpolator) {}
BC1Decoder(const Interpolator &interpolator, bool write_alpha = false) : _interpolator(interpolator), _write_alpha(write_alpha) {}
BC1Decoder() : BC1Decoder(Interpolator()) {}
void DecodeBlock(const Color4x4 *dest, const BC1Block *block) override;
void DecodeBlock(Color4x4 *dest, BC1Block *const block) const noexcept(ndebug) override;
constexpr const Interpolator &GetInterpolator() const { return _interpolator; }
constexpr bool WritesAlpha() const { return _write_alpha; }
private:
const Interpolator &_interpolator;
const bool _write_alpha;
};
} // namespace rgbcx

@ -21,14 +21,14 @@
#include <cstdint>
#include "ColorBlock.h"
#include "../ColorBlock.h"
namespace rgbcx {
template <class B, size_t M, size_t N> class BlockDecoder {
public:
using DecodedBlock = ColorBlock<M, N, Color32>;
using DecodedBlock = ColorBlock<Color32, M, N>;
using EncodedBlock = B;
virtual void DecodeBlock(const DecodedBlock *dest, const EncodedBlock *block) = 0;
virtual void DecodeBlock(DecodedBlock *dest, EncodedBlock *const block) const = 0;
};
} // namespace rgbcx

@ -25,10 +25,13 @@
#include <cstdlib>
#include "color.h"
#include "util.h"
#pragma pack(push, 1)
class BC1Block {
public:
using UnpackedSelectors = std::array<std::array<uint8_t, 4>, 4>;
uint16_t GetLowColor() const { return _low_color[0] | _low_color[1] << 8U; }
uint16_t GetHighColor() const { return _high_color[0] | _high_color[1] << 8U; }
Color32 GetLowColor32() const { return Color32::Unpack565(GetLowColor()); }
@ -53,6 +56,20 @@ class BC1Block {
selectors[y] |= (val << (x * SelectorBits));
}
/**
 * Expands the packed selector rows of this block into individual selector values.
 * @return a 4x4 array in which row i holds the four 2-bit fields extracted from selectors[i]
 */
UnpackedSelectors UnpackSelectors() const {
    UnpackedSelectors result;
    for (size_t row = 0; row < 4; ++row) { result[row] = Unpack<uint8_t, uint8_t, 2, 4>(selectors[row]); }
    return result;
}
void PackSelectors(const UnpackedSelectors& unpacked) {
for (int i = 0; i < 4; i++) {
selectors[i] = Pack<uint8_t, uint8_t, 2, 4>(unpacked[i]);
}
}
constexpr static inline size_t EndpointSize = 2;
constexpr static inline size_t SelectorSize = 4;
constexpr static inline uint8_t SelectorBits = 2;

@ -41,6 +41,14 @@ Color32 Color32::Unpack565(uint16_t Packed) {
return Color32(R, G, B);
}
// Splits a packed 5:6:5 value into its raw channel fields without expanding them to 8 bits:
// bits 11-15 become red, bits 5-10 become green and bits 0-4 become blue.
Color32 Color32::Unpack565Unscaled(uint16_t Packed) {
    const uint8_t red = (Packed >> 11) & 0x1F;
    const uint8_t green = (Packed >> 5) & 0x3F;
    const uint8_t blue = Packed & 0x1F;

    return Color32(red, green, blue);
}
uint8_t Color32::operator[](uint32_t Index) const {
assert(Index < 4);
return c[Index];
@ -51,18 +59,17 @@ uint8_t &Color32::operator[](uint32_t Index) {
return c[Index];
}
void Color32::Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A) {
this->r = R;
this->g = G;
this->b = B;
this->a = A;
void Color32::Set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va = 0xFF) {
r = vr;
g = vg;
b = vb;
a = va;
}
void Color32::Set(const Color32 &Other) {
this->r = Other.r;
this->g = Other.g;
this->b = Other.b;
this->a = Other.a;
void Color32::SetRGB(uint8_t vr, uint8_t vg, uint8_t vb) {
r = vr;
g = vg;
b = vb;
}
Color32 Color32::min(const Color32 &a, const Color32 &b) {
@ -76,4 +83,8 @@ Color32 Color32::max(const Color32 &a, const Color32 &b) {
// Packs this color's r, g and b channels by forwarding them to Pack565.
uint16_t Color32::pack565() {
    const uint16_t packed = Pack565(r, g, b);
    return packed;
}
// Packs this color's r, g and b channels by forwarding them to Pack565Unscaled.
uint16_t Color32::pack565Unscaled() {
    const uint16_t packed = Pack565Unscaled(r, g, b);
    return packed;
}
// Returns a color whose channels are this color's r, g and b passed through
// scale8To5, scale8To6 and scale8To5 respectively.
Color32 Color32::ScaleTo565() const {
    const auto red = scale8To5(r);
    const auto green = scale8To6(g);
    const auto blue = scale8To5(b);
    return Color32(red, green, blue);
}
// Returns a color whose channels are this color's r, g and b passed through
// scale5To8, scale6To8 and scale5To8 respectively.
Color32 Color32::ScaleFrom565() const {
    const auto red = scale5To8(r);
    const auto green = scale6To8(g);
    const auto blue = scale5To8(b);
    return Color32(red, green, blue);
}
// endregion

@ -42,6 +42,7 @@ class Color32 {
static uint16_t Pack565Unscaled(uint16_t R, uint16_t G, uint16_t B);
static uint16_t Pack565(uint16_t R, uint16_t G, uint16_t B);
static Color32 Unpack565Unscaled(uint16_t Packed);
static Color32 Unpack565(uint16_t Packed);
bool operator==(const Color32 &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }
@ -52,11 +53,16 @@ class Color32 {
uint16_t pack565();
uint16_t pack565Unscaled();
Color32 ScaleTo565() const;
Color32 ScaleFrom565() const;
static Color32 min(const Color32 &A, const Color32 &B);
static Color32 max(const Color32 &A, const Color32 &B);
void Set(uint8_t R, uint8_t G, uint8_t B, uint8_t A);
void Set(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va);
void Set(const Color32 &other) { Set(other.r, other.g, other.b, other.a); }
void Set(const Color32 &Other);
void SetRGB(uint8_t vr, uint8_t vg, uint8_t vb);
void SetRGB(const Color32 &other) { SetRGB(other.r, other.g, other.b); }
};
#pragma pack(pop)

@ -36,7 +36,7 @@ Interpolator::Interpolator() {
void Interpolator::PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len) {
int size = 1 << len;
assert((len == 5 && size == size5) || (len == 6 && size == size6));
assert((len == 5 && size == Size5) || (len == 6 && size == size6));
const uint8_t *expand = (len == 5) ? &Expand5[0] : &Expand6[0];
@ -93,7 +93,7 @@ int Interpolator::Interpolate6(int v0, int v1) const { return Interpolate8(scale
// Passes both inputs through scale5To8 and hands the results to InterpolateHalf8.
int Interpolator::InterpolateHalf5(int v0, int v1) const {
    const auto wide0 = scale5To8(v0);
    const auto wide1 = scale5To8(v1);
    return InterpolateHalf8(wide0, wide1);
}
// Passes both inputs through scale6To8 and hands the results to InterpolateHalf8.
int Interpolator::InterpolateHalf6(int v0, int v1) const {
    const auto wide0 = scale6To8(v0);
    const auto wide1 = scale6To8(v1);
    return InterpolateHalf8(wide0, wide1);
}
std::array<Color32, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) {
std::array<Color32, 4> Interpolator::InterpolateBC1(uint16_t low, uint16_t high) const {
auto colors = std::array<Color32, 4>();
colors[0] = Color32::Unpack565(low);
colors[1] = Color32::Unpack565(high);
@ -154,6 +154,27 @@ int InterpolatorNvidia::InterpolateHalf6(int v0, int v1) const {
const int gdiff = v1 - v0;
return (256 * v0 + gdiff / 4 + 128 + gdiff * 128) / 256;
}
std::array<Color32, 4> InterpolatorNvidia::InterpolateBC1(uint16_t low, uint16_t high) const {
// Nvidia is special and interpolation cant be done with 8-bit values, so we need to override the default behavior
auto colors = std::array<Color32, 4>();
auto low565 = Color32::Unpack565Unscaled(low);
auto high565 = Color32::Unpack565Unscaled(high);
colors[0] = low565.ScaleFrom565();
colors[1] = high565.ScaleFrom565();
if (low > high) {
// 4-color mode
colors[2] = InterpolateColor565(low565, high565);
colors[3] = InterpolateColor565(high565, low565);
} else {
// 3-color mode
colors[2] = InterpolateHalfColor565(low565, high565);
colors[3] = Color32(0, 0, 0, 0); // transparent black
}
return colors;
}
// endregion
// region InterpolatorAMD implementation

@ -24,19 +24,14 @@
#include <memory>
#include "color.h"
#include "ndebug.h"
#include "util.h"
#ifdef NDEBUG // asserts disabled
static constexpr bool ndebug = true;
#else // asserts enabled
static constexpr bool ndebug = false;
#endif
namespace rgbcx {
template <size_t size, int op(int)> static constexpr std::array<uint8_t, size> ExpandArray() {
std::array<uint8_t, size> res;
for (int i = 0; i < size; i++) { res[i] = op(i); }
template <size_t Size, int Op(int)> static constexpr std::array<uint8_t, Size> ExpandArray() {
std::array<uint8_t, Size> res;
for (int i = 0; i < Size; i++) { res[i] = Op(i); }
return res;
}
@ -48,9 +43,6 @@ class Interpolator {
// uint8_t error;
// };
constexpr static inline size_t size5 = 32;
constexpr static inline size_t size6 = 64;
virtual ~Interpolator() noexcept = default;
/**
@ -95,13 +87,13 @@ class Interpolator {
* @param high second 5:6:5 color for the block
* @return an array of 4 Color32 values, with indices matching BC1 selectors
*/
std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high);
virtual std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high) const;
private:
virtual int Interpolate8(int v0, int v1) const;
virtual int InterpolateHalf8(int v0, int v1) const;
// constexpr static auto Expand5 = ExpandArray<size5, scale5To8>();
// constexpr static auto Expand5 = ExpandArray<Size5, scale5To8>();
// constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
//
// // match tables used for single-color blocks
@ -136,7 +128,7 @@ class InterpolatorRound : public Interpolator {
int Interpolate6(int v0, int v1) const override;
private:
int Interpolate8(int v0, int v1) const;
int Interpolate8(int v0, int v1) const override;
};
class InterpolatorNvidia : public Interpolator {
@ -145,7 +137,17 @@ class InterpolatorNvidia : public Interpolator {
int Interpolate6(int v0, int v1) const override;
int InterpolateHalf5(int v0, int v1) const override;
int InterpolateHalf6(int v0, int v1) const override;
std::array<Color32, 4> InterpolateBC1(uint16_t low, uint16_t high) const override;
constexpr bool isIdeal() noexcept override { return false; }
private:
// Interpolates two colors channel by channel: r and b go through Interpolate5, g through Interpolate6.
Color32 InterpolateColor565(const Color32 &c0, const Color32 &c1) const {
    const auto red = Interpolate5(c0.r, c1.r);
    const auto green = Interpolate6(c0.g, c1.g);
    const auto blue = Interpolate5(c0.b, c1.b);
    return Color32(red, green, blue);
}
// Interpolates two colors channel by channel: r and b go through InterpolateHalf5, g through InterpolateHalf6.
Color32 InterpolateHalfColor565(const Color32 &c0, const Color32 &c1) const {
    const auto red = InterpolateHalf5(c0.r, c1.r);
    const auto green = InterpolateHalf6(c0.g, c1.g);
    const auto blue = InterpolateHalf5(c0.b, c1.b);
    return Color32(red, green, blue);
}
};
class InterpolatorAMD : public Interpolator {
@ -157,7 +159,7 @@ class InterpolatorAMD : public Interpolator {
constexpr bool isIdeal() noexcept override { return false; }
private:
int Interpolate8(int v0, int v1) const;
int InterpolateHalf8(int v0, int v1) const;
int Interpolate8(int v0, int v1) const override;
int InterpolateHalf8(int v0, int v1) const override;
};
} // namespace rgbcx

@ -0,0 +1,26 @@
/* Python-rgbcx Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
// Compile-time mirror of the NDEBUG macro: true when asserts are compiled out, false otherwise.
// Being an ordinary constant, it can be used where a macro test cannot, e.g. noexcept(ndebug).
#ifdef NDEBUG // asserts disabled
constexpr bool ndebug = true;
#else // asserts enabled
constexpr bool ndebug = false;
#endif

@ -20,9 +20,31 @@
#pragma once
#include <cstdint>
#include "ndebug.h"
// Returns the magnitude of i as an unsigned value.
// The negation is carried out in unsigned arithmetic: negating INT32_MIN as a signed value
// overflows (undefined behaviour), whereas the unsigned form yields 2147483648 as expected.
static inline uint32_t iabs(int32_t i) {
    const auto bits = static_cast<uint32_t>(i);
    return (i < 0) ? (uint32_t{0} - bits) : bits;
}
// Returns the magnitude of i as an unsigned value.
// The negation is carried out in unsigned arithmetic: negating INT64_MIN as a signed value
// overflows (undefined behaviour), whereas the unsigned form yields 2^63 as expected.
static inline uint64_t iabs(int64_t i) {
    const auto bits = static_cast<uint64_t>(i);
    return (i < 0) ? (uint64_t{0} - bits) : bits;
}
/**
 * Splits a packed integer into C consecutive S-bit fields, least-significant field first.
 * @tparam I type of the packed input
 * @tparam O element type of the returned array
 * @tparam S width of each field in bits
 * @tparam C number of fields to extract
 * @param packed value holding the fields
 * @return std::array<O, C> whose element i is the S-bit field starting at bit i * S of packed
 */
template <typename I, typename O, size_t S, size_t C> constexpr auto Unpack(I packed) noexcept(ndebug) {
    // Build the limit and mask in the widest unsigned type; `1 << S` is an int expression
    // and overflows once S reaches 31.
    constexpr std::uintmax_t limit = std::uintmax_t{1} << S;
    const I mask = static_cast<I>(limit - 1);

    std::array<O, C> vals{};
    for (size_t i = 0; i < C; i++) {
        vals[i] = static_cast<O>((packed >> (i * S)) & mask);
        assert(static_cast<std::uintmax_t>(vals[i]) < limit);
    }
    return vals;
}
/**
 * Combines C values of S bits each into one packed integer, element 0 in the least-significant bits.
 * @tparam I element type of the input array
 * @tparam O type of the packed result
 * @tparam S width of each field in bits
 * @tparam C number of fields to combine
 * @param vals values to pack; each is expected to be below 2^S (checked by assert)
 * @return value of type O with vals[i] placed at bit i * S
 */
template <typename I, typename O, size_t S, size_t C> constexpr auto Pack(const std::array<I, C> &vals) noexcept(ndebug) {
    // Range limit in the widest unsigned type; `1 << S` is an int expression and overflows once S reaches 31.
    constexpr std::uintmax_t limit = std::uintmax_t{1} << S;

    O packed = 0;
    for (size_t i = 0; i < C; i++) {
        // Validate before the value is used.
        assert(static_cast<std::uintmax_t>(vals[i]) < limit);
        // Convert to the output type before shifting so that fields are not lost when O is wider than int.
        packed |= static_cast<O>(static_cast<O>(vals[i]) << (i * S));
    }
    return packed;
}
static inline uint8_t scale8To5(uint32_t v) {
v = v * 31 + 128;
return (uint8_t)((v + (v >> 8)) >> 8);
@ -55,7 +77,7 @@ static inline float clampf(float value, float low, float high) {
// Saturates a signed 32-bit value to the range of uint8_t.
static inline uint8_t clamp255(int32_t i) {
    // No bits set above the low byte means the value is already within 0..255.
    const bool fits_in_byte = (i & 0xFFFFFF00U) == 0;
    if (fits_in_byte) { return static_cast<uint8_t>(i); }

    // i >> 31 is all ones for negative input and zero otherwise, so its complement
    // truncates to 0 for negatives and to 255 for values above 255.
    return static_cast<uint8_t>(~(i >> 31));
}
// Limits value to [low, high]: returns low when value is below it, high when value is above it,
// and value itself otherwise. The lower bound is checked first.
template <typename S> inline S clamp(S value, S low, S high) {
    if (value < low) { return low; }
    if (value > high) { return high; }
    return value;
}
static inline int32_t clampi(int32_t value, int32_t low, int32_t high) {
static inline int32_t clampi(int32_t value, int32_t low, int32_t high) {
if (value < low)
value = low;
else if (value > high)

Loading…
Cancel
Save