Assorted cleanup

2024-09-13 06:37:34 +00:00 · 2022-05-31 23:09:20 -07:00 · 2022-05-31 23:09:20 -07:00 · f097f71ba9
commit f097f71ba9
parent 961c2b7134
7 changed files with 62 additions and 88 deletions
--- a/quicktex/OldColor.cpp
+++ b/quicktex/OldColor.cpp
@@ -23,7 +23,7 @@

 #include "Vector4.h"
 #include "Vector4Int.h"
-#include "util.h"  // for scale5To8, scale8To5, assert5bit, scale6To8
+#include "util.h"  // for scale_to_8<5>, scale_from_8<5>, assert5bit, scale_to_8<6>

 namespace quicktex {

@@ -47,7 +47,7 @@ uint16_t OldColor::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) {
 }

 uint16_t OldColor::Pack565(uint8_t r, uint8_t g, uint8_t b) {
-    return Pack565Unscaled(scale8To5(r), scale8To6(g), scale8To5(b));
+    return Pack565Unscaled(scale_from_8<5>(r), scale_from_8<6>(g), scale_from_8<5>(b));
 }

 OldColor OldColor::Unpack565Unscaled(uint16_t Packed) {
@@ -59,9 +59,9 @@ OldColor OldColor::Unpack565Unscaled(uint16_t Packed) {
 }

 OldColor OldColor::Unpack565(uint16_t Packed) {
-    uint8_t r = static_cast<uint8_t>(scale5To8((Packed >> 11) & 0x1FU));
-    uint8_t g = static_cast<uint8_t>(scale6To8((Packed >> 5) & 0x3FU));
-    uint8_t b = static_cast<uint8_t>(scale5To8(Packed & 0x1FU));
+    uint8_t r = static_cast<uint8_t>(scale_to_8<5>((Packed >> 11) & 0x1FU));
+    uint8_t g = static_cast<uint8_t>(scale_to_8<6>((Packed >> 5) & 0x3FU));
+    uint8_t b = static_cast<uint8_t>(scale_to_8<5>(Packed & 0x1FU));

    return OldColor(r, g, b);
 }
@@ -111,8 +111,8 @@ Vector4Int operator-(const OldColor &lhs, const OldColor &rhs) {
 uint16_t OldColor::Pack565() const { return Pack565(r, g, b); }
 uint16_t OldColor::Pack565Unscaled() const { return Pack565Unscaled(r, g, b); }

-OldColor OldColor::ScaleTo565() const { return OldColor(scale8To5(r), scale8To6(g), scale8To5(b)); }
-OldColor OldColor::ScaleFrom565() const { return OldColor(scale5To8(r), scale6To8(g), scale5To8(b)); }
+OldColor OldColor::ScaleTo565() const { return OldColor(scale_from_8<5>(r), scale_from_8<6>(g), scale_from_8<5>(b)); }
+OldColor OldColor::ScaleFrom565() const { return OldColor(scale_to_8<5>(r), scale_to_8<6>(g), scale_to_8<5>(b)); }

 bool OldColor::operator==(const OldColor &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }
 bool OldColor::operator!=(const OldColor &Rhs) const { return !(Rhs == *this); }
--- a/quicktex/ctests/TestSIMD.cpp
+++ b/quicktex/ctests/TestSIMD.cpp
@@ -20,13 +20,14 @@
 #include <utest.h>

 #include <array>
-#include <cassert>
 #include <cstdint>
+#include <limits>
 #include <numeric>
-#include <type_traits>
+#include <vector>
 #include <xsimd/xsimd.hpp>

 #include "../VecUtil.h"
+#include "../util.h"

 namespace quicktex::tests {

@@ -47,7 +48,7 @@ template <typename T> constexpr auto make_arrays() {
    arrays.push_back(buffer);

    if (std::is_signed_v<T>) {
-        std::iota(buffer.begin(), buffer.end(), -1);
+        std::iota(buffer.begin(), buffer.end(), -1 * (int)xsimd::batch<T>::size);
        arrays.push_back(buffer);

        buffer.fill(-1);
--- a/quicktex/ctests/TestVec.cpp
+++ b/quicktex/ctests/TestVec.cpp
@@ -17,9 +17,13 @@
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

-#include <utest.h>
+#include <stdlib.h>  // for abs
+#include <utest.h>   // for UTEST

-#include "../Vec.h"
+#include <array>  // for operator==
+
+#include "../Vec.h"   // for Vec, ope...
+#include "../util.h"  // for abs

 namespace quicktex::tests {

--- a/quicktex/s3tc/bc1/BC1Encoder.cpp
+++ b/quicktex/s3tc/bc1/BC1Encoder.cpp
@@ -495,17 +495,17 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons

        if (metrics.max.r - metrics.min.r < 2) {
            // single color block
-            uint8_t fr5 = (uint8_t)scale8To5(fr);
-            uint8_t fr6 = (uint8_t)scale8To6(fr);
+            uint8_t fr5 = (uint8_t)scale_from_8<5>(fr);
+            uint8_t fr6 = (uint8_t)scale_from_8<6>(fr);

            result.low = OldColor(fr5, fr6, fr5);
            result.high = result.low;
        } else {
-            uint8_t lr5 = scale8To5(metrics.min.r);
-            uint8_t lr6 = scale8To6(metrics.min.r);
+            uint8_t lr5 = scale_from_8<5>(metrics.min.r);
+            uint8_t lr6 = scale_from_8<6>(metrics.min.r);

-            uint8_t hr5 = scale8To5(metrics.max.r);
-            uint8_t hr6 = scale8To6(metrics.max.r);
+            uint8_t hr5 = scale_from_8<5>(metrics.max.r);
+            uint8_t hr6 = scale_from_8<6>(metrics.max.r);

            result.low = OldColor(lr5, lr6, lr5);
            result.high = OldColor(hr5, hr6, hr5);
--- a/quicktex/s3tc/bc1/SingleColorTable.h
+++ b/quicktex/s3tc/bc1/SingleColorTable.h
@@ -26,7 +26,7 @@
 #include "../../util.h"
 #include "../interpolator/Interpolator.h"

-namespace quicktex::s3tc  {
+namespace quicktex::s3tc {

 struct BC1MatchEntry {
    uint8_t high;
@@ -59,10 +59,10 @@ template <size_t B, size_t N> MatchListPtr SingleColorTable(InterpolatorPtr inte
        // TODO: Can probably avoid testing for values that definitely wont yield good results,
        // e.g. low8 and high8 both much smaller or larger than index
        for (uint8_t low = 0; low < Size; low++) {
-            uint8_t low8 = (B == 5) ? scale5To8(low) : scale6To8(low);
+            uint8_t low8 = scale_to_8<B>(low);

            for (uint8_t high = 0; high < Size; high++) {
-                uint8_t high8 = (B == 5) ? scale5To8(high) : scale6To8(high);
+                uint8_t high8 = scale_to_8<B>(high);
                uint8_t value;

                if (use_8bit) {
--- a/quicktex/s3tc/interpolator/Interpolator.cpp
+++ b/quicktex/s3tc/interpolator/Interpolator.cpp
@@ -45,10 +45,10 @@ std::unique_ptr<Interpolator> Interpolator::MakeInterpolator(Interpolator::Type
    }
 }

-uint8_t Interpolator::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
-uint8_t Interpolator::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
-uint8_t Interpolator::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
-uint8_t Interpolator::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }
+uint8_t Interpolator::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale_to_8<5>(v0), scale_to_8<5>(v1)); }
+uint8_t Interpolator::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale_to_8<6>(v0), scale_to_8<6>(v1)); }
+uint8_t Interpolator::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale_to_8<5>(v0), scale_to_8<5>(v1)); }
+uint8_t Interpolator::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale_to_8<6>(v0), scale_to_8<6>(v1)); }

 std::array<OldColor, 4> Interpolator::Interpolate565BC1(uint16_t low, uint16_t high, bool allow_3color) const {
    bool use_3color = allow_3color && (high >= low);
@@ -79,8 +79,8 @@ uint8_t Interpolator::InterpolateHalf8(uint8_t v0, uint8_t v1) const { return (v
 // endregion

 // region InterpolatorRound implementation
-uint8_t InterpolatorRound::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
-uint8_t InterpolatorRound::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
+uint8_t InterpolatorRound::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale_to_8<5>(v0), scale_to_8<5>(v1)); }
+uint8_t InterpolatorRound::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale_to_8<6>(v0), scale_to_8<6>(v1)); }

 uint8_t InterpolatorRound::Interpolate8(uint8_t v0, uint8_t v1) const { return (v0 * 2 + v1 + 1) / 3; }
 // endregion
@@ -129,10 +129,10 @@ std::array<OldColor, 4> InterpolatorNvidia::InterpolateBC1(OldColor low, OldColo
 // endregion

 // region InterpolatorAMD implementation
-uint8_t InterpolatorAMD::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
-uint8_t InterpolatorAMD::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
-uint8_t InterpolatorAMD::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
-uint8_t InterpolatorAMD::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }
+uint8_t InterpolatorAMD::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale_to_8<5>(v0), scale_to_8<5>(v1)); }
+uint8_t InterpolatorAMD::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale_to_8<6>(v0), scale_to_8<6>(v1)); }
+uint8_t InterpolatorAMD::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale_to_8<5>(v0), scale_to_8<5>(v1)); }
+uint8_t InterpolatorAMD::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale_to_8<6>(v0), scale_to_8<6>(v1)); }

 uint8_t InterpolatorAMD::Interpolate8(uint8_t v0, uint8_t v1) const { return (v0 * 43 + v1 * 21 + 32) >> 6; }

--- a/quicktex/util.h
+++ b/quicktex/util.h
@@ -94,7 +94,7 @@ template <typename T> class const_iterator {
    size_t _index;
 };

-template <typename S, size_t N> S scale_from_8(S v) {
+template <size_t N, typename S> S scale_from_8(S v) {
    static_assert(N < 8);
    assert(v < (1 << 8));

@@ -107,13 +107,13 @@ template <typename S, size_t N> S scale_from_8(S v) {
    return result;
 }

-template <typename S, size_t N> S scale_to_8(S v) {
+template <size_t N, typename S> S scale_to_8(S v) {
    static_assert(N < 8);
    assert(v < (1 << N));

-    constexpr unsigned lshift = 8 - N;
-    constexpr unsigned rshift = N - lshift;
-    S result = static_cast<S>((v << lshift) | (v >> rshift));
+    constexpr unsigned Lshift = 8 - N;
+    constexpr unsigned Rshift = N - Lshift;
+    S result = static_cast<S>((v << Lshift) | (v >> Rshift));

    assert(v < (1 << 8));

@@ -140,12 +140,10 @@ size_t unpack_into(P packed, OI begin, OI end, WI widths, bool little_endian = t
        while (begin < end) {
            auto w = *(widths++);
            assert(w <= std::numeric_limits<U>::digits);
-            U result{0};

            auto mask = ((1 << w) - 1);
-            result = (packed >> offset) & mask;
+            *(begin++) = (packed >> offset) & mask;

-            *(begin++) = result;
            offset += w;  // increment offset
        }

@@ -164,12 +162,9 @@ size_t unpack_into(P packed, OI begin, OI end, WI widths, bool little_endian = t
            auto w = *(widths++);
            offset -= w;  // decrement offset
            assert(w < std::numeric_limits<U>::digits);
-            U result{0};

            auto mask = ((1 << w) - 1);
-            result = (packed >> offset) & mask;
-
-            *(begin++) = result;
+            *(begin++) = (packed >> offset) & mask;
        }

        return total_offset;
@@ -188,7 +183,7 @@ template <typename P, typename OR, typename WR>
    requires std::unsigned_integral<P> && range<OR> && range<WR>
 size_t unpack_into(P packed, OR &dest, const WR &widths, bool little_endian = true) {
    assert(distance(widths) == distance(dest));
-    return unpack_into(packed, dest.begin(), dest.end(), widths.begin(), little_endian = true);
+    return unpack_into(packed, dest.begin(), dest.end(), widths.begin(), little_endian);
 }

 /**
@@ -217,24 +212,7 @@ size_t unpack_into(P packed, OI begin, OI end, size_t width, bool little_endian
 template <typename P, typename OR>
    requires std::unsigned_integral<P> && range<OR>
 size_t unpack_into(P packed, OR &dest, size_t width, bool little_endian = true) {
-    return unpack_into(packed, dest.begin(), dest.end(), const_iterator(width), little_endian = true);
-}
-
-/**
- * Unpacks an unsigned integer into an array of smaller integers
- * @tparam U unpacked data type
- * @tparam N number of values to unpack
- * @param packed value to unpack
- * @param width width of each packed element in bits
- * @param little_endian if the input has the first element in the least significant place
- * @return an array of unpacked values
- */
-template <typename U, size_t N, typename P>
-    requires std::unsigned_integral<P>
-std::array<U, N> unpack(P packed, size_t width, bool little_endian = true) {
-    std::array<U, N> unpacked;
-    unpack_into(packed, unpacked, width, little_endian);
-    return unpacked;
+    return unpack_into(packed, dest.begin(), dest.end(), const_iterator(width), little_endian);
 }

 /**
@@ -284,6 +262,23 @@ std::array<U, N> unpack(P packed, const WR &widths, bool little_endian = true) {
    return unpack<U, N>(packed, widths.begin(), little_endian);
 }

+/**
+ * Unpacks an unsigned integer into an array of smaller integers
+ * @tparam U unpacked data type
+ * @tparam N number of values to unpack
+ * @param packed value to unpack
+ * @param width width of each packed element in bits
+ * @param little_endian if the input has the first element in the least significant place
+ * @return an array of unpacked values
+ */
+template <typename U, size_t N, typename P>
+    requires std::unsigned_integral<P>
+std::array<U, N> unpack(P packed, size_t width, bool little_endian = true) {
+    std::array<U, N> unpacked;
+    unpack_into(packed, unpacked, width, little_endian);
+    return unpacked;
+}
+
 /**
 * Packs an iterable of integers into a single integer.
 * @tparam II input iterator type
@@ -372,12 +367,6 @@ inline constexpr P pack(IR r, size_t width, bool little_endian = true) {
    return pack<P>(r.begin(), r.end(), const_iterator(width), little_endian);
 }

-template <size_t Size, int Op(int)> constexpr std::array<uint8_t, Size> ExpandArray() {
-    std::array<uint8_t, Size> res;
-    for (int i = 0; i < Size; i++) { res[i] = Op(i); }
-    return res;
-}
-
 template <typename Seq, typename Fn> constexpr auto MapArray(const Seq &input, Fn op) {
    using I = typename Seq::value_type;
    using O = decltype(op(I{}));
@@ -388,24 +377,6 @@ template <typename Seq, typename Fn> constexpr auto MapArray(const Seq &input, F
    return output;
 }

-template <typename S> constexpr S scale8To5(S v) {
-    auto v2 = v * 31 + 128;
-    return static_cast<S>((v2 + (v2 >> 8)) >> 8);
-}
-template <typename S> constexpr S scale8To6(S v) {
-    auto v2 = v * 63 + 128;
-    return static_cast<S>((v2 + (v2 >> 8)) >> 8);
-}
-
-template <typename S> constexpr S scale5To8(S v) {
-    assert5bit(v);
-    return static_cast<S>((v << 3) | (v >> 2));
-}
-template <typename S> constexpr S scale6To8(S v) {
-    assert6bit(v);
-    return static_cast<S>((v << 2) | (v >> 4));
-}
-
 template <typename S> constexpr S clamp(S value, S low, S high) {
    assert(low <= high);
    if (value < low) return low;
@@ -416,8 +387,6 @@ template <typename S> constexpr S clamp(S value, S low, S high) {
 using std::abs;    // abs overload for builtin types
 using xsimd::abs;  // provides overload for abs<xsimd::batch>

-template <typename F> constexpr F lerp(F a, F b, F s) { return a + (b - a) * s; }
-
 template <typename... Args> std::string Format(const char *str, const Args &...args) {
    auto output = std::string(str);