Various small tweaks, disable fat binary creation

3 years ago · b42c52030d
parent 77cc6a8132
commit b42c52030d
6 changed files with 35 additions and 53 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -38,9 +38,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")

 if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -fsanitize=undefined")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined")
    set(PROJECT_WARNINGS ${CLANG_WARNINGS})
-    if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
-        set_property(TARGET python_rgbcx test_rgbcx PROPERTY OSX_ARCHITECTURES_RELEASE x86_64 arm64) #Mach-O fat binary for arm and x86
-    endif ()
 endif ()
--- a/src/BlockDecoder.h
+++ b/src/BlockDecoder.h
@ -40,14 +40,7 @@ template <class B, size_t M, size_t N> class BlockDecoder {

    virtual void DecodeBlock(DecodedBlock dest, EncodedBlock *const block) const noexcept(ndebug) = 0;

-    void DecodeRow(std::span<DecodedBlock> dests, std::span<const EncodedBlock> blocks) {
-        assert(dests.size() == blocks.size());
-
-        for (int i = 0; i < dests.size; i++) { DecodeBlock(&dests[i], &blocks[i]); }
-    }
-
-    std::vector<Color> DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height, unsigned chunk_size = 0, bool threaded = false) {
-        assert(threaded == chunk_size > 0);
+    std::vector<Color> DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height) {
        unsigned block_width = maximum(1U, ((image_width + 3) / 4));
        unsigned block_height = maximum(1U, ((image_height + 3) / 4));
        using Row = typename DecodedBlock::Row;
@ -55,25 +48,26 @@ template <class B, size_t M, size_t N> class BlockDecoder {
        auto image = std::vector<Color>(block_width * block_height * N * M);
        auto blocks = reinterpret_cast<B *>(bytes);

-        if (!threaded) {
+        // From experimentation, multithreading this using OpenMP actually makes decoding slower
+        // due to thread creation/teardown taking longer than the decoding process itself.
+        // As a result, this is left as a serial operation despite being embarrassingly parallelizable.
+        for (unsigned y = 0; y < block_height; y++) {
            for (unsigned x = 0; x < block_width; x++) {
-                for (unsigned y = 0; y < block_height; y++) {
-                    unsigned pixel_x = x * N;
-                    unsigned pixel_y = y * M;
+                unsigned pixel_x = x * N;
+                unsigned pixel_y = y * M;

-                    assert(pixel_x >= 0);
-                    assert(pixel_y >= 0);
-                    assert(pixel_y + M <= image_height);
-                    assert(pixel_x + N <= image_width);
+                assert(pixel_x >= 0);
+                assert(pixel_y >= 0);
+                assert(pixel_y + M <= image_height);
+                assert(pixel_x + N <= image_width);

-                    unsigned top_left = pixel_x + (pixel_y * image_width);
-                    auto rows = std::array<Row *, M>();
-                    for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast<Row *>(&image[top_left + i * image_width]); }
+                unsigned top_left = pixel_x + (pixel_y * image_width);
+                auto rows = std::array<Row *, M>();
+                for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast<Row *>(&image[top_left + i * image_width]); }

-                    auto dest = DecodedBlock(&image[top_left],image_width);
+                auto dest = DecodedBlock(&image[top_left], image_width);

-                    DecodeBlock(dest, &blocks[x + block_width * y]);
-                }
+                DecodeBlock(dest, &blocks[x + block_width * y]);
            }
        }

--- a/src/BlockView.h
+++ b/src/BlockView.h
@ -54,9 +54,9 @@ template <typename S, size_t M, size_t N> class BlockView {

    BlockView(S *start, int row_stride = N, int pixel_stride = 1) : start(start), row_stride(row_stride), pixel_stride(pixel_stride) {}

-    constexpr Row operator[](size_t index) noexcept(ndebug) {
+    constexpr Row operator[](unsigned index) noexcept(ndebug) {
        assert(index < M);
-        return RowView<S, N>(&start[index * row_stride], pixel_stride);
+        return RowView<S, N>(&start[row_stride * (int)index], pixel_stride);
    }

    constexpr int width() noexcept { return N; }
@ -66,25 +66,25 @@ template <typename S, size_t M, size_t N> class BlockView {
    constexpr S &get(unsigned x, unsigned y) noexcept(ndebug) {
        assert(x < N);
        assert(y < M);
-        return start[(row_stride * y) + (pixel_stride * x)];
+        return start[(row_stride * (int)y) + (pixel_stride * (int)x)];
    }

    constexpr S get(unsigned x, unsigned y) const noexcept(ndebug) {
        assert(x < N);
        assert(y < M);
-        return start[(row_stride * y) + (pixel_stride * x)];
+        return start[(row_stride * (int)y) + (pixel_stride * (int)x)];
    }

    constexpr void set(unsigned x, unsigned y, S value) noexcept(ndebug) {
        assert(x < N);
        assert(y < M);
-        start[(row_stride * y) + (pixel_stride * x)] = value;
+        start[(row_stride * (int)y) + (pixel_stride * (int)x)] = value;
    }

    constexpr std::array<S, M * N> flatten() noexcept {
        std::array<S, M * N> result;
-        for (unsigned x = 0; x < N; x++) {
-            for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
+        for (int x = 0; x < N; x++) {
+            for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
        }
        return result;
    }
@ -107,10 +107,7 @@ template <size_t M, size_t N> class ColorBlockView : public BlockView<Color, M,
        return ChannelView(channelStart, Base::row_stride * 4, Base::pixel_stride * 4);
    }

-    constexpr ChannelView GetR() noexcept(ndebug) { return GetChannel(0); };
-    constexpr ChannelView GetG() noexcept(ndebug) { return GetChannel(1); };
-    constexpr ChannelView GetB() noexcept(ndebug) { return GetChannel(2); };
-    constexpr ChannelView GetA() noexcept(ndebug) { return GetChannel(3); };
+    void SetRGB(unsigned x, unsigned y, Color value) noexcept(ndebug) { Base::get(x, y).SetRGB(value); }
 };

 using Color4x4 = ColorBlockView<4, 4>;
--- a/src/Color.cpp
+++ b/src/Color.cpp
@ -21,7 +21,7 @@

 #include <algorithm>  // for max, min

-#include "util.h"  // for scale5To8, scale8To5, Assert5Bit, scale6To8
+#include "util.h"  // for scale5To8, scale8To5, assert5bit, scale6To8

 // region Color implementation
 Color::Color() { SetRGBA(0, 0, 0, 0xFF); }
@ -29,9 +29,9 @@ Color::Color() { SetRGBA(0, 0, 0, 0xFF); }
 Color::Color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { SetRGBA(r, g, b, a); }

 uint16_t Color::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) {
-    Assert5Bit(r);
-    Assert6Bit(g);
-    Assert5Bit(b);
+    assert5bit(r);
+    assert6bit(g);
+    assert5bit(b);
    return static_cast<uint16_t>(b | (g << 5) | (r << 11));
 }

--- a/src/util.h
+++ b/src/util.h
@ -28,14 +28,8 @@
 #define UINT5_MAX 0x1FU  // 31
 #define UINT6_MAX 0x3FU  // 63

-template <typename S> constexpr void Assert5Bit(S x) {
-    static_assert(std::is_unsigned<S>::value);
-    assert(x <= UINT5_MAX);
-}
-template <typename S> constexpr void Assert6Bit(S x) {
-    static_assert(std::is_unsigned<S>::value);
-    assert(x <= UINT6_MAX);
-}
+#define assert5bit(x) assert(x <= UINT5_MAX)
+#define assert6bit(x) assert(x <= UINT6_MAX)

 template <typename S> constexpr auto iabs(S i) {
    static_assert(!std::is_unsigned<S>::value);
@ -113,12 +107,11 @@ template <typename S> constexpr S scale8To6(S v) {
 }

 template <typename S> constexpr S scale5To8(S v) {
-    Assert5Bit(v);
-    assert(v <= UINT5_MAX);
+    assert5bit(v);
    return static_cast<S>((v << 3) | (v >> 2));
 }
 template <typename S> constexpr S scale6To8(S v) {
-    Assert6Bit(v);
+    assert6bit(v);
    return static_cast<S>((v << 2) | (v >> 4));
 }

--- a/tools/CompilerWarnings.cmake
+++ b/tools/CompilerWarnings.cmake
@ -47,7 +47,7 @@ function(set_project_warnings project_name)
            -Wnon-virtual-dtor # warn the user if a class with virtual functions has a
            # non-virtual destructor. This helps catch hard to
            # track down memory errors
-            -Wold-style-cast # warn for c-style casts
+            #-Wold-style-cast # warn for c-style casts
            -Wcast-align     # warn for potential performance problem casts
            -Wunused         # warn on anything being unused
            -Woverloaded-virtual # warn if you overload (not override) a virtual