diff --git a/CMakeLists.txt b/CMakeLists.txt index cae5f43..d518b56 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -38,9 +38,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3") if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -fsanitize=undefined") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++") + set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined") set(PROJECT_WARNINGS ${CLANG_WARNINGS}) - if (CMAKE_SYSTEM_NAME STREQUAL "Darwin") - set_property(TARGET python_rgbcx test_rgbcx PROPERTY OSX_ARCHITECTURES_RELEASE x86_64 arm64) #Mach-O fat binary for arm and x86 - endif () endif () diff --git a/src/BlockDecoder.h b/src/BlockDecoder.h index c1582c5..4e5299f 100644 --- a/src/BlockDecoder.h +++ b/src/BlockDecoder.h @@ -40,14 +40,7 @@ template class BlockDecoder { virtual void DecodeBlock(DecodedBlock dest, EncodedBlock *const block) const noexcept(ndebug) = 0; - void DecodeRow(std::span dests, std::span blocks) { - assert(dests.size() == blocks.size()); - - for (int i = 0; i < dests.size; i++) { DecodeBlock(&dests[i], &blocks[i]); } - } - - std::vector DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height, unsigned chunk_size = 0, bool threaded = false) { - assert(threaded == chunk_size > 0); + std::vector DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height) { unsigned block_width = maximum(1U, ((image_width + 3) / 4)); unsigned block_height = maximum(1U, ((image_height + 3) / 4)); using Row = typename DecodedBlock::Row; @@ -55,25 +48,26 @@ template class BlockDecoder { auto image = std::vector(block_width * block_height * N * M); auto blocks = reinterpret_cast(bytes); - if (!threaded) { + // from experimentation, multithreading this using OpenMP actually makes decoding slower + // due to thread creation/teardown taking longer than the decoding process 
itself. + // As a result, this is left as a serial operation despite being embarrassingly parallelizable + for (unsigned y = 0; y < block_height; y++) { for (unsigned x = 0; x < block_width; x++) { - for (unsigned y = 0; y < block_height; y++) { - unsigned pixel_x = x * N; - unsigned pixel_y = y * M; + unsigned pixel_x = x * N; + unsigned pixel_y = y * M; - assert(pixel_x >= 0); - assert(pixel_y >= 0); - assert(pixel_y + M <= image_height); - assert(pixel_x + N <= image_width); + assert(pixel_x >= 0); + assert(pixel_y >= 0); + assert(pixel_y + M <= image_height); + assert(pixel_x + N <= image_width); - unsigned top_left = pixel_x + (pixel_y * image_width); - auto rows = std::array(); - for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast(&image[top_left + i * image_width]); } + unsigned top_left = pixel_x + (pixel_y * image_width); + auto rows = std::array(); + for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast(&image[top_left + i * image_width]); } - auto dest = DecodedBlock(&image[top_left],image_width); + auto dest = DecodedBlock(&image[top_left], image_width); - DecodeBlock(dest, &blocks[x + block_width * y]); - } + DecodeBlock(dest, &blocks[x + block_width * y]); } } diff --git a/src/BlockView.h b/src/BlockView.h index 6193631..a8f6525 100644 --- a/src/BlockView.h +++ b/src/BlockView.h @@ -54,9 +54,9 @@ template class BlockView { BlockView(S *start, int row_stride = N, int pixel_stride = 1) : start(start), row_stride(row_stride), pixel_stride(pixel_stride) {} - constexpr Row operator[](size_t index) noexcept(ndebug) { + constexpr Row operator[](unsigned index) noexcept(ndebug) { assert(index < M); - return RowView(&start[index * row_stride], pixel_stride); + return RowView(&start[row_stride * (int)index], pixel_stride); } constexpr int width() noexcept { return N; } @@ -66,25 +66,25 @@ template class BlockView { constexpr S &get(unsigned x, unsigned y) noexcept(ndebug) { assert(x < N); assert(y < M); - return start[(row_stride * y) + 
(pixel_stride * x)]; + return start[(row_stride * (int)y) + (pixel_stride * (int)x)]; } constexpr S get(unsigned x, unsigned y) const noexcept(ndebug) { assert(x < N); assert(y < M); - return start[(row_stride * y) + (pixel_stride * x)]; + return start[(row_stride * (int)y) + (pixel_stride * (int)x)]; } constexpr void set(unsigned x, unsigned y, S value) noexcept(ndebug) { assert(x < N); assert(y < M); - start[(row_stride * y) + (pixel_stride * x)] = value; + start[(row_stride * (int)y) + (pixel_stride * (int)x)] = value; } constexpr std::array flatten() noexcept { std::array result; - for (unsigned x = 0; x < N; x++) { - for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; } + for (int x = 0; x < N; x++) { + for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; } } return result; } @@ -107,10 +107,7 @@ template class ColorBlockView : public BlockView; diff --git a/src/Color.cpp b/src/Color.cpp index 745feba..a6ed99c 100644 --- a/src/Color.cpp +++ b/src/Color.cpp @@ -21,7 +21,7 @@ #include // for max, min -#include "util.h" // for scale5To8, scale8To5, Assert5Bit, scale6To8 +#include "util.h" // for scale5To8, scale8To5, assert5bit, scale6To8 // region Color implementation Color::Color() { SetRGBA(0, 0, 0, 0xFF); } @@ -29,9 +29,9 @@ Color::Color() { SetRGBA(0, 0, 0, 0xFF); } Color::Color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { SetRGBA(r, g, b, a); } uint16_t Color::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) { - Assert5Bit(r); - Assert6Bit(g); - Assert5Bit(b); + assert5bit(r); + assert6bit(g); + assert5bit(b); return static_cast(b | (g << 5) | (r << 11)); } diff --git a/src/util.h b/src/util.h index b9cbaac..b6f7d1c 100644 --- a/src/util.h +++ b/src/util.h @@ -28,14 +28,8 @@ #define UINT5_MAX 0x1FU // 31 #define UINT6_MAX 0x3FU // 63 -template constexpr void Assert5Bit(S x) { - static_assert(std::is_unsigned::value); - assert(x <= UINT5_MAX); -} -template 
constexpr void Assert6Bit(S x) { - static_assert(std::is_unsigned::value); - assert(x <= UINT6_MAX); -} +#define assert5bit(x) assert(x <= UINT5_MAX) +#define assert6bit(x) assert(x <= UINT6_MAX) template constexpr auto iabs(S i) { static_assert(!std::is_unsigned::value); @@ -113,12 +107,11 @@ template constexpr S scale8To6(S v) { } template constexpr S scale5To8(S v) { - Assert5Bit(v); - assert(v <= UINT5_MAX); + assert5bit(v); return static_cast((v << 3) | (v >> 2)); } template constexpr S scale6To8(S v) { - Assert6Bit(v); + assert6bit(v); return static_cast((v << 2) | (v >> 4)); } diff --git a/tools/CompilerWarnings.cmake b/tools/CompilerWarnings.cmake index a3dfbb1..83712a0 100644 --- a/tools/CompilerWarnings.cmake +++ b/tools/CompilerWarnings.cmake @@ -47,7 +47,7 @@ function(set_project_warnings project_name) -Wnon-virtual-dtor # warn the user if a class with virtual functions has a # non-virtual destructor. This helps catch hard to # track down memory errors - -Wold-style-cast # warn for c-style casts + #-Wold-style-cast # warn for c-style casts -Wcast-align # warn for potential performance problem casts -Wunused # warn on anything being unused -Woverloaded-virtual # warn if you overload (not override) a virtual