Various small tweaks, disable fat binary creation

faster-single-tables
Andrew Cassidy 3 years ago
parent 77cc6a8132
commit b42c52030d

@ -38,9 +38,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang") if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -fsanitize=undefined") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined")
set(PROJECT_WARNINGS ${CLANG_WARNINGS}) set(PROJECT_WARNINGS ${CLANG_WARNINGS})
if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set_property(TARGET python_rgbcx test_rgbcx PROPERTY OSX_ARCHITECTURES_RELEASE x86_64 arm64) #Mach-O fat binary for arm and x86
endif ()
endif () endif ()

@ -40,14 +40,7 @@ template <class B, size_t M, size_t N> class BlockDecoder {
virtual void DecodeBlock(DecodedBlock dest, EncodedBlock *const block) const noexcept(ndebug) = 0; virtual void DecodeBlock(DecodedBlock dest, EncodedBlock *const block) const noexcept(ndebug) = 0;
void DecodeRow(std::span<DecodedBlock> dests, std::span<const EncodedBlock> blocks) { std::vector<Color> DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height) {
assert(dests.size() == blocks.size());
for (int i = 0; i < dests.size; i++) { DecodeBlock(&dests[i], &blocks[i]); }
}
std::vector<Color> DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height, unsigned chunk_size = 0, bool threaded = false) {
assert(threaded == chunk_size > 0);
unsigned block_width = maximum(1U, ((image_width + 3) / 4)); unsigned block_width = maximum(1U, ((image_width + 3) / 4));
unsigned block_height = maximum(1U, ((image_height + 3) / 4)); unsigned block_height = maximum(1U, ((image_height + 3) / 4));
using Row = typename DecodedBlock::Row; using Row = typename DecodedBlock::Row;
@ -55,25 +48,26 @@ template <class B, size_t M, size_t N> class BlockDecoder {
auto image = std::vector<Color>(block_width * block_height * N * M); auto image = std::vector<Color>(block_width * block_height * N * M);
auto blocks = reinterpret_cast<B *>(bytes); auto blocks = reinterpret_cast<B *>(bytes);
if (!threaded) { // from experimentation, multithreading this using OpenMP actually makes decoding slower
// due to thread creation/teardown taking longer than the decoding process itself.
// As a result, this is left as a serial operation despite being embarrassingly parallelizable
for (unsigned y = 0; y < block_height; y++) {
for (unsigned x = 0; x < block_width; x++) { for (unsigned x = 0; x < block_width; x++) {
for (unsigned y = 0; y < block_height; y++) { unsigned pixel_x = x * N;
unsigned pixel_x = x * N; unsigned pixel_y = y * M;
unsigned pixel_y = y * M;
assert(pixel_x >= 0); assert(pixel_x >= 0);
assert(pixel_y >= 0); assert(pixel_y >= 0);
assert(pixel_y + M <= image_height); assert(pixel_y + M <= image_height);
assert(pixel_x + N <= image_width); assert(pixel_x + N <= image_width);
unsigned top_left = pixel_x + (pixel_y * image_width); unsigned top_left = pixel_x + (pixel_y * image_width);
auto rows = std::array<Row *, M>(); auto rows = std::array<Row *, M>();
for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast<Row *>(&image[top_left + i * image_width]); } for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast<Row *>(&image[top_left + i * image_width]); }
auto dest = DecodedBlock(&image[top_left],image_width); auto dest = DecodedBlock(&image[top_left], image_width);
DecodeBlock(dest, &blocks[x + block_width * y]); DecodeBlock(dest, &blocks[x + block_width * y]);
}
} }
} }

@ -54,9 +54,9 @@ template <typename S, size_t M, size_t N> class BlockView {
BlockView(S *start, int row_stride = N, int pixel_stride = 1) : start(start), row_stride(row_stride), pixel_stride(pixel_stride) {} BlockView(S *start, int row_stride = N, int pixel_stride = 1) : start(start), row_stride(row_stride), pixel_stride(pixel_stride) {}
constexpr Row operator[](size_t index) noexcept(ndebug) { constexpr Row operator[](unsigned index) noexcept(ndebug) {
assert(index < M); assert(index < M);
return RowView<S, N>(&start[index * row_stride], pixel_stride); return RowView<S, N>(&start[row_stride * (int)index], pixel_stride);
} }
constexpr int width() noexcept { return N; } constexpr int width() noexcept { return N; }
@ -66,25 +66,25 @@ template <typename S, size_t M, size_t N> class BlockView {
constexpr S &get(unsigned x, unsigned y) noexcept(ndebug) { constexpr S &get(unsigned x, unsigned y) noexcept(ndebug) {
assert(x < N); assert(x < N);
assert(y < M); assert(y < M);
return start[(row_stride * y) + (pixel_stride * x)]; return start[(row_stride * (int)y) + (pixel_stride * (int)x)];
} }
constexpr S get(unsigned x, unsigned y) const noexcept(ndebug) { constexpr S get(unsigned x, unsigned y) const noexcept(ndebug) {
assert(x < N); assert(x < N);
assert(y < M); assert(y < M);
return start[(row_stride * y) + (pixel_stride * x)]; return start[(row_stride * (int)y) + (pixel_stride * (int)x)];
} }
constexpr void set(unsigned x, unsigned y, S value) noexcept(ndebug) { constexpr void set(unsigned x, unsigned y, S value) noexcept(ndebug) {
assert(x < N); assert(x < N);
assert(y < M); assert(y < M);
start[(row_stride * y) + (pixel_stride * x)] = value; start[(row_stride * (int)y) + (pixel_stride * (int)x)] = value;
} }
constexpr std::array<S, M * N> flatten() noexcept { constexpr std::array<S, M * N> flatten() noexcept {
std::array<S, M * N> result; std::array<S, M * N> result;
for (unsigned x = 0; x < N; x++) { for (int x = 0; x < N; x++) {
for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; } for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
} }
return result; return result;
} }
@ -107,10 +107,7 @@ template <size_t M, size_t N> class ColorBlockView : public BlockView<Color, M,
return ChannelView(channelStart, Base::row_stride * 4, Base::pixel_stride * 4); return ChannelView(channelStart, Base::row_stride * 4, Base::pixel_stride * 4);
} }
constexpr ChannelView GetR() noexcept(ndebug) { return GetChannel(0); }; void SetRGB(unsigned x, unsigned y, Color value) noexcept(ndebug) { Base::get(x, y).SetRGB(value); }
constexpr ChannelView GetG() noexcept(ndebug) { return GetChannel(1); };
constexpr ChannelView GetB() noexcept(ndebug) { return GetChannel(2); };
constexpr ChannelView GetA() noexcept(ndebug) { return GetChannel(3); };
}; };
using Color4x4 = ColorBlockView<4, 4>; using Color4x4 = ColorBlockView<4, 4>;

@ -21,7 +21,7 @@
#include <algorithm> // for max, min #include <algorithm> // for max, min
#include "util.h" // for scale5To8, scale8To5, Assert5Bit, scale6To8 #include "util.h" // for scale5To8, scale8To5, assert5bit, scale6To8
// region Color implementation // region Color implementation
Color::Color() { SetRGBA(0, 0, 0, 0xFF); } Color::Color() { SetRGBA(0, 0, 0, 0xFF); }
@ -29,9 +29,9 @@ Color::Color() { SetRGBA(0, 0, 0, 0xFF); }
Color::Color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { SetRGBA(r, g, b, a); } Color::Color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { SetRGBA(r, g, b, a); }
uint16_t Color::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) { uint16_t Color::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) {
Assert5Bit(r); assert5bit(r);
Assert6Bit(g); assert6bit(g);
Assert5Bit(b); assert5bit(b);
return static_cast<uint16_t>(b | (g << 5) | (r << 11)); return static_cast<uint16_t>(b | (g << 5) | (r << 11));
} }

@ -28,14 +28,8 @@
#define UINT5_MAX 0x1FU // 31 #define UINT5_MAX 0x1FU // 31
#define UINT6_MAX 0x3FU // 63 #define UINT6_MAX 0x3FU // 63
template <typename S> constexpr void Assert5Bit(S x) { #define assert5bit(x) assert(x <= UINT5_MAX)
static_assert(std::is_unsigned<S>::value); #define assert6bit(x) assert(x <= UINT6_MAX)
assert(x <= UINT5_MAX);
}
template <typename S> constexpr void Assert6Bit(S x) {
static_assert(std::is_unsigned<S>::value);
assert(x <= UINT6_MAX);
}
template <typename S> constexpr auto iabs(S i) { template <typename S> constexpr auto iabs(S i) {
static_assert(!std::is_unsigned<S>::value); static_assert(!std::is_unsigned<S>::value);
@ -113,12 +107,11 @@ template <typename S> constexpr S scale8To6(S v) {
} }
template <typename S> constexpr S scale5To8(S v) { template <typename S> constexpr S scale5To8(S v) {
Assert5Bit(v); assert5bit(v);
assert(v <= UINT5_MAX);
return static_cast<S>((v << 3) | (v >> 2)); return static_cast<S>((v << 3) | (v >> 2));
} }
template <typename S> constexpr S scale6To8(S v) { template <typename S> constexpr S scale6To8(S v) {
Assert6Bit(v); assert6bit(v);
return static_cast<S>((v << 2) | (v >> 4)); return static_cast<S>((v << 2) | (v >> 4));
} }

@ -47,7 +47,7 @@ function(set_project_warnings project_name)
-Wnon-virtual-dtor # warn the user if a class with virtual functions has a -Wnon-virtual-dtor # warn the user if a class with virtual functions has a
# non-virtual destructor. This helps catch hard to # non-virtual destructor. This helps catch hard to
# track down memory errors # track down memory errors
-Wold-style-cast # warn for c-style casts #-Wold-style-cast # warn for c-style casts
-Wcast-align # warn for potential performance problem casts -Wcast-align # warn for potential performance problem casts
-Wunused # warn on anything being unused -Wunused # warn on anything being unused
-Woverloaded-virtual # warn if you overload (not override) a virtual -Woverloaded-virtual # warn if you overload (not override) a virtual

Loading…
Cancel
Save