Various small tweaks, disable fat binary creation

faster-single-tables
Andrew Cassidy 3 years ago
parent 77cc6a8132
commit b42c52030d

@ -38,9 +38,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -O3")
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -fsanitize=undefined")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined")
set(PROJECT_WARNINGS ${CLANG_WARNINGS})
if (CMAKE_SYSTEM_NAME STREQUAL "Darwin")
set_property(TARGET python_rgbcx test_rgbcx PROPERTY OSX_ARCHITECTURES_RELEASE x86_64 arm64) #Mach-O fat binary for arm and x86
endif ()
endif ()

@ -40,14 +40,7 @@ template <class B, size_t M, size_t N> class BlockDecoder {
virtual void DecodeBlock(DecodedBlock dest, EncodedBlock *const block) const noexcept(ndebug) = 0;
void DecodeRow(std::span<DecodedBlock> dests, std::span<const EncodedBlock> blocks) {
assert(dests.size() == blocks.size());
for (int i = 0; i < dests.size; i++) { DecodeBlock(&dests[i], &blocks[i]); }
}
std::vector<Color> DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height, unsigned chunk_size = 0, bool threaded = false) {
assert(threaded == chunk_size > 0);
std::vector<Color> DecodeImage(uint8_t *bytes, unsigned image_width, unsigned image_height) {
unsigned block_width = maximum(1U, ((image_width + 3) / 4));
unsigned block_height = maximum(1U, ((image_height + 3) / 4));
using Row = typename DecodedBlock::Row;
@ -55,25 +48,26 @@ template <class B, size_t M, size_t N> class BlockDecoder {
auto image = std::vector<Color>(block_width * block_height * N * M);
auto blocks = reinterpret_cast<B *>(bytes);
if (!threaded) {
// from experimentation, multithreading this using OpenMP actually makes decoding slower
// due to thread creation/teardown taking longer than the decoding process itself.
// As a result, this is left as a serial operation despite being embarrassingly parallelizable
for (unsigned y = 0; y < block_height; y++) {
for (unsigned x = 0; x < block_width; x++) {
for (unsigned y = 0; y < block_height; y++) {
unsigned pixel_x = x * N;
unsigned pixel_y = y * M;
unsigned pixel_x = x * N;
unsigned pixel_y = y * M;
assert(pixel_x >= 0);
assert(pixel_y >= 0);
assert(pixel_y + M <= image_height);
assert(pixel_x + N <= image_width);
assert(pixel_x >= 0);
assert(pixel_y >= 0);
assert(pixel_y + M <= image_height);
assert(pixel_x + N <= image_width);
unsigned top_left = pixel_x + (pixel_y * image_width);
auto rows = std::array<Row *, M>();
for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast<Row *>(&image[top_left + i * image_width]); }
unsigned top_left = pixel_x + (pixel_y * image_width);
auto rows = std::array<Row *, M>();
for (unsigned i = 0; i < M; i++) { rows[i] = reinterpret_cast<Row *>(&image[top_left + i * image_width]); }
auto dest = DecodedBlock(&image[top_left],image_width);
auto dest = DecodedBlock(&image[top_left], image_width);
DecodeBlock(dest, &blocks[x + block_width * y]);
}
DecodeBlock(dest, &blocks[x + block_width * y]);
}
}

@ -54,9 +54,9 @@ template <typename S, size_t M, size_t N> class BlockView {
BlockView(S *start, int row_stride = N, int pixel_stride = 1) : start(start), row_stride(row_stride), pixel_stride(pixel_stride) {}
constexpr Row operator[](size_t index) noexcept(ndebug) {
constexpr Row operator[](unsigned index) noexcept(ndebug) {
assert(index < M);
return RowView<S, N>(&start[index * row_stride], pixel_stride);
return RowView<S, N>(&start[row_stride * (int)index], pixel_stride);
}
constexpr int width() noexcept { return N; }
@ -66,25 +66,25 @@ template <typename S, size_t M, size_t N> class BlockView {
constexpr S &get(unsigned x, unsigned y) noexcept(ndebug) {
assert(x < N);
assert(y < M);
return start[(row_stride * y) + (pixel_stride * x)];
return start[(row_stride * (int)y) + (pixel_stride * (int)x)];
}
constexpr S get(unsigned x, unsigned y) const noexcept(ndebug) {
assert(x < N);
assert(y < M);
return start[(row_stride * y) + (pixel_stride * x)];
return start[(row_stride * (int)y) + (pixel_stride * (int)x)];
}
constexpr void set(unsigned x, unsigned y, S value) noexcept(ndebug) {
assert(x < N);
assert(y < M);
start[(row_stride * y) + (pixel_stride * x)] = value;
start[(row_stride * (int)y) + (pixel_stride * (int)x)] = value;
}
constexpr std::array<S, M * N> flatten() noexcept {
std::array<S, M * N> result;
for (unsigned x = 0; x < N; x++) {
for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
for (int x = 0; x < N; x++) {
for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
}
return result;
}
@ -107,10 +107,7 @@ template <size_t M, size_t N> class ColorBlockView : public BlockView<Color, M,
return ChannelView(channelStart, Base::row_stride * 4, Base::pixel_stride * 4);
}
constexpr ChannelView GetR() noexcept(ndebug) { return GetChannel(0); };
constexpr ChannelView GetG() noexcept(ndebug) { return GetChannel(1); };
constexpr ChannelView GetB() noexcept(ndebug) { return GetChannel(2); };
constexpr ChannelView GetA() noexcept(ndebug) { return GetChannel(3); };
void SetRGB(unsigned x, unsigned y, Color value) noexcept(ndebug) { Base::get(x, y).SetRGB(value); }
};
using Color4x4 = ColorBlockView<4, 4>;

@ -21,7 +21,7 @@
#include <algorithm> // for max, min
#include "util.h" // for scale5To8, scale8To5, Assert5Bit, scale6To8
#include "util.h" // for scale5To8, scale8To5, assert5bit, scale6To8
// region Color implementation
Color::Color() { SetRGBA(0, 0, 0, 0xFF); }
@ -29,9 +29,9 @@ Color::Color() { SetRGBA(0, 0, 0, 0xFF); }
Color::Color(uint8_t r, uint8_t g, uint8_t b, uint8_t a) { SetRGBA(r, g, b, a); }
uint16_t Color::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) {
Assert5Bit(r);
Assert6Bit(g);
Assert5Bit(b);
assert5bit(r);
assert6bit(g);
assert5bit(b);
return static_cast<uint16_t>(b | (g << 5) | (r << 11));
}

@ -28,14 +28,8 @@
#define UINT5_MAX 0x1FU // 31
#define UINT6_MAX 0x3FU // 63
template <typename S> constexpr void Assert5Bit(S x) {
static_assert(std::is_unsigned<S>::value);
assert(x <= UINT5_MAX);
}
template <typename S> constexpr void Assert6Bit(S x) {
static_assert(std::is_unsigned<S>::value);
assert(x <= UINT6_MAX);
}
#define assert5bit(x) assert(x <= UINT5_MAX)
#define assert6bit(x) assert(x <= UINT6_MAX)
template <typename S> constexpr auto iabs(S i) {
static_assert(!std::is_unsigned<S>::value);
@ -113,12 +107,11 @@ template <typename S> constexpr S scale8To6(S v) {
}
template <typename S> constexpr S scale5To8(S v) {
Assert5Bit(v);
assert(v <= UINT5_MAX);
assert5bit(v);
return static_cast<S>((v << 3) | (v >> 2));
}
template <typename S> constexpr S scale6To8(S v) {
Assert6Bit(v);
assert6bit(v);
return static_cast<S>((v << 2) | (v >> 4));
}

@ -47,7 +47,7 @@ function(set_project_warnings project_name)
-Wnon-virtual-dtor # warn the user if a class with virtual functions has a
# non-virtual destructor. This helps catch hard to
# track down memory errors
-Wold-style-cast # warn for c-style casts
#-Wold-style-cast # warn for c-style casts
-Wcast-align # warn for potential performance problem casts
-Wunused # warn on anything being unused
-Woverloaded-virtual # warn if you overload (not override) a virtual

Loading…
Cancel
Save