mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
Attempt to batch some matrix ops
This commit is contained in:
parent
10ba6b2bd6
commit
3ceb028907
@ -361,46 +361,74 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
|||||||
row_type sqr_mag() const { return dot(*this); }
|
row_type sqr_mag() const { return dot(*this); }
|
||||||
|
|
||||||
Matrix abs() const {
|
Matrix abs() const {
|
||||||
Matrix ret;
|
Matrix res;
|
||||||
for (unsigned i = 0; i < N * M; i++) { ret.element(i) = quicktex::abs(element(i)); }
|
if constexpr (_batched) {
|
||||||
return ret;
|
auto lb = _batch_type::load_unaligned(&this->at(0));
|
||||||
}
|
lb = xsimd::abs(lb);
|
||||||
|
lb.store_unaligned(&res[0]);
|
||||||
Matrix clamp(T low, T high) {
|
} else {
|
||||||
Matrix ret;
|
for (unsigned i = 0; i < N * M; i++) { res.element(i) = quicktex::abs(element(i)); }
|
||||||
for (unsigned i = 0; i < N * M; i++) { ret.element(i) = quicktex::clamp(element(i), low, high); }
|
|
||||||
return ret;
|
|
||||||
}
|
|
||||||
|
|
||||||
Matrix clamp(const Matrix &low, const Matrix &high) {
|
|
||||||
Matrix ret;
|
|
||||||
for (unsigned i = 0; i < N * M; i++) {
|
|
||||||
ret.element(i) = quicktex::clamp(element(i), low.element(i), high.element(i));
|
|
||||||
}
|
}
|
||||||
return ret;
|
return res;
|
||||||
|
}
|
||||||
|
|
||||||
|
Matrix clamp(T low, T high) { return clamp(Matrix(low), Matrix(high)); }
|
||||||
|
Matrix clamp(const Matrix &low, const Matrix &high) {
|
||||||
|
Matrix res;
|
||||||
|
if constexpr (_batched) {
|
||||||
|
auto vb = _batch_type::load_unaligned(&this->at(0));
|
||||||
|
auto lb = _batch_type::load_unaligned(&low[0]);
|
||||||
|
auto hb = _batch_type::load_unaligned(&high[0]);
|
||||||
|
vb = quicktex::clamp(vb, lb, hb);
|
||||||
|
vb.store_unaligned(&res[0]);
|
||||||
|
} else {
|
||||||
|
for (unsigned m = 0; m < M; m++) {
|
||||||
|
res[m] = quicktex::clamp<row_type>(get_row(m), low.get_row(m), high.get_row(m));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
template <typename Op> static inline Matrix map(Matrix &lhs, Op f) {
|
template <typename Op> static inline Matrix map(const Matrix &lhs, Op f) {
|
||||||
Matrix ret;
|
Matrix res;
|
||||||
for (unsigned i = 0; i < lhs.size(); i++) { ret[i] = f(lhs[i]); }
|
if constexpr (_batched) {
|
||||||
return ret;
|
auto lb = _batch_type::load_unaligned(&lhs[0]);
|
||||||
|
auto resb = f(lb);
|
||||||
|
resb.store_unaligned(&res[0]);
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i < lhs.size(); i++) { res[i] = f(lhs[i]); }
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename R>
|
template <typename Op, typename R>
|
||||||
requires operable<R, T, Op>
|
requires operable<R, T, Op>
|
||||||
static inline Matrix map(const Matrix &lhs, const R &rhs, Op f) {
|
static inline Matrix map(const Matrix &lhs, const R &rhs, Op f) {
|
||||||
Matrix r;
|
Matrix res;
|
||||||
for (unsigned i = 0; i < lhs.size(); i++) { r[i] = f(lhs[i], rhs); }
|
if constexpr (_batched && operable<_batch_type, R, Op>) {
|
||||||
return r;
|
auto lb = _batch_type::load_unaligned(&lhs[0]);
|
||||||
|
auto resb = f(lb, rhs);
|
||||||
|
resb.store_unaligned(&res[0]);
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i < lhs.size(); i++) { res[i] = f(lhs[i], rhs); }
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename Op, typename R>
|
template <typename Op, typename R>
|
||||||
requires operable<R, T, Op>
|
requires operable<R, T, Op>
|
||||||
static inline Matrix map(const Matrix &lhs, const Matrix<R, N, M> &rhs, Op f) {
|
static inline Matrix map(const Matrix &lhs, const Matrix<R, N, M> &rhs, Op f) {
|
||||||
Matrix r;
|
Matrix res;
|
||||||
for (unsigned i = 0; i < lhs.size(); i++) { r[i] = f(lhs[i], rhs[i]); }
|
if constexpr (_batched && operable<_batch_type, _batch_type, Op>) {
|
||||||
return r;
|
auto lb = _batch_type::load_unaligned(&lhs[0]);
|
||||||
|
auto rb = xsimd::load_as<T>(&rhs[0], xsimd::unaligned_mode{});
|
||||||
|
auto resb = f(lb, rb);
|
||||||
|
resb.store_unaligned(&res[0]);
|
||||||
|
} else {
|
||||||
|
for (unsigned i = 0; i < lhs.size(); i++) { res[i] = f(lhs[i], rhs[i]); }
|
||||||
|
}
|
||||||
|
return res;
|
||||||
}
|
}
|
||||||
|
|
||||||
class column_iterator : public index_iterator_base<column_iterator> {
|
class column_iterator : public index_iterator_base<column_iterator> {
|
||||||
@ -438,26 +466,32 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
|||||||
private:
|
private:
|
||||||
V *_matrix;
|
V *_matrix;
|
||||||
};
|
};
|
||||||
};
|
|
||||||
|
|
||||||
template <typename T, size_t M, typename A = xsimd::default_arch> class BatchVec : Vec<xsimd::batch<T, A>, M> {
|
private:
|
||||||
template <size_t N, typename U = xsimd::unaligned_mode>
|
using _batch_type = std::conditional_t<N == 1, typename xsimd::make_sized_batch<T, M>::type, void>;
|
||||||
static BatchVec load_columns(const Matrix<T, N, M> &matrix, size_t column) {
|
static constexpr bool _batched = !std::is_void_v<_batch_type>;
|
||||||
const size_t batch_size = xsimd::batch<T, A>::size;
|
|
||||||
assert(column + batch_size <= N);
|
|
||||||
|
|
||||||
BatchVec ret;
|
// right now batched types are always the whole vector but that might change
|
||||||
for (unsigned i = 0; i < M; i++) { ret[i] = xsimd::load<A, T>(&(matrix[column][i]), U{}); }
|
template <bool b = true> using _chunk_type = std::conditional_t<b && _batched, _batch_type, row_type>;
|
||||||
return ret;
|
|
||||||
|
template <bool b = true> static constexpr size_t _chunk_count = b && _batched ? 1 : M;
|
||||||
|
|
||||||
|
template <bool b = true> inline _chunk_type<b> get_chunk(size_t i) const {
|
||||||
|
assert(i < _chunk_count<b>);
|
||||||
|
if constexpr (b && _batched) {
|
||||||
|
return _chunk_type<b>::load_unaligned(&(this->at(0)));
|
||||||
|
} else {
|
||||||
|
return get_row(i);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename U = xsimd::unaligned_mode, typename V, size_t N>
|
template <bool b = true> inline void set_chunk(size_t i, _chunk_type<b> &value) const {
|
||||||
void store_columns(Matrix<T, N, M> &matrix, size_t column) {
|
assert(i < _chunk_count<b>);
|
||||||
const size_t batch_size = xsimd::batch<T, A>::size;
|
if constexpr (b && _batched) {
|
||||||
assert(column + batch_size <= N);
|
xsimd::store_unaligned(&(this->at(0)), value);
|
||||||
|
} else {
|
||||||
for (unsigned i = 0; i < M; i++) { this->at(i).store((&(matrix[column][i]), U{})); }
|
set_row(i, value);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace quicktex
|
} // namespace quicktex
|
@ -27,17 +27,49 @@
|
|||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
#include "util/ranges.h"
|
||||||
#include "xsimd/xsimd.hpp"
|
#include "xsimd/xsimd.hpp"
|
||||||
|
|
||||||
namespace quicktex {
|
namespace quicktex {
|
||||||
|
|
||||||
|
namespace detail {
|
||||||
using std::abs; // abs overload for builtin types
|
using std::abs; // abs overload for builtin types
|
||||||
using xsimd::abs; // abs overload for xsimd buffers
|
using xsimd::abs; // abs overload for xsimd buffers
|
||||||
|
} // namespace detail
|
||||||
|
|
||||||
template <typename S> constexpr S clamp(S value, S low, S high) {
|
template <typename S>
|
||||||
|
requires requires(S &s) { s.abs(); }
|
||||||
|
constexpr S abs(S value) {
|
||||||
|
return value.abs();
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename S>
|
||||||
|
requires requires(S &s) { detail::abs(s); }
|
||||||
|
constexpr S abs(S value) {
|
||||||
|
return detail::abs(value);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename S>
|
||||||
|
requires requires(S &s) { s.clamp(s, s); }
|
||||||
|
constexpr S clamp(S value, S low, S high) {
|
||||||
|
assert(low <= high);
|
||||||
|
return value.clamp(low, high);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename S>
|
||||||
|
requires std::is_scalar_v<S>
|
||||||
|
constexpr S clamp(S value, S low, S high) {
|
||||||
assert(low <= high);
|
assert(low <= high);
|
||||||
if (value < low) return low;
|
if (value < low) return low;
|
||||||
if (value > high) return high;
|
if (value > high) return high;
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename S, typename A>
|
||||||
|
constexpr S clamp(xsimd::batch<S, A> value, const xsimd::batch<S, A> &low, const xsimd::batch<S, A> &high) {
|
||||||
|
value = xsimd::select(xsimd::lt(low), low, value);
|
||||||
|
value = xsimd::select(xsimd::gt(high), high, value);
|
||||||
|
return value;
|
||||||
|
}
|
||||||
|
|
||||||
} // namespace quicktex
|
} // namespace quicktex
|
@ -20,6 +20,7 @@
|
|||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <cstdint>
|
#include <cstdint>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
@ -147,5 +147,31 @@ TEST(Vec_int, copy) {
|
|||||||
|
|
||||||
EXPECT_EQ(out, arr);
|
EXPECT_EQ(out, arr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Unary minus applied to a 4-element int vector is compared against the per-element negated values.
TEST(Vec_int, neg) {
    Vec<int, 4> input{1, 2, 3, 4};
    expect_matrix_eq(-input, {-1, -2, -3, -4});
}
|
||||||
|
|
||||||
|
// The sum of two 4-element int vectors is compared against the per-element sums.
TEST(Vec_int, add) {
    Vec<int, 4> lhs{1, 2, 3, 4};
    Vec<int, 4> rhs{5, 6, 7, 8};
    expect_matrix_eq(lhs + rhs, {6, 8, 10, 12});
}
|
||||||
|
|
||||||
|
// The difference of two 4-element int vectors is compared against the per-element differences.
TEST(Vec_int, sub) {
    Vec<int, 4> subtrahend{1, 2, 3, 4};
    Vec<int, 4> minuend{5, 6, 7, 8};
    expect_matrix_eq(minuend - subtrahend, {4, 4, 4, 4});
}
|
||||||
|
|
||||||
|
// abs() on a vector holding positive, negative and zero elements is compared against their magnitudes.
TEST(Vec_int, abs) {
    Vec<int, 4> mixed_signs{1, -5, -1, 0};
    expect_matrix_eq(mixed_signs.abs(), {1, 5, 1, 0});
}
|
||||||
// endregion
|
// endregion
|
||||||
} // namespace quicktex::tests
|
} // namespace quicktex::tests
|
Loading…
Reference in New Issue
Block a user