mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
improved clamp and sum
This commit is contained in:
@ -342,26 +342,48 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
}
|
||||
|
||||
// sum up all columns
|
||||
column_type hsum() const { return std::accumulate(column_begin(), column_end(), column_type{}); }
|
||||
column_type hsum() const {
|
||||
if constexpr (N == 1) { return *this; }
|
||||
if constexpr (M == 1) { return sum(); }
|
||||
for (unsigned i = 0; i < M; i++) {}
|
||||
return _map<column_type>([](auto row) { return quicktex::sum(row); }, *this);
|
||||
}
|
||||
|
||||
// sum up all rows
|
||||
row_type vsum() const { return std::accumulate(begin(), end(), row_type{}); }
|
||||
row_type vsum() const {
|
||||
if constexpr (N == 1) { return sum(); }
|
||||
if constexpr (M == 1) { return *this; }
|
||||
return std::accumulate(begin(), end(), row_type{});
|
||||
}
|
||||
|
||||
// sum up all values
|
||||
T sum() const { return std::accumulate(all_begin(), all_end(), T{}); }
|
||||
T sum() const {
|
||||
constexpr bool b = _batched && operable<_chunk_type<>, _chunk_type<>, std::plus<>>;
|
||||
_chunk_type<b> s = _get_chunk<b>(0);
|
||||
|
||||
// add up all chunks in parallel
|
||||
// chunk is scalar -> this is doing the actual sum
|
||||
// chunk is vector -> this is doing a vadd
|
||||
// chunk is simd -> this is doing an add across the simd
|
||||
for (unsigned i = 1; i < _chunk_count<b>; i++) {
|
||||
auto c = _get_chunk<b>(i);
|
||||
s += c;
|
||||
}
|
||||
|
||||
// now hadd the sum
|
||||
// if chunk is scalar, this is a noop
|
||||
return quicktex::sum(s);
|
||||
}
|
||||
|
||||
template <typename R, size_t P>
|
||||
requires operable<R, T, std::multiplies<>>
|
||||
Matrix<T, M, P> mult(const Matrix<R, N, P> &rhs) const {
|
||||
auto rt = rhs.transpose();
|
||||
Matrix<T, M, P> res(0);
|
||||
for (unsigned p = 0; p < P; p++) {
|
||||
// for each column of the RHS/Result
|
||||
for (unsigned m = 0; m < M; m++) {
|
||||
// for each row of the LHS/Result
|
||||
for (unsigned n = 0; n < N; n++) {
|
||||
res.element(m, p) += element(m, n) * rhs.element(n, p);
|
||||
}
|
||||
for (unsigned n = 0; n < N; n++) { res.element(m, p) += element(m, n) * rhs.element(n, p); }
|
||||
}
|
||||
}
|
||||
return res;
|
||||
@ -378,9 +400,7 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
Matrix mirror() const {
|
||||
Matrix result = *this;
|
||||
for (unsigned n = 0; n < N - 1; n++) {
|
||||
for (unsigned m = (n + 1); m < M; m++) {
|
||||
result.element(m,n) = result.element(n,m);
|
||||
}
|
||||
for (unsigned m = (n + 1); m < M; m++) { result.element(m, n) = result.element(n, m); }
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -388,19 +408,23 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
// dot product of two compatible matrices
|
||||
template <typename R>
|
||||
requires(N == 1) && operable<T, R, std::multiplies<>> && operable<T, T, std::plus<>>
|
||||
row_type dot(const Matrix<R, M, N> &rhs) const {
|
||||
inline row_type dot(const Matrix<R, M, N> &rhs) const {
|
||||
// technically this is Lt * R, but the vsum method is probably faster/more readable
|
||||
// than allocationg a new transpose matrix
|
||||
Matrix product = *this * rhs;
|
||||
return product.vsum();
|
||||
}
|
||||
|
||||
row_type sqr_mag() const { return dot(*this); }
|
||||
inline row_type sqr_mag() const { return dot(*this); }
|
||||
|
||||
Matrix abs() const { return _map(&quicktex::abs<_chunk_type<>>, *this); }
|
||||
inline Matrix abs() const {
|
||||
return _map([](auto c) { return quicktex::abs(c); }, *this);
|
||||
}
|
||||
|
||||
Matrix clamp(T low, T high) { return _map(&quicktex::clamp<_chunk_type<>>, *this, low, high); }
|
||||
Matrix clamp(const Matrix &low, const Matrix &high) {
|
||||
inline Matrix clamp(T low, T high) {
|
||||
return _map([low, high](_chunk_type<> c) { return quicktex::clamp(c, low, high); }, *this);
|
||||
}
|
||||
inline Matrix clamp(const Matrix &low, const Matrix &high) {
|
||||
return _map(&quicktex::clamp<_chunk_type<>>, *this, low, high);
|
||||
}
|
||||
|
||||
@ -476,13 +500,14 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
* @param args additional scalar arguments
|
||||
* @return vector mapped with f(lhs[i], args)
|
||||
*/
|
||||
template <typename Op, typename... Args> inline static Matrix _map(Op f, const Matrix &lhs, Args... args) {
|
||||
Matrix result;
|
||||
constexpr bool b = _batched && std::is_invocable_r_v<_chunk_type<>, Op, _chunk_type<>, Args...>;
|
||||
template <typename Result = Matrix, typename Op, typename... Args>
|
||||
inline static Result _map(Op f, const Matrix &lhs, Args... args) {
|
||||
Result result;
|
||||
constexpr bool b = _batched && std::is_invocable_v<_chunk_type<>, Op, _chunk_type<>, Args...>;
|
||||
for (unsigned i = 0; i < _chunk_count<b>; i++) {
|
||||
auto c = lhs._get_chunk<b>(i);
|
||||
auto resultc = f(c, args...);
|
||||
result._set_chunk<b>(i, resultc);
|
||||
result.template _set_chunk<b>(i, resultc);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -497,15 +522,15 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
* @param args additional scalar arguments
|
||||
* @return vector mapped with f(lhs[i], rhs[i], args)
|
||||
*/
|
||||
template <typename Op, typename... Args>
|
||||
inline static Matrix _map(Op f, const Matrix &lhs, const Matrix &rhs, Args... args) {
|
||||
Matrix result;
|
||||
template <typename Result = Matrix, typename Op, typename... Args>
|
||||
inline static Result _map(Op f, const Matrix &lhs, const Matrix &rhs, Args... args) {
|
||||
Result result;
|
||||
constexpr bool b = _batched && std::is_invocable_r_v<_chunk_type<>, Op, _chunk_type<>, _chunk_type<>, Args...>;
|
||||
for (unsigned i = 0; i < _chunk_count<b>; i++) {
|
||||
auto lc = lhs._get_chunk<b>(i);
|
||||
auto rc = rhs._get_chunk<b>(i);
|
||||
auto resultc = f(lc, rc, args...);
|
||||
result._set_chunk<b>(i, resultc);
|
||||
result.template _set_chunk<b>(i, resultc);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -521,9 +546,9 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
* @param args additional scalar arguments
|
||||
* @return vector mapped with f(lhs[i], rhs1[i], rhs2[i], args)
|
||||
*/
|
||||
template <typename Op, typename... Args>
|
||||
inline static Matrix _map(Op f, const Matrix &lhs, const Matrix &rhs1, const Matrix &rhs2, Args... args) {
|
||||
Matrix result;
|
||||
template <typename Result = Matrix, typename Op, typename... Args>
|
||||
inline static Result _map(Op f, const Matrix &lhs, const Matrix &rhs1, const Matrix &rhs2, Args... args) {
|
||||
Result result;
|
||||
constexpr bool b =
|
||||
_batched && std::is_invocable_r_v<_chunk_type<>, Op, _chunk_type<>, _chunk_type<>, _chunk_type<>, Args...>;
|
||||
for (unsigned i = 0; i < _chunk_count<b>; i++) {
|
||||
@ -531,7 +556,7 @@ class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, N>>, M> {
|
||||
auto r1c = rhs1._get_chunk<b>(i);
|
||||
auto r2c = rhs2._get_chunk<b>(i);
|
||||
auto resultc = f(lc, r1c, r2c, args...);
|
||||
result._set_chunk<b>(i, resultc);
|
||||
result.template _set_chunk<b>(i, resultc);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
@ -32,10 +32,8 @@
|
||||
|
||||
namespace quicktex {
|
||||
|
||||
namespace detail {
|
||||
using std::abs; // abs overload for builtin types
|
||||
using xsimd::abs; // abs overload for xsimd buffers
|
||||
} // namespace detail
|
||||
|
||||
template <typename S>
|
||||
requires requires(S &s) { s.abs(); }
|
||||
@ -43,19 +41,6 @@ constexpr S abs(S value) {
|
||||
return value.abs();
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
requires requires(S &s) { detail::abs(s); }
|
||||
constexpr S abs(S value) {
|
||||
return detail::abs(value);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
requires requires(S &s) { s.clamp(s, s); }
|
||||
constexpr S clamp(S value, S low, S high) {
|
||||
assert(low <= high);
|
||||
return value.clamp(low, high);
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
requires std::is_scalar_v<S>
|
||||
constexpr S clamp(S value, S low, S high) {
|
||||
@ -66,10 +51,28 @@ constexpr S clamp(S value, S low, S high) {
|
||||
}
|
||||
|
||||
template <typename S, typename A>
|
||||
constexpr S clamp(xsimd::batch<S, A> value, const xsimd::batch<S, A> &low, const xsimd::batch<S, A> &high) {
|
||||
value = xsimd::select(xsimd::lt(low), low, value);
|
||||
value = xsimd::select(xsimd::gt(high), high, value);
|
||||
return value;
|
||||
constexpr xsimd::batch<S, A> clamp(xsimd::batch<S, A> value, const xsimd::batch<S, A> &low,
|
||||
const xsimd::batch<S, A> &high) {
|
||||
return xsimd::clip(value, low, high);
|
||||
}
|
||||
|
||||
template <typename S, typename A>
|
||||
constexpr xsimd::batch<S, A> clamp(xsimd::batch<S, A> value, const S &low, const S &high) {
|
||||
return clamp(value, xsimd::broadcast(low), xsimd::broadcast(high));
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
requires requires(S &s) { s.sum(); }
|
||||
constexpr auto sum(S value) {
|
||||
return value.sum();
|
||||
}
|
||||
|
||||
template <typename S>
|
||||
requires std::is_scalar_v<S>
|
||||
constexpr auto sum(S value) {
|
||||
return value;
|
||||
// horizontally adding a scalar is a noop
|
||||
}
|
||||
|
||||
template <typename S, typename A> constexpr auto sum(xsimd::batch<S, A> value) { return xsimd::hadd(value); }
|
||||
} // namespace quicktex
|
Reference in New Issue
Block a user