mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
Add emulated 128-bit support and fix x86
This commit is contained in:
parent
f7b0cbe76b
commit
8168d6e249
@ -18,9 +18,7 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hwy/highway.h>
|
||||
|
||||
namespace hn = hwy::HWY_NAMESPACE;
|
||||
|
||||
#if HWY_TARGET == HWY_NEON
|
||||
@ -52,6 +50,11 @@ inline int32_t WideningSumS16(const Vec_s16 v) {
|
||||
#if HWY_TARGET == HWY_SCALAR
|
||||
// In Scalar mode this is a no-op, since there's only one lane
|
||||
return (int32_t)v.raw;
|
||||
#elif HWY_TARGET == HWY_EMU128
|
||||
// In emulated 128-bit mode, do the addition serially
|
||||
int acc = 0;
|
||||
for (unsigned i = 0; i < hn::MaxLanes(TagS16); i++) { acc += v.raw[i]; }
|
||||
return acc;
|
||||
#elif HWY_TARGET == HWY_NEON
|
||||
static_assert(hn::MaxLanes(TagS16) == 8);
|
||||
static_assert(hn::MaxLanes(TagS32) == 4);
|
||||
@ -66,7 +69,7 @@ inline int32_t WideningSumS16(const Vec_s16 v) {
|
||||
static_assert(hn::MaxLanes(TagS32) == 8);
|
||||
|
||||
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32(_mm256_madd_epi16(v.raw, __mm256_set1_epi16(1)));
|
||||
auto paired = Vec_s32{_mm256_madd_epi16(v.raw, __mm256_set1_epi16(1))};
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#else
|
||||
@ -74,7 +77,7 @@ inline int32_t WideningSumS16(const Vec_s16 v) {
|
||||
static_assert(hn::MaxLanes(TagS32) == 4);
|
||||
|
||||
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32(_mm_madd_epi16(v.raw, _mm_set1_epi16(1)));
|
||||
auto paired = Vec_s32{_mm_madd_epi16(v.raw, _mm_set1_epi16(1))};
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#endif
|
||||
|
Loading…
Reference in New Issue
Block a user