mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
Remove Highway
This commit is contained in:
parent
04fece2771
commit
79f77a24b2
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -1,3 +0,0 @@
|
||||
[submodule "external/highway"]
|
||||
path = external/highway
|
||||
url = https://github.com/google/highway.git
|
@ -1,27 +1,14 @@
|
||||
cmake_minimum_required(VERSION 3.18)
|
||||
|
||||
# Detect a clang-cl build so later logic can pass /clang: style flags.
# The CC environment variable must be dereferenced with $ENV{...}; a bare
# ENV{CC} is not an environment reference inside if() and never matches.
# The reference is quoted so an unset CC yields an empty string rather than
# a malformed condition.
# NOTE(review): MSVC is normally populated by project(), which appears later
# in this file - confirm it is defined at this point.
if (MSVC AND "$ENV{CC}" MATCHES ".*clang-cl.*")
    set(CMAKE_GENERATOR_TOOLSET "ClangCL")
    set(CLANG_CL TRUE)
else ()
    set(CLANG_CL FALSE)
endif ()
|
||||
|
||||
include(tools/CompilerWarnings.cmake)
|
||||
include(tools/CPUFeatures.cmake)
|
||||
set(CMAKE_VERBOSE_MAKEFILE ON)
|
||||
|
||||
set(HWY_ENABLE_INSTALL OFF)
|
||||
set(HWY_ENABLE_EXAMPLES OFF)
|
||||
option(BUILD_TESTING "" OFF)
|
||||
|
||||
project(quicktex)
|
||||
|
||||
# Find dependencies
|
||||
find_package(Python COMPONENTS Interpreter Development.Module)
|
||||
find_package(pybind11 CONFIG REQUIRED)
|
||||
find_package(OpenMP)
|
||||
add_subdirectory(external/highway)
|
||||
|
||||
# Collect source files
|
||||
file(GLOB SOURCE_FILES
|
||||
|
1
external/highway
vendored
1
external/highway
vendored
@ -1 +0,0 @@
|
||||
Subproject commit fa49471c43ba37c528dd77b0ef060cdc313054bb
|
@ -18,70 +18,6 @@
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
#include <hwy/highway.h>
|
||||
namespace hn = hwy::HWY_NAMESPACE;
|
||||
|
||||
#if HWY_TARGET == HWY_NEON
|
||||
#include <arm_neon.h>
|
||||
#elif HWY_ARCH_X86_64
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
namespace quicktex {
|
||||
|
||||
using Tag_s16 = hn::ScalableTag<int16_t>;
|
||||
using Vec_s16 = hn::Vec<Tag_s16>;
|
||||
using Tag_s32 = hn::ScalableTag<int32_t>;
|
||||
using Vec_s32 = hn::Vec<Tag_s32>;
|
||||
|
||||
const Tag_s16 TagS16;
|
||||
const Tag_s32 TagS32;
|
||||
|
||||
/// Helper function for doing sum-of-lanes without a tag lvalue. We're not targeting SVE (yet) so this should work fine.
|
||||
/// \tparam V Vector type to sum (8- and 16-bit integers are NOT supported)
|
||||
/// \param v Vector to sum
|
||||
/// \return The sum of all lanes in each lane.
|
||||
template <typename V> inline V SumOfLanes(V v) {
|
||||
hn::DFromV<V> tag;
|
||||
return hn::SumOfLanes(tag, v);
|
||||
}
|
||||
|
||||
inline int32_t WideningSumS16(const Vec_s16 v) {
|
||||
#if HWY_TARGET == HWY_SCALAR
|
||||
// In Scalar mode this is a no-op, since there's only one lane
|
||||
return (int32_t)v.raw;
|
||||
#elif HWY_TARGET == HWY_EMU128
|
||||
// In emulated 128-bit mode, do the addition serially
|
||||
int acc = 0;
|
||||
for (unsigned i = 0; i < hn::MaxLanes(TagS16); i++) { acc += v.raw[i]; }
|
||||
return acc;
|
||||
#elif HWY_TARGET == HWY_NEON
|
||||
static_assert(hn::MaxLanes(TagS16) == 8);
|
||||
static_assert(hn::MaxLanes(TagS32) == 4);
|
||||
|
||||
// Pairwise widening sum, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32(vpaddlq_s16(v.raw));
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#elif HWY_ARCH_X86_64
|
||||
#if HWY_TARGET == HWY_AVX2 || HWY_TARGET == HWY_AVX3
|
||||
static_assert(hn::MaxLanes(TagS16) == 16);
|
||||
static_assert(hn::MaxLanes(TagS32) == 8);
|
||||
|
||||
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32{_mm256_madd_epi16(v.raw, _mm256_set1_epi16(1))};
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#else
|
||||
static_assert(hn::MaxLanes(TagS16) == 8);
|
||||
static_assert(hn::MaxLanes(TagS32) == 4);
|
||||
|
||||
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32{_mm_madd_epi16(v.raw, _mm_set1_epi16(1))};
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace quicktex
|
@ -19,8 +19,6 @@
|
||||
|
||||
#include "TestSIMD.h"
|
||||
|
||||
#include <hwy/highway.h>
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
@ -28,38 +26,6 @@
|
||||
|
||||
#include "../VecUtil.h"
|
||||
|
||||
namespace hn = hwy::HWY_NAMESPACE;
|
||||
|
||||
namespace quicktex::tests {
|
||||
|
||||
void TestWidenSumS16() {
|
||||
const hn::ScalableTag<int16_t> tag;
|
||||
const auto vec_size = hn::MaxLanes(tag);
|
||||
std::array<int16_t, vec_size> buffer;
|
||||
|
||||
std::iota(buffer.begin(), buffer.end(), 1);
|
||||
auto v = hn::Load(tag, &buffer[0]);
|
||||
auto sum = WideningSumS16(v);
|
||||
assert(sum == vec_size / 2 * (vec_size + 1)); // Gauss formula
|
||||
|
||||
buffer.fill(1);
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum = WideningSumS16(v);
|
||||
assert(sum == vec_size);
|
||||
|
||||
buffer.fill(0);
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum= WideningSumS16(v);
|
||||
assert(sum == 0);
|
||||
|
||||
buffer.fill(std::numeric_limits<int16_t>::max());
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum= WideningSumS16(v);
|
||||
assert(sum == std::numeric_limits<int16_t>::max() * (int)vec_size);
|
||||
|
||||
buffer.fill(std::numeric_limits<int16_t>::min());
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum= WideningSumS16(v);
|
||||
assert(sum == std::numeric_limits<int16_t>::min() * (int)vec_size);
|
||||
}
|
||||
} // namespace quicktex::tests
|
@ -21,6 +21,4 @@
|
||||
|
||||
namespace quicktex::tests {
|
||||
|
||||
void TestWidenSumS16();
|
||||
|
||||
} // namespace quicktex::tests
|
@ -42,32 +42,20 @@ function(set_simd_flags target_name)
|
||||
if (highway_mode STREQUAL "AUTO")
|
||||
# setting -march=native on an M1 causes Clang to freak out
|
||||
if (MSVC)
|
||||
if (CLANG_CL)
|
||||
target_compile_options(${target_name} PUBLIC /clang:-march=native)
|
||||
else()
|
||||
#MSVC has no -march=native equivalent. womp
|
||||
message(WARNING "Compiling using cl.exe without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with clang-cl if you need vectorization")
|
||||
endif()
|
||||
#MSVC has no -march=native equivalent. womp
|
||||
message(WARNING "Compiling using MSVC without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with Clang if you need vectorization")
|
||||
elseif (!ARM)
|
||||
target_compile_options(${target_name} PUBLIC -march=native)
|
||||
endif ()
|
||||
elseif (highway_mode STREQUAL "SSSE3")
|
||||
if (MSVC)
|
||||
if (CLANG_CL)
|
||||
target_compile_options(${target_name} PUBLIC /clang:-mssse3)
|
||||
else()
|
||||
message(SEND_ERROR "Compiling using SSSE3 is not supported with the cl.exe compiler. Please use AVX or compile with clang-cl")
|
||||
endif()
|
||||
message(SEND_ERROR "Compiling using SSSE3 is not supported with the MSVC compiler. Please use AVX or compile withClang")
|
||||
else ()
|
||||
target_compile_options(${target_name} PUBLIC -mssse3)
|
||||
endif ()
|
||||
elseif (highway_mode STREQUAL "SSE4")
|
||||
if (MSVC)
|
||||
if (CLANG_CL)
|
||||
target_compile_options(${target_name} PUBLIC /clang:-msse4)
|
||||
else()
|
||||
message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang")
|
||||
endif()
|
||||
message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang")
|
||||
else ()
|
||||
target_compile_options(${target_name} PUBLIC -msse4)
|
||||
endif ()
|
||||
|
Loading…
Reference in New Issue
Block a user