Remove Highway

This commit is contained in:
Andrew Cassidy 2022-05-20 20:18:18 -07:00
parent 04fece2771
commit 79f77a24b2
7 changed files with 4 additions and 133 deletions

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "external/highway"]
path = external/highway
url = https://github.com/google/highway.git

View File

@ -1,27 +1,14 @@
cmake_minimum_required(VERSION 3.18)

# Select the ClangCL toolset when the CC environment variable names clang-cl.
# Environment variables are not dereferenced automatically inside if(): a bare
# ENV{CC} is compared as the literal text "ENV{CC}" and never matches, so the
# value has to be spelled "$ENV{CC}".
# NOTE(review): MSVC is normally populated by project()/enable_language(), and
# this check runs before project() -- confirm the condition can be true here.
if(MSVC AND "$ENV{CC}" MATCHES ".*clang-cl.*")
    set(CMAKE_GENERATOR_TOOLSET "ClangCL")
    set(CLANG_CL TRUE)
else()
    set(CLANG_CL FALSE)
endif()

include(tools/CompilerWarnings.cmake)
include(tools/CPUFeatures.cmake)

set(CMAKE_VERBOSE_MAKEFILE ON)

# These are set before the Highway subdirectory is added below
# (presumably consumed by its build scripts -- confirm against external/highway).
set(HWY_ENABLE_INSTALL OFF)
set(HWY_ENABLE_EXAMPLES OFF)
option(BUILD_TESTING "" OFF)

project(quicktex)

# Find dependencies
find_package(Python COMPONENTS Interpreter Development.Module)
find_package(pybind11 CONFIG REQUIRED)
find_package(OpenMP)
add_subdirectory(external/highway)
# Collect source files
file(GLOB SOURCE_FILES

1
external/highway vendored

@ -1 +0,0 @@
Subproject commit fa49471c43ba37c528dd77b0ef060cdc313054bb

View File

@ -18,70 +18,6 @@
*/
#pragma once
#include <hwy/highway.h>
namespace hn = hwy::HWY_NAMESPACE;
#if HWY_TARGET == HWY_NEON
#include <arm_neon.h>
#elif HWY_ARCH_X86_64
#include <immintrin.h>
#endif
namespace quicktex {
// Highway descriptor tags for signed 16-bit and signed 32-bit lanes
using Tag_s16 = hn::ScalableTag<int16_t>;
using Tag_s32 = hn::ScalableTag<int32_t>;

// Vector types that correspond to each tag
using Vec_s16 = hn::Vec<Tag_s16>;
using Vec_s32 = hn::Vec<Tag_s32>;

// Shared tag instances to pass to Highway functions that take a tag argument
const Tag_s16 TagS16;
const Tag_s32 TagS32;
/// Helper function for doing sum-of-lanes without a tag lvalue. We're not targeting SVE (yet) so this should work fine.
/// \tparam V Vector type to sum (8- and 16-bit integers are NOT supported)
/// \param v Vector to sum
/// \return The sum of all lanes in each lane.
template <typename V> inline V SumOfLanes(V v) {
hn::DFromV<V> tag;
return hn::SumOfLanes(tag, v);
}
/// Adds together every lane of a vector of signed 16-bit integers.
/// Lanes are widened to 32 bits before being accumulated, so the total is not truncated to 16 bits.
/// Targets with a dedicated path use a pairwise widening add; every other target stores the lanes
/// to memory and adds them serially, so the function always returns a value.
/// \param v Vector of int16_t lanes to sum
/// \return The sum of all lanes of v as a 32-bit integer
inline int32_t WideningSumS16(const Vec_s16 v) {
#if HWY_TARGET == HWY_SCALAR
    // In Scalar mode this is a no-op, since there's only one lane
    return static_cast<int32_t>(v.raw);
#elif HWY_TARGET == HWY_EMU128
    // In emulated 128-bit mode, do the addition serially
    int32_t acc = 0;
    for (unsigned i = 0; i < hn::MaxLanes(TagS16); i++) { acc += v.raw[i]; }
    return acc;
#elif HWY_TARGET == HWY_NEON
    static_assert(hn::MaxLanes(TagS16) == 8);
    static_assert(hn::MaxLanes(TagS32) == 4);

    // Pairwise widening sum, then sum all N/2 widened lanes
    auto paired = Vec_s32(vpaddlq_s16(v.raw));
    auto sums = SumOfLanes(paired);
    return hn::GetLane(sums);
#elif HWY_ARCH_X86_64 && HWY_TARGET == HWY_AVX3
    // 512-bit vectors: twice the lane count of AVX2, so they need the 512-bit intrinsic
    static_assert(hn::MaxLanes(TagS16) == 32);
    static_assert(hn::MaxLanes(TagS32) == 16);

    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
    auto paired = Vec_s32{_mm512_madd_epi16(v.raw, _mm512_set1_epi16(1))};
    auto sums = SumOfLanes(paired);
    return hn::GetLane(sums);
#elif HWY_ARCH_X86_64 && HWY_TARGET == HWY_AVX2
    static_assert(hn::MaxLanes(TagS16) == 16);
    static_assert(hn::MaxLanes(TagS32) == 8);

    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
    auto paired = Vec_s32{_mm256_madd_epi16(v.raw, _mm256_set1_epi16(1))};
    auto sums = SumOfLanes(paired);
    return hn::GetLane(sums);
#elif HWY_ARCH_X86_64 && (HWY_TARGET == HWY_SSSE3 || HWY_TARGET == HWY_SSE4)
    static_assert(hn::MaxLanes(TagS16) == 8);
    static_assert(hn::MaxLanes(TagS32) == 4);

    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
    auto paired = Vec_s32{_mm_madd_epi16(v.raw, _mm_set1_epi16(1))};
    auto sums = SumOfLanes(paired);
    return hn::GetLane(sums);
#else
    // No dedicated path for this target: spill the lanes to memory and add them serially.
    // Previously such targets reached the end of the function without returning a value.
    int16_t lanes[hn::MaxLanes(TagS16)];
    hn::StoreU(v, TagS16, lanes);

    int32_t acc = 0;
    for (size_t i = 0; i < hn::Lanes(TagS16); i++) { acc += lanes[i]; }
    return acc;
#endif
}
} // namespace quicktex

View File

@ -19,8 +19,6 @@
#include "TestSIMD.h"
#include <hwy/highway.h>
#include <array>
#include <cassert>
#include <cstdint>
@ -28,38 +26,6 @@
#include "../VecUtil.h"
namespace hn = hwy::HWY_NAMESPACE;
namespace quicktex::tests {
void TestWidenSumS16() {
const hn::ScalableTag<int16_t> tag;
const auto vec_size = hn::MaxLanes(tag);
std::array<int16_t, vec_size> buffer;
std::iota(buffer.begin(), buffer.end(), 1);
auto v = hn::Load(tag, &buffer[0]);
auto sum = WideningSumS16(v);
assert(sum == vec_size / 2 * (vec_size + 1)); // Gauss formula
buffer.fill(1);
v = hn::Load(tag, &buffer[0]);
sum = WideningSumS16(v);
assert(sum == vec_size);
buffer.fill(0);
v = hn::Load(tag, &buffer[0]);
sum= WideningSumS16(v);
assert(sum == 0);
buffer.fill(std::numeric_limits<int16_t>::max());
v = hn::Load(tag, &buffer[0]);
sum= WideningSumS16(v);
assert(sum == std::numeric_limits<int16_t>::max() * (int)vec_size);
buffer.fill(std::numeric_limits<int16_t>::min());
v = hn::Load(tag, &buffer[0]);
sum= WideningSumS16(v);
assert(sum == std::numeric_limits<int16_t>::min() * (int)vec_size);
}
} // namespace quicktex::tests

View File

@ -21,6 +21,4 @@
namespace quicktex::tests {
/// Runs assert()-based checks of WideningSumS16 on several lane patterns.
void TestWidenSumS16();
} // namespace quicktex::tests

View File

@ -42,32 +42,20 @@ function(set_simd_flags target_name)
if (highway_mode STREQUAL "AUTO")
# setting -march=native on an M1 causes Clang to freak out
if (MSVC)
if (CLANG_CL)
target_compile_options(${target_name} PUBLIC /clang:-march=native)
else()
#MSVC has no -march=native equivalent. womp
message(WARNING "Compiling using cl.exe without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with clang-cl if you need vectorization")
endif()
#MSVC has no -march=native equivalent. womp
message(WARNING "Compiling using MSVC without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with Clang if you need vectorization")
elseif (!ARM)
target_compile_options(${target_name} PUBLIC -march=native)
endif ()
elseif (highway_mode STREQUAL "SSSE3")
if (MSVC)
if (CLANG_CL)
target_compile_options(${target_name} PUBLIC /clang:-mssse3)
else()
message(SEND_ERROR "Compiling using SSSE3 is not supported with the cl.exe compiler. Please use AVX or compile with clang-cl")
endif()
message(SEND_ERROR "Compiling using SSSE3 is not supported with the MSVC compiler. Please use AVX or compile withClang")
else ()
target_compile_options(${target_name} PUBLIC -mssse3)
endif ()
elseif (highway_mode STREQUAL "SSE4")
if (MSVC)
if (CLANG_CL)
target_compile_options(${target_name} PUBLIC /clang:-msse4)
else()
message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang")
endif()
message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang")
else ()
target_compile_options(${target_name} PUBLIC -msse4)
endif ()