diff --git a/.gitmodules b/.gitmodules index 6727298..e69de29 100644 --- a/.gitmodules +++ b/.gitmodules @@ -1,3 +0,0 @@ -[submodule "external/highway"] - path = external/highway - url = https://github.com/google/highway.git diff --git a/CMakeLists.txt b/CMakeLists.txt index 76d558a..aa74c79 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,27 +1,14 @@ cmake_minimum_required(VERSION 3.18) - -if(MSVC AND ENV{CC} MATCHES ".*clang-cl.*") - set(CMAKE_GENERATOR_TOOLSET "ClangCL") - set(CLANG_CL TRUE) -else() - set(CLANG_CL FALSE) -endif() - include(tools/CompilerWarnings.cmake) include(tools/CPUFeatures.cmake) set(CMAKE_VERBOSE_MAKEFILE ON) -set(HWY_ENABLE_INSTALL OFF) -set(HWY_ENABLE_EXAMPLES OFF) -option(BUILD_TESTING "" OFF) - project(quicktex) # Find dependencies find_package(Python COMPONENTS Interpreter Development.Module) find_package(pybind11 CONFIG REQUIRED) find_package(OpenMP) -add_subdirectory(external/highway) # Collect source files file(GLOB SOURCE_FILES diff --git a/external/highway b/external/highway deleted file mode 160000 index fa49471..0000000 --- a/external/highway +++ /dev/null @@ -1 +0,0 @@ -Subproject commit fa49471c43ba37c528dd77b0ef060cdc313054bb diff --git a/quicktex/VecUtil.h b/quicktex/VecUtil.h index 1fc050a..bf44046 100644 --- a/quicktex/VecUtil.h +++ b/quicktex/VecUtil.h @@ -18,70 +18,6 @@ */ #pragma once -#include -namespace hn = hwy::HWY_NAMESPACE; - -#if HWY_TARGET == HWY_NEON -#include -#elif HWY_ARCH_X86_64 -#include -#endif - namespace quicktex { -using Tag_s16 = hn::ScalableTag; -using Vec_s16 = hn::Vec; -using Tag_s32 = hn::ScalableTag; -using Vec_s32 = hn::Vec; - -const Tag_s16 TagS16; -const Tag_s32 TagS32; - -/// Helper function for doing sum-of-lanes without a tag lvalue. We're not targeting SVE (yet) so this should work fine. -/// \tparam V Vector type to sum (8- and 16-bit integers are NOT supported) -/// \param v Vector to sum -/// \return The sum of all lanes in each lane. 
-template inline V SumOfLanes(V v) { - hn::DFromV tag; - return hn::SumOfLanes(tag, v); -} - -inline int32_t WideningSumS16(const Vec_s16 v) { -#if HWY_TARGET == HWY_SCALAR - // In Scalar mode this is a no-op, since there's only one lane - return (int32_t)v.raw; -#elif HWY_TARGET == HWY_EMU128 - // In emulated 128-bit mode, do the addition serially - int acc = 0; - for (unsigned i = 0; i < hn::MaxLanes(TagS16); i++) { acc += v.raw[i]; } - return acc; -#elif HWY_TARGET == HWY_NEON - static_assert(hn::MaxLanes(TagS16) == 8); - static_assert(hn::MaxLanes(TagS32) == 4); - - // Pairwise widening sum, then sum all N/2 widened lanes - auto paired = Vec_s32(vpaddlq_s16(v.raw)); - auto sums = SumOfLanes(paired); - return hn::GetLane(sums); -#elif HWY_ARCH_X86_64 -#if HWY_TARGET == HWY_AVX2 || HWY_TARGET == HWY_AVX3 - static_assert(hn::MaxLanes(TagS16) == 16); - static_assert(hn::MaxLanes(TagS32) == 8); - - // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes - auto paired = Vec_s32{_mm256_madd_epi16(v.raw, _mm256_set1_epi16(1))}; - auto sums = SumOfLanes(paired); - return hn::GetLane(sums); -#else - static_assert(hn::MaxLanes(TagS16) == 8); - static_assert(hn::MaxLanes(TagS32) == 4); - - // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes - auto paired = Vec_s32{_mm_madd_epi16(v.raw, _mm_set1_epi16(1))}; - auto sums = SumOfLanes(paired); - return hn::GetLane(sums); -#endif -#endif -} - } // namespace quicktex \ No newline at end of file diff --git a/quicktex/tests/TestSIMD.cpp b/quicktex/tests/TestSIMD.cpp index 5b67ab2..4329378 100644 --- a/quicktex/tests/TestSIMD.cpp +++ b/quicktex/tests/TestSIMD.cpp @@ -19,8 +19,6 @@ #include "TestSIMD.h" -#include - #include #include #include @@ -28,38 +26,6 @@ #include "../VecUtil.h" -namespace hn = hwy::HWY_NAMESPACE; - namespace quicktex::tests { -void TestWidenSumS16() { - const hn::ScalableTag tag; - const auto vec_size = hn::MaxLanes(tag); - std::array buffer; - - 
std::iota(buffer.begin(), buffer.end(), 1); - auto v = hn::Load(tag, &buffer[0]); - auto sum = WideningSumS16(v); - assert(sum == vec_size / 2 * (vec_size + 1)); // Gauss formula - - buffer.fill(1); - v = hn::Load(tag, &buffer[0]); - sum = WideningSumS16(v); - assert(sum == vec_size); - - buffer.fill(0); - v = hn::Load(tag, &buffer[0]); - sum= WideningSumS16(v); - assert(sum == 0); - - buffer.fill(std::numeric_limits::max()); - v = hn::Load(tag, &buffer[0]); - sum= WideningSumS16(v); - assert(sum == std::numeric_limits::max() * (int)vec_size); - - buffer.fill(std::numeric_limits::min()); - v = hn::Load(tag, &buffer[0]); - sum= WideningSumS16(v); - assert(sum == std::numeric_limits::min() * (int)vec_size); -} } // namespace quicktex::tests \ No newline at end of file diff --git a/quicktex/tests/TestSIMD.h b/quicktex/tests/TestSIMD.h index 1a8dc82..0976b9f 100644 --- a/quicktex/tests/TestSIMD.h +++ b/quicktex/tests/TestSIMD.h @@ -21,6 +21,4 @@ namespace quicktex::tests { -void TestWidenSumS16(); - } // namespace quicktex::tests \ No newline at end of file diff --git a/tools/CPUFeatures.cmake b/tools/CPUFeatures.cmake index 00e364b..2ee4425 100644 --- a/tools/CPUFeatures.cmake +++ b/tools/CPUFeatures.cmake @@ -42,32 +42,20 @@ function(set_simd_flags target_name) if (highway_mode STREQUAL "AUTO") # setting -march=native on an M1 causes Clang to freak out if (MSVC) - if (CLANG_CL) - target_compile_options(${target_name} PUBLIC /clang:-march=native) - else() - #MSVC has no -march=native equivalent. womp - message(WARNING "Compiling using cl.exe without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with clang-cl if you need vectorization") - endif() + #MSVC has no -march=native equivalent. womp + message(WARNING "Compiling using MSVC without setting an explicit QUICKTEX_HWY_MODE defaults to serial operations. 
Please compile with Clang if you need vectorization") elseif (!ARM) target_compile_options(${target_name} PUBLIC -march=native) endif () elseif (highway_mode STREQUAL "SSSE3") if (MSVC) - if (CLANG_CL) - target_compile_options(${target_name} PUBLIC /clang:-mssse3) - else() - message(SEND_ERROR "Compiling using SSSE3 is not supported with the cl.exe compiler. Please use AVX or compile with clang-cl") - endif() + message(SEND_ERROR "Compiling using SSSE3 is not supported with the MSVC compiler. Please use AVX or compile with Clang") else () target_compile_options(${target_name} PUBLIC -mssse3) endif () elseif (highway_mode STREQUAL "SSE4") if (MSVC) - if (CLANG_CL) - target_compile_options(${target_name} PUBLIC /clang:-msse4) - else() - message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang") - endif() + message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang") else () target_compile_options(${target_name} PUBLIC -msse4) endif ()