diff --git a/.gitmodules b/.gitmodules index e69de29..49f1431 100644 --- a/.gitmodules +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/xsimd"] + path = external/xsimd + url = https://github.com/xtensor-stack/xsimd.git diff --git a/CMakeLists.txt b/CMakeLists.txt index aa74c79..fff8c15 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.18) include(tools/CompilerWarnings.cmake) -include(tools/CPUFeatures.cmake) +include(tools/SIMDFlags.cmake) set(CMAKE_VERBOSE_MAKEFILE ON) project(quicktex) @@ -10,6 +10,8 @@ find_package(Python COMPONENTS Interpreter Development.Module) find_package(pybind11 CONFIG REQUIRED) find_package(OpenMP) +add_subdirectory(external/xsimd) + # Collect source files file(GLOB SOURCE_FILES "quicktex/*.cpp" @@ -51,7 +53,7 @@ if (OpenMP_CXX_FOUND) target_link_libraries(_quicktex PUBLIC OpenMP::OpenMP_CXX) endif () -target_link_libraries(_quicktex PUBLIC hwy) +target_link_libraries(_quicktex PUBLIC xsimd) # Set module features, like C/C++ standards target_compile_features(_quicktex PUBLIC cxx_std_17 c_std_11) diff --git a/external/xsimd b/external/xsimd new file mode 160000 index 0000000..1577b02 --- /dev/null +++ b/external/xsimd @@ -0,0 +1 @@ +Subproject commit 1577b02d549cca52aa5e943c16f2600950480289 diff --git a/quicktex/tests/_bindings.cpp b/quicktex/tests/_bindings.cpp index 46134a2..e5b18fc 100644 --- a/quicktex/tests/_bindings.cpp +++ b/quicktex/tests/_bindings.cpp @@ -39,6 +39,6 @@ using namespace quicktex::tests; void InitCTests(py::module_ &quicktex) { py::module_ ctests = quicktex.def_submodule("_ctests", "Internal tests for C-level functions"); - ctests.def("test_WidenSumS16", &TestWidenSumS16); +// ctests.def("test_WidenSumS16", &TestWidenSumS16); } } // namespace quicktex::bindings \ No newline at end of file diff --git a/tools/CPUFeatures.cmake b/tools/CPUFeatures.cmake deleted file mode 100644 index 2ee4425..0000000 --- a/tools/CPUFeatures.cmake +++ /dev/null @@ -1,70 +0,0 @@ 
-function(set_simd_flags target_name)
-    if (DEFINED ENV{QUICKTEX_HWY_MODE})
-        set(highway_mode $ENV{QUICKTEX_HWY_MODE})
-        message("Highway mode is ${highway_mode}")
-    else ()
-        message("Defaulting to AUTO highway mode")
-        set(highway_mode "AUTO")
-    endif ()
-
-    if ((CMAKE_OSX_ARCHITECTURES MATCHES ".*x86_64.*") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)"))
-        set(X86 TRUE)
-        message("X86 Detected")
-    else ()
-        set(X86 FALSE)
-    endif ()
-
-    if ((CMAKE_OSX_ARCHITECTURES MATCHES ".*arm64.*") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(ARM64)|(aarch64)"))
-        set(ARM TRUE)
-        message("ARM Detected")
-    else ()
-        set(ARM FALSE)
-    endif ()
-
-    if (highway_mode STREQUAL "SCALAR")
-        # force Highway to use scalar ops. This should really only be used for testing
-        if (MSVC)
-            target_compile_options(${target_name} PUBLIC /DHWY_COMPILE_ONLY_SCALAR=1)
-        else ()
-            target_compile_options(${target_name} PUBLIC -DHWY_COMPILE_ONLY_SCALAR=1)
-        endif ()
-        return()
-    endif ()
-
-    # dynamic disbatch is not supported
-    if (MSVC)
-        target_compile_options(${target_name} PUBLIC /DHWY_COMPILE_ONLY_STATIC=1)
-    else ()
-        target_compile_options(${target_name} PUBLIC -DHWY_COMPILE_ONLY_STATIC=1)
-    endif ()
-
-    if (X86)
-        if (highway_mode STREQUAL "AUTO")
-            # setting -march=native on an M1 causes Clang to freak out
-            if (MSVC)
-                #MSVC has no -march=native equivalent. womp
-                message(WARNING "Compiling using MSVC without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with Clang if you need vectorization")
-            elseif (!ARM)
-                target_compile_options(${target_name} PUBLIC -march=native)
-            endif ()
-        elseif (highway_mode STREQUAL "SSSE3")
-            if (MSVC)
-                message(SEND_ERROR "Compiling using SSSE3 is not supported with the MSVC compiler. Please use AVX or compile withClang")
-            else ()
-                target_compile_options(${target_name} PUBLIC -mssse3)
-            endif ()
-        elseif (highway_mode STREQUAL "SSE4")
-            if (MSVC)
-                message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang")
-            else ()
-                target_compile_options(${target_name} PUBLIC -msse4)
-            endif ()
-        elseif (highway_mode STREQUAL "AVX2")
-            if (MSVC)
-                target_compile_options(${target_name} PUBLIC /arch:AVX2)
-            else ()
-                target_compile_options(${target_name} PUBLIC -mavx2)
-            endif ()
-        endif ()
-    endif ()
-endfunction()
\ No newline at end of file
diff --git a/tools/SIMDFlags.cmake b/tools/SIMDFlags.cmake
new file mode 100644
--- /dev/null
+++ b/tools/SIMDFlags.cmake
@@ -0,0 +1,69 @@
+# Apply SIMD-related compile flags to a target.
+#
+# The mode is read from the QUICKTEX_SIMD_MODE environment variable at configure time:
+#   AUTO (default)   x86, non-MSVC compiler, no ARM architecture detected: -march=native
+#   SCALAR           define XSIMD_NO_SUPPORTED_ARCHITECTURE=1 and add nothing else
+#   SSSE3|SSE4|AVX2  x86 only: the matching architecture option for the compiler in use
+#
+# Usage: set_simd_flags(<target>)
+function(set_simd_flags target_name)
+    if (DEFINED ENV{QUICKTEX_SIMD_MODE})
+        set(simd_mode $ENV{QUICKTEX_SIMD_MODE})
+        message("SIMD mode is ${simd_mode}")
+    else ()
+        message("Defaulting to AUTO SIMD mode. Resulting binary is not fit for distributing to other computers!")
+        set(simd_mode "AUTO")
+    endif ()
+
+    if ((CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)"))
+        set(X86 TRUE)
+        message("X86 Detected")
+    else ()
+        set(X86 FALSE)
+    endif ()
+
+    if ((CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(ARM64)|(aarch64)"))
+        set(ARM TRUE)
+        message("ARM Detected")
+    else ()
+        set(ARM FALSE)
+    endif ()
+
+    if (simd_mode STREQUAL "SCALAR")
+        # force xsimd to use scalar ops. This should really only be used for testing,
+        # since SSE2 and NEON are guaranteed on 64-bit platforms
+        target_compile_definitions(${target_name} PUBLIC XSIMD_NO_SUPPORTED_ARCHITECTURE=1)
+        return()
+    endif ()
+
+    if (X86)
+        if (simd_mode STREQUAL "AUTO")
+            # X86 and ARM can both be TRUE (CMAKE_OSX_ARCHITECTURES may list both), hence NOT ARM
+            if (MSVC)
+                #MSVC has no -march=native equivalent. womp
+            elseif (NOT ARM)
+                # setting -march=native on an M1 causes Clang to freak out,
+                # and arm64 is pretty samey instruction set wise (arm9 and SVE2 notwithstanding)
+                target_compile_options(${target_name} PUBLIC -march=native)
+            endif ()
+        elseif (simd_mode STREQUAL "SSSE3")
+            if (MSVC)
+                target_compile_options(${target_name} PUBLIC /DXSIMD_WITH_SSSE3)
+            else ()
+                target_compile_options(${target_name} PUBLIC -mssse3)
+            endif ()
+        elseif (simd_mode STREQUAL "SSE4")
+            if (MSVC)
+                target_compile_options(${target_name} PUBLIC /DXSIMD_WITH_SSE4_2 /d2archSSE42)
+            else ()
+                target_compile_options(${target_name} PUBLIC -msse4)
+            endif ()
+        elseif (simd_mode STREQUAL "AVX2")
+            if (MSVC)
+                target_compile_options(${target_name} PUBLIC /arch:AVX2)
+            else ()
+                target_compile_options(${target_name} PUBLIC -mavx2)
+            endif ()
+        endif ()
+    endif ()
+endfunction()