diff --git a/CMakeLists.txt b/CMakeLists.txt
index 812d365..a6b9965 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1,5 +1,6 @@
 cmake_minimum_required(VERSION 3.18)
 include(tools/CompilerWarnings.cmake)
+include(tools/CPUFeatures.cmake)
 
 set(CMAKE_VERBOSE_MAKEFILE ON)
 set(HWY_ENABLE_INSTALL OFF)
@@ -62,11 +63,18 @@ target_compile_features(_quicktex PUBLIC cxx_std_17 c_std_11)
 
 # Set compiler warnings
 set_project_warnings(_quicktex)
+set_simd_flags(_quicktex)
 
 # Clang-specific
 if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
-    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
-    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -stdlib=libc++ -fsanitize=undefined")
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+    set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -fsanitize=undefined")
     set(PROJECT_WARNINGS ${CLANG_WARNINGS})
 endif ()
+
+message("RELEASE FLAGS=${CMAKE_CXX_FLAGS}")
+
+message("DEBUG FLAGS=${CMAKE_CXX_FLAGS_DEBUG}")
+
+
diff --git a/quicktex/VecUtil.h b/quicktex/VecUtil.h
index 9dcd478..1fc050a 100644
--- a/quicktex/VecUtil.h
+++ b/quicktex/VecUtil.h
@@ -69,7 +69,7 @@ inline int32_t WideningSumS16(const Vec_s16 v) {
     static_assert(hn::MaxLanes(TagS32) == 8);
 
     // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
-    auto paired = Vec_s32{_mm256_madd_epi16(v.raw, __mm256_set1_epi16(1))};
+    auto paired = Vec_s32{_mm256_madd_epi16(v.raw, _mm256_set1_epi16(1))};
     auto sums = SumOfLanes(paired);
     return hn::GetLane(sums);
 #else
diff --git a/tools/CPUFeatures.cmake b/tools/CPUFeatures.cmake
new file mode 100644
index 0000000..2ee4425
--- /dev/null
+++ b/tools/CPUFeatures.cmake
@@ -0,0 +1,81 @@
+# set_simd_flags(target_name)
+#
+# Attaches Highway compile definitions and SIMD architecture flags to the
+# given target. Everything is added with PUBLIC visibility.
+#
+# The mode is read at configure time from the QUICKTEX_HWY_MODE environment
+# variable and falls back to AUTO when that variable is not set:
+#   SCALAR - defines HWY_COMPILE_ONLY_SCALAR=1 and adds nothing else
+#   AUTO   - adds -march=native on x86 unless ARM is also detected (MSVC only gets a warning)
+#   SSSE3  - adds -mssse3 (SEND_ERROR under MSVC)
+#   SSE4   - adds -msse4 (SEND_ERROR under MSVC)
+#   AVX2   - adds /arch:AVX2 under MSVC, -mavx2 otherwise
+# Every mode except SCALAR also defines HWY_COMPILE_ONLY_STATIC=1.
+# Architecture flags are only added when an x86 target is detected.
+#
+# Example:
+#   set_simd_flags(_quicktex)
+function(set_simd_flags target_name)
+    if (DEFINED ENV{QUICKTEX_HWY_MODE})
+        set(highway_mode "$ENV{QUICKTEX_HWY_MODE}")
+        message("Highway mode is ${highway_mode}")
+    else ()
+        message("Defaulting to AUTO highway mode")
+        set(highway_mode "AUTO")
+    endif ()
+
+    if ((CMAKE_OSX_ARCHITECTURES MATCHES ".*x86_64.*") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)"))
+        set(X86 TRUE)
+        message("X86 Detected")
+    else ()
+        set(X86 FALSE)
+    endif ()
+
+    if ((CMAKE_OSX_ARCHITECTURES MATCHES ".*arm64.*") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(ARM64)|(aarch64)"))
+        set(ARM TRUE)
+        message("ARM Detected")
+    else ()
+        set(ARM FALSE)
+    endif ()
+
+    if (highway_mode STREQUAL "SCALAR")
+        # force Highway to use scalar ops. This should really only be used for testing
+        target_compile_definitions(${target_name} PUBLIC HWY_COMPILE_ONLY_SCALAR=1)
+        return()
+    endif ()
+
+    # dynamic dispatch is not supported
+    target_compile_definitions(${target_name} PUBLIC HWY_COMPILE_ONLY_STATIC=1)
+
+    if (X86)
+        if (highway_mode STREQUAL "AUTO")
+            # setting -march=native on an M1 causes Clang to freak out
+            if (MSVC)
+                # MSVC has no -march=native equivalent. womp
+                message(WARNING "Compiling using MSVC without settig an explicit QUICKTEX_HWY_MODE defaults to serial operations. Please compile with Clang if you need vectorization")
+            elseif (NOT ARM)
+                # CMake negates with NOT; a bare "!ARM" would be read as the name
+                # of an undefined variable and always evaluate to false
+                target_compile_options(${target_name} PUBLIC -march=native)
+            endif ()
+        elseif (highway_mode STREQUAL "SSSE3")
+            if (MSVC)
+                message(SEND_ERROR "Compiling using SSSE3 is not supported with the MSVC compiler. Please use AVX or compile withClang")
+            else ()
+                target_compile_options(${target_name} PUBLIC -mssse3)
+            endif ()
+        elseif (highway_mode STREQUAL "SSE4")
+            if (MSVC)
+                message(SEND_ERROR "Compiling using SSE4 is not supported with the MSVC compiler. Please use AVX or compile with Clang")
+            else ()
+                target_compile_options(${target_name} PUBLIC -msse4)
+            endif ()
+        elseif (highway_mode STREQUAL "AVX2")
+            if (MSVC)
+                target_compile_options(${target_name} PUBLIC /arch:AVX2)
+            else ()
+                target_compile_options(${target_name} PUBLIC -mavx2)
+            endif ()
+        endif ()
+    endif ()
+endfunction()