diff --git a/CMakeLists.txt b/CMakeLists.txt
index d201f21..812d365 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -17,6 +17,7 @@ add_subdirectory(external/highway)
 # Collect source files
 file(GLOB SOURCE_FILES
         "quicktex/*.cpp"
+        "quicktex/tests/*.cpp"
         "quicktex/s3tc/*.cpp"
         "quicktex/s3tc/bc1/*.cpp"
         "quicktex/s3tc/bc3/*.cpp"
@@ -27,6 +28,7 @@ file(GLOB SOURCE_FILES
 
 file(GLOB HEADER_FILES
         "quicktex/*.h"
+        "quicktex/tests/*.h"
         "quicktex/s3tc/*.h"
         "quicktex/s3tc/bc1/*.h"
         "quicktex/s3tc/bc3/*.h"
@@ -53,6 +55,8 @@ if (OpenMP_CXX_FOUND)
     target_link_libraries(_quicktex PUBLIC OpenMP::OpenMP_CXX)
 endif ()
 
+target_link_libraries(_quicktex PUBLIC hwy)
+
 # Set module features, like C/C++ standards
 target_compile_features(_quicktex PUBLIC cxx_std_17 c_std_11)
 
diff --git a/quicktex/VecUtil.h b/quicktex/VecUtil.h
new file mode 100644
index 0000000..30d7d69
--- /dev/null
+++ b/quicktex/VecUtil.h
@@ -0,0 +1,101 @@
+/* Quicktex Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy
+    Partially derived from rgbcx.h written by Richard Geldreich
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <hwy/highway.h>
+
+namespace hn = hwy::HWY_NAMESPACE;
+
+#if HWY_TARGET == HWY_NEON
+#include <arm_neon.h>
+#elif HWY_ARCH_X86_64
+#include <immintrin.h>
+#endif
+
+namespace quicktex {
+
+using Tag_s16 = hn::ScalableTag<int16_t>;
+using Vec_s16 = hn::Vec<Tag_s16>;
+using Tag_s32 = hn::ScalableTag<int32_t>;
+using Vec_s32 = hn::Vec<Tag_s32>;
+
+const Tag_s16 TagS16;
+const Tag_s32 TagS32;
+
+/// Helper function for doing sum-of-lanes without a tag lvalue. We're not targeting SVE (yet) so this should work fine.
+/// \tparam V Vector type to sum (8- and 16-bit integers are NOT supported)
+/// \param v Vector to sum
+/// \return The sum of all lanes in each lane.
+template <class V> inline V SumOfLanes(V v) {
+    hn::DFromV<V> tag;
+    return hn::SumOfLanes(tag, v);
+}
+
+/// Sums all signed 16-bit lanes of a vector into one 32-bit integer.
+/// Adjacent lanes are first added pairwise into 32-bit lanes, so the intermediate sums are not truncated to
+/// 16 bits; the widened lanes are then reduced with SumOfLanes.
+/// \param v Vector of signed 16-bit lanes to sum
+/// \return The sum of all lanes of v
+inline int32_t WideningSumS16(const Vec_s16 v) {
+#if HWY_TARGET == HWY_SCALAR
+    // In Scalar mode this is a no-op, since there's only one lane
+    return static_cast<int32_t>(v.raw);
+#elif HWY_TARGET == HWY_NEON
+    static_assert(hn::MaxLanes(TagS16) == 8);
+    static_assert(hn::MaxLanes(TagS32) == 4);
+
+    // Pairwise widening sum, then sum all N/2 widened lanes
+    auto paired = Vec_s32(vpaddlq_s16(v.raw));
+    auto sums = SumOfLanes(paired);
+    return hn::GetLane(sums);
+#elif HWY_ARCH_X86_64
+#if HWY_TARGET == HWY_AVX3
+    // 512-bit vectors need the 512-bit intrinsics; the lane counts are checked at compile time
+    static_assert(hn::MaxLanes(TagS16) == 32);
+    static_assert(hn::MaxLanes(TagS32) == 16);
+
+    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
+    auto paired = Vec_s32(_mm512_madd_epi16(v.raw, _mm512_set1_epi16(1)));
+    auto sums = SumOfLanes(paired);
+    return hn::GetLane(sums);
+#elif HWY_TARGET == HWY_AVX2
+    static_assert(hn::MaxLanes(TagS16) == 16);
+    static_assert(hn::MaxLanes(TagS32) == 8);
+
+    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
+    auto paired = Vec_s32(_mm256_madd_epi16(v.raw, _mm256_set1_epi16(1)));
+    auto sums = SumOfLanes(paired);
+    return hn::GetLane(sums);
+#else
+    static_assert(hn::MaxLanes(TagS16) == 8);
+    static_assert(hn::MaxLanes(TagS32) == 4);
+
+    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
+    auto paired = Vec_s32(_mm_madd_epi16(v.raw, _mm_set1_epi16(1)));
+    auto sums = SumOfLanes(paired);
+    return hn::GetLane(sums);
+#endif
+#else
+    // Fail the build instead of falling off the end of a non-void function on targets without an implementation
+#error "WideningSumS16 is not implemented for this Highway target"
+#endif
+}
+
+} // namespace quicktex
\ No newline at end of file
diff --git a/quicktex/_bindings.cpp b/quicktex/_bindings.cpp
index
9988fe0..b71180b 100644
--- a/quicktex/_bindings.cpp
+++ b/quicktex/_bindings.cpp
@@ -35,6 +35,7 @@ namespace py = pybind11;
 namespace quicktex::bindings {
 
 void InitS3TC(py::module_ &m);
+void InitCTests(py::module_ &m);
 
 PYBIND11_MODULE(_quicktex, m) {
     m.doc() = "More Stuff";
@@ -69,6 +70,7 @@ PYBIND11_MODULE(_quicktex, m) {
     DefSubscript2D(raw_texture, &RawTexture::GetPixel, &RawTexture::SetPixel, &RawTexture::Size);
 
     InitS3TC(m);
+    InitCTests(m);
 }
 
 } // namespace quicktex::bindings
\ No newline at end of file
diff --git a/quicktex/s3tc/bc1/BC1Encoder.cpp b/quicktex/s3tc/bc1/BC1Encoder.cpp
index 396f6e4..6735ed4 100644
--- a/quicktex/s3tc/bc1/BC1Encoder.cpp
+++ b/quicktex/s3tc/bc1/BC1Encoder.cpp
@@ -37,6 +37,7 @@
 #include "../../Vector4Int.h"
 #include "../../bitwiseEnums.h"
 #include "../../util.h"
+#include "../../VecUtil.h"
 #include "Histogram.h"
 #include "OrderTable.h"
 #include "SingleColorTable.h"
diff --git a/quicktex/tests/TestSIMD.cpp b/quicktex/tests/TestSIMD.cpp
new file mode 100644
index 0000000..5b67ab2
--- /dev/null
+++ b/quicktex/tests/TestSIMD.cpp
@@ -0,0 +1,83 @@
+/* Quicktex Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy
+    Partially derived from rgbcx.h written by Richard Geldreich
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Lesser General Public License for more details.
+
+    You should have received a copy of the GNU Lesser General Public License
+    along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include "TestSIMD.h"
+
+#include <hwy/highway.h>
+
+#include <array>
+#include <cassert>
+#include <cstdint>
+#include <limits>
+#include <numeric>
+#include <stdexcept>
+#include <string>
+
+#include "../VecUtil.h"
+
+namespace hn = hwy::HWY_NAMESPACE;
+
+namespace quicktex::tests {
+
+namespace {
+
+/// Reports a failed check by throwing rather than with assert(), which compiles to nothing when NDEBUG is
+/// defined and would let the test pass without checking anything.
+/// \param label Short name of the case being checked, used in the error message
+/// \param actual Value returned by WideningSumS16
+/// \param expected Value the sum should have
+void ExpectSum(const char *label, int32_t actual, int32_t expected) {
+    if (actual != expected) {
+        throw std::runtime_error(std::string("WideningSumS16 (") + label + "): expected " + std::to_string(expected) + ", got " + std::to_string(actual));
+    }
+}
+
+} // namespace
+
+/// Checks WideningSumS16 against known sums for a full vector of signed 16-bit lanes.
+/// Throws std::runtime_error on the first mismatch.
+void TestWidenSumS16() {
+    const hn::ScalableTag<int16_t> tag;
+    const auto vec_size = hn::MaxLanes(tag);
+    const auto lanes = static_cast<int32_t>(vec_size);
+    std::array<int16_t, vec_size> buffer;
+
+    // The buffer has no vector alignment, so use the unaligned load
+    std::iota(buffer.begin(), buffer.end(), 1);
+    auto v = hn::LoadU(tag, buffer.data());
+    // Gauss formula; multiply before halving so an odd lane count (e.g. one lane) is not truncated
+    ExpectSum("iota", WideningSumS16(v), lanes * (lanes + 1) / 2);
+
+    buffer.fill(1);
+    v = hn::LoadU(tag, buffer.data());
+    ExpectSum("ones", WideningSumS16(v), lanes);
+
+    buffer.fill(0);
+    v = hn::LoadU(tag, buffer.data());
+    ExpectSum("zeros", WideningSumS16(v), 0);
+
+    buffer.fill(std::numeric_limits<int16_t>::max());
+    v = hn::LoadU(tag, buffer.data());
+    ExpectSum("max", WideningSumS16(v), std::numeric_limits<int16_t>::max() * lanes);
+
+    buffer.fill(std::numeric_limits<int16_t>::min());
+    v = hn::LoadU(tag, buffer.data());
+    ExpectSum("min", WideningSumS16(v), std::numeric_limits<int16_t>::min() * lanes);
+}
+} // namespace quicktex::tests
\ No newline at end of file
diff --git a/quicktex/tests/TestSIMD.h b/quicktex/tests/TestSIMD.h
new file mode 100644
index 0000000..1a8dc82
--- /dev/null
+++ b/quicktex/tests/TestSIMD.h
@@ -0,0 +1,26 @@
+/* Quicktex Texture Compression Library
+    Copyright (C) 2021 Andrew Cassidy
+    Partially derived from rgbcx.h written by Richard Geldreich
+    and licenced under the public domain
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU Lesser General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU Lesser General Public License for more details.
+ + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +namespace quicktex::tests { + +void TestWidenSumS16(); + +} // namespace quicktex::tests \ No newline at end of file diff --git a/quicktex/tests/_bindings.cpp b/quicktex/tests/_bindings.cpp new file mode 100644 index 0000000..46134a2 --- /dev/null +++ b/quicktex/tests/_bindings.cpp @@ -0,0 +1,44 @@ +/* Quicktex Texture Compression Library + Copyright (C) 2021 Andrew Cassidy + Partially derived from rgbcx.h written by Richard Geldreich + and licenced under the public domain + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Lesser General Public License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. 
+ */ + +#include "../_bindings.h" + +#include +#include + +#include +#include +#include +#include +#include + +#include "TestSIMD.h" + +namespace py = pybind11; +namespace quicktex::bindings { + +using namespace pybind11::literals; +using namespace quicktex::tests; + +void InitCTests(py::module_ &m) { + py::module_ ctests = m.def_submodule("_ctests", "Internal tests for C-level functions"); + + ctests.def("test_WidenSumS16", &TestWidenSumS16, "Run the C++ checks for WideningSumS16"); +} +} // namespace quicktex::bindings \ No newline at end of file diff --git a/tests/test_ctest.py b/tests/test_ctest.py new file mode 100644 index 0000000..1c515e8 --- /dev/null +++ b/tests/test_ctest.py @@ -0,0 +1,8 @@ +import unittest + +import _quicktex._ctests as c + + +class TestCTest(unittest.TestCase): + def test_WidenSumS16(self): + c.test_WidenSumS16() diff --git a/tools/CompilerWarnings.cmake b/tools/CompilerWarnings.cmake index 0b7ef2a..eee5c35 100644 --- a/tools/CompilerWarnings.cmake +++ b/tools/CompilerWarnings.cmake @@ -52,7 +52,7 @@ function(set_project_warnings project_name) -Wunused # warn on anything being unused -Woverloaded-virtual # warn if you overload (not override) a virtual # function - -Wpedantic # warn if non-standard C++ is used +# -Wpedantic # warn if non-standard C++ is used #-Wconversion # warn on type conversions that may lose data #-Wsign-conversion # warn on sign conversions -Wnull-dereference # warn if a null dereference is detected