add widening horizontal add for s16 vectors

This commit is contained in:
Andrew Cassidy 2022-05-15 18:08:36 -07:00
parent bc925d3949
commit f7b0cbe76b
9 changed files with 235 additions and 1 deletions

View File

@ -17,6 +17,7 @@ add_subdirectory(external/highway)
# Collect source files
file(GLOB SOURCE_FILES
"quicktex/*.cpp"
"quicktex/tests/*.cpp"
"quicktex/s3tc/*.cpp"
"quicktex/s3tc/bc1/*.cpp"
"quicktex/s3tc/bc3/*.cpp"
@ -27,6 +28,7 @@ file(GLOB SOURCE_FILES
file(GLOB HEADER_FILES
"quicktex/*.h"
"quicktex/tests/*.h"
"quicktex/s3tc/*.h"
"quicktex/s3tc/bc1/*.h"
"quicktex/s3tc/bc3/*.h"
@ -53,6 +55,8 @@ if (OpenMP_CXX_FOUND)
target_link_libraries(_quicktex PUBLIC OpenMP::OpenMP_CXX)
endif ()
target_link_libraries(_quicktex PUBLIC hwy)
# Set module features, like C/C++ standards
target_compile_features(_quicktex PUBLIC cxx_std_17 c_std_11)

84
quicktex/VecUtil.h Normal file
View File

@ -0,0 +1,84 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <hwy/highway.h>
namespace hn = hwy::HWY_NAMESPACE;
#if HWY_TARGET == HWY_NEON
#include <arm_neon.h>
#elif HWY_ARCH_X86_64
#include <immintrin.h>
#endif
namespace quicktex {
// Full-width Highway tag and vector types for signed 16-bit and 32-bit lanes on the current target.
using Tag_s16 = hn::ScalableTag<int16_t>;
using Vec_s16 = hn::Vec<Tag_s16>;
using Tag_s32 = hn::ScalableTag<int32_t>;
using Vec_s32 = hn::Vec<Tag_s32>;

// Shared tag instances. `inline constexpr` gives a single constant-initialized entity across all
// translation units that include this header and guarantees the tags are usable in static_assert.
inline constexpr Tag_s16 TagS16{};
inline constexpr Tag_s32 TagS32{};
/// Helper function for doing sum-of-lanes without a tag lvalue. We're not targeting SVE (yet) so this should work fine.
/// \tparam V Vector type to sum (8- and 16-bit integers are NOT supported)
/// \param v Vector to sum
/// \return The sum of all lanes in each lane.
template <typename V> inline V SumOfLanes(V v) {
hn::DFromV<V> tag;
return hn::SumOfLanes(tag, v);
}
/// Horizontal sum of all signed 16-bit lanes of a vector, accumulated in 32 bits.
/// Target-specific branches first add adjacent lane pairs into 32-bit lanes and then reduce those;
/// every other target stores the lanes to memory and adds them in scalar code.
/// \param v Vector of int16 lanes to add together
/// \return The sum of every lane of \p v as an int32
inline int32_t WideningSumS16(const Vec_s16 v) {
#if HWY_TARGET == HWY_SCALAR
    // In Scalar mode this is just a widening conversion, since there's only one lane
    return static_cast<int32_t>(v.raw);
#elif HWY_TARGET == HWY_NEON
    static_assert(hn::MaxLanes(TagS16) == 8);
    static_assert(hn::MaxLanes(TagS32) == 4);

    // Pairwise widening sum, then sum all N/2 widened lanes.
    // Brace-init works whether the Highway vector type is an aggregate or has an explicit constructor.
    const Vec_s32 paired{vpaddlq_s16(v.raw)};
    return hn::GetLane(SumOfLanes(paired));
#elif HWY_ARCH_X86_64 && HWY_TARGET == HWY_AVX3
    // AVX3 vectors are 512 bits wide, so they need the 512-bit intrinsics
    static_assert(hn::MaxLanes(TagS16) == 32);
    static_assert(hn::MaxLanes(TagS32) == 16);

    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
    const Vec_s32 paired{_mm512_madd_epi16(v.raw, _mm512_set1_epi16(1))};
    return hn::GetLane(SumOfLanes(paired));
#elif HWY_ARCH_X86_64 && HWY_TARGET == HWY_AVX2
    static_assert(hn::MaxLanes(TagS16) == 16);
    static_assert(hn::MaxLanes(TagS32) == 8);

    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
    const Vec_s32 paired{_mm256_madd_epi16(v.raw, _mm256_set1_epi16(1))};
    return hn::GetLane(SumOfLanes(paired));
#elif HWY_ARCH_X86_64
    // Remaining x86-64 targets are expected to be 128-bit; the asserts catch any that are not
    static_assert(hn::MaxLanes(TagS16) == 8);
    static_assert(hn::MaxLanes(TagS32) == 4);

    // Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
    const Vec_s32 paired{_mm_madd_epi16(v.raw, _mm_set1_epi16(1))};
    return hn::GetLane(SumOfLanes(paired));
#else
    // Portable fallback so that unlisted targets still return a value instead of
    // falling off the end of the function: spill the lanes and add them in scalar code.
    int16_t lanes[hn::MaxLanes(TagS16)];
    hn::StoreU(v, TagS16, lanes);

    int32_t total = 0;
    const size_t count = hn::Lanes(TagS16);
    for (size_t i = 0; i < count; ++i) { total += lanes[i]; }
    return total;
#endif
}
} // namespace quicktex

View File

@ -35,6 +35,7 @@ namespace py = pybind11;
namespace quicktex::bindings {
void InitS3TC(py::module_ &m);
void InitCTests(py::module_ &m);
PYBIND11_MODULE(_quicktex, m) {
m.doc() = "More Stuff";
@ -69,6 +70,7 @@ PYBIND11_MODULE(_quicktex, m) {
DefSubscript2D(raw_texture, &RawTexture::GetPixel, &RawTexture::SetPixel, &RawTexture::Size);
InitS3TC(m);
InitCTests(m);
}
} // namespace quicktex::bindings

View File

@ -37,6 +37,7 @@
#include "../../Vector4Int.h"
#include "../../bitwiseEnums.h"
#include "../../util.h"
#include "../../VecUtil.h"
#include "Histogram.h"
#include "OrderTable.h"
#include "SingleColorTable.h"

View File

@ -0,0 +1,65 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "TestSIMD.h"
#include <hwy/highway.h>
#include <array>
#include <cassert>
#include <cstdint>
#include <limits>
#include <numeric>
#include <stdexcept>
#include <string>
#include "../VecUtil.h"
namespace hn = hwy::HWY_NAMESPACE;
namespace quicktex::tests {
void TestWidenSumS16() {
const hn::ScalableTag<int16_t> tag;
const auto vec_size = hn::MaxLanes(tag);
std::array<int16_t, vec_size> buffer;
std::iota(buffer.begin(), buffer.end(), 1);
auto v = hn::Load(tag, &buffer[0]);
auto sum = WideningSumS16(v);
assert(sum == vec_size / 2 * (vec_size + 1)); // Gauss formula
buffer.fill(1);
v = hn::Load(tag, &buffer[0]);
sum = WideningSumS16(v);
assert(sum == vec_size);
buffer.fill(0);
v = hn::Load(tag, &buffer[0]);
sum= WideningSumS16(v);
assert(sum == 0);
buffer.fill(std::numeric_limits<int16_t>::max());
v = hn::Load(tag, &buffer[0]);
sum= WideningSumS16(v);
assert(sum == std::numeric_limits<int16_t>::max() * (int)vec_size);
buffer.fill(std::numeric_limits<int16_t>::min());
v = hn::Load(tag, &buffer[0]);
sum= WideningSumS16(v);
assert(sum == std::numeric_limits<int16_t>::min() * (int)vec_size);
}
} // namespace quicktex::tests

26
quicktex/tests/TestSIMD.h Normal file
View File

@ -0,0 +1,26 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
namespace quicktex::tests {
/// Exercises WideningSumS16 with a handful of known int16 lane patterns (ascending values,
/// all ones, all zeros, the int16 maximum and the int16 minimum) and checks each resulting sum.
void TestWidenSumS16();
} // namespace quicktex::tests

View File

@ -0,0 +1,44 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "../_bindings.h"
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include "TestSIMD.h"
namespace py = pybind11;
namespace quicktex::bindings {
using namespace pybind11::literals;
using namespace quicktex::tests;
/// Registers the `_ctests` submodule on the given module and exposes the native self-tests through it.
/// \param quicktex Parent Python module that receives the submodule
void InitCTests(py::module_ &quicktex) {
    quicktex.def_submodule("_ctests", "Internal tests for C-level functions")
        .def("test_WidenSumS16", &TestWidenSumS16);
}
} // namespace quicktex::bindings

8
tests/test_ctest.py Normal file
View File

@ -0,0 +1,8 @@
import unittest
import _quicktex._ctests as c
class TestCTest(unittest.TestCase):
def test_WidenSumS16(self):
c.test_WidenSumS16()

View File

@ -52,7 +52,7 @@ function(set_project_warnings project_name)
-Wunused # warn on anything being unused
-Woverloaded-virtual # warn if you overload (not override) a virtual
# function
-Wpedantic # warn if non-standard C++ is used
# -Wpedantic # warn if non-standard C++ is used
#-Wconversion # warn on type conversions that may lose data
#-Wsign-conversion # warn on sign conversions
-Wnull-dereference # warn if a null dereference is detected