mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
add widening horizontal add for s16 vectors
This commit is contained in:
parent
bc925d3949
commit
f7b0cbe76b
@ -17,6 +17,7 @@ add_subdirectory(external/highway)
|
||||
# Collect source files
|
||||
file(GLOB SOURCE_FILES
|
||||
"quicktex/*.cpp"
|
||||
"quicktex/tests/*.cpp"
|
||||
"quicktex/s3tc/*.cpp"
|
||||
"quicktex/s3tc/bc1/*.cpp"
|
||||
"quicktex/s3tc/bc3/*.cpp"
|
||||
@ -27,6 +28,7 @@ file(GLOB SOURCE_FILES
|
||||
|
||||
file(GLOB HEADER_FILES
|
||||
"quicktex/*.h"
|
||||
"quicktex/tests/*.h"
|
||||
"quicktex/s3tc/*.h"
|
||||
"quicktex/s3tc/bc1/*.h"
|
||||
"quicktex/s3tc/bc3/*.h"
|
||||
@ -53,6 +55,8 @@ if (OpenMP_CXX_FOUND)
|
||||
target_link_libraries(_quicktex PUBLIC OpenMP::OpenMP_CXX)
|
||||
endif ()
|
||||
|
||||
target_link_libraries(_quicktex PUBLIC hwy)
|
||||
|
||||
# Set module features, like C/C++ standards
|
||||
target_compile_features(_quicktex PUBLIC cxx_std_17 c_std_11)
|
||||
|
||||
|
84
quicktex/VecUtil.h
Normal file
84
quicktex/VecUtil.h
Normal file
@ -0,0 +1,84 @@
|
||||
/* Quicktex Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <hwy/highway.h>
|
||||
|
||||
namespace hn = hwy::HWY_NAMESPACE;
|
||||
|
||||
#if HWY_TARGET == HWY_NEON
|
||||
#include <arm_neon.h>
|
||||
#elif HWY_ARCH_X86_64
|
||||
#include <immintrin.h>
|
||||
#endif
|
||||
|
||||
namespace quicktex {
|
||||
|
||||
using Tag_s16 = hn::ScalableTag<int16_t>;
|
||||
using Vec_s16 = hn::Vec<Tag_s16>;
|
||||
using Tag_s32 = hn::ScalableTag<int32_t>;
|
||||
using Vec_s32 = hn::Vec<Tag_s32>;
|
||||
|
||||
const Tag_s16 TagS16;
|
||||
const Tag_s32 TagS32;
|
||||
|
||||
/// Helper function for doing sum-of-lanes without a tag lvalue. We're not targeting SVE (yet) so this should work fine.
|
||||
/// \tparam V Vector type to sum (8- and 16-bit integers are NOT supported)
|
||||
/// \param v Vector to sum
|
||||
/// \return The sum of all lanes in each lane.
|
||||
template <typename V> inline V SumOfLanes(V v) {
|
||||
hn::DFromV<V> tag;
|
||||
return hn::SumOfLanes(tag, v);
|
||||
}
|
||||
|
||||
inline int32_t WideningSumS16(const Vec_s16 v) {
|
||||
#if HWY_TARGET == HWY_SCALAR
|
||||
// In Scalar mode this is a no-op, since there's only one lane
|
||||
return (int32_t)v.raw;
|
||||
#elif HWY_TARGET == HWY_NEON
|
||||
static_assert(hn::MaxLanes(TagS16) == 8);
|
||||
static_assert(hn::MaxLanes(TagS32) == 4);
|
||||
|
||||
// Pairwise widening sum, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32(vpaddlq_s16(v.raw));
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#elif HWY_ARCH_X86_64
|
||||
#if HWY_TARGET == HWY_AVX2 || HWY_TARGET == HWY_AVX3
|
||||
static_assert(hn::MaxLanes(TagS16) == 16);
|
||||
static_assert(hn::MaxLanes(TagS32) == 8);
|
||||
|
||||
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32(_mm256_madd_epi16(v.raw, __mm256_set1_epi16(1)));
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#else
|
||||
static_assert(hn::MaxLanes(TagS16) == 8);
|
||||
static_assert(hn::MaxLanes(TagS32) == 4);
|
||||
|
||||
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
|
||||
auto paired = Vec_s32(_mm_madd_epi16(v.raw, _mm_set1_epi16(1)));
|
||||
auto sums = SumOfLanes(paired);
|
||||
return hn::GetLane(sums);
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
} // namespace quicktex
|
@ -35,6 +35,7 @@ namespace py = pybind11;
|
||||
namespace quicktex::bindings {
|
||||
|
||||
void InitS3TC(py::module_ &m);
|
||||
void InitCTests(py::module_ &m);
|
||||
|
||||
PYBIND11_MODULE(_quicktex, m) {
|
||||
m.doc() = "More Stuff";
|
||||
@ -69,6 +70,7 @@ PYBIND11_MODULE(_quicktex, m) {
|
||||
DefSubscript2D(raw_texture, &RawTexture::GetPixel, &RawTexture::SetPixel, &RawTexture::Size);
|
||||
|
||||
InitS3TC(m);
|
||||
InitCTests(m);
|
||||
}
|
||||
|
||||
} // namespace quicktex::bindings
|
@ -37,6 +37,7 @@
|
||||
#include "../../Vector4Int.h"
|
||||
#include "../../bitwiseEnums.h"
|
||||
#include "../../util.h"
|
||||
#include "../../VecUtil.h"
|
||||
#include "Histogram.h"
|
||||
#include "OrderTable.h"
|
||||
#include "SingleColorTable.h"
|
||||
|
65
quicktex/tests/TestSIMD.cpp
Normal file
65
quicktex/tests/TestSIMD.cpp
Normal file
@ -0,0 +1,65 @@
|
||||
/* Quicktex Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "TestSIMD.h"
|
||||
|
||||
#include <hwy/highway.h>
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <numeric>
|
||||
|
||||
#include "../VecUtil.h"
|
||||
|
||||
namespace hn = hwy::HWY_NAMESPACE;
|
||||
|
||||
namespace quicktex::tests {
|
||||
|
||||
void TestWidenSumS16() {
|
||||
const hn::ScalableTag<int16_t> tag;
|
||||
const auto vec_size = hn::MaxLanes(tag);
|
||||
std::array<int16_t, vec_size> buffer;
|
||||
|
||||
std::iota(buffer.begin(), buffer.end(), 1);
|
||||
auto v = hn::Load(tag, &buffer[0]);
|
||||
auto sum = WideningSumS16(v);
|
||||
assert(sum == vec_size / 2 * (vec_size + 1)); // Gauss formula
|
||||
|
||||
buffer.fill(1);
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum = WideningSumS16(v);
|
||||
assert(sum == vec_size);
|
||||
|
||||
buffer.fill(0);
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum= WideningSumS16(v);
|
||||
assert(sum == 0);
|
||||
|
||||
buffer.fill(std::numeric_limits<int16_t>::max());
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum= WideningSumS16(v);
|
||||
assert(sum == std::numeric_limits<int16_t>::max() * (int)vec_size);
|
||||
|
||||
buffer.fill(std::numeric_limits<int16_t>::min());
|
||||
v = hn::Load(tag, &buffer[0]);
|
||||
sum= WideningSumS16(v);
|
||||
assert(sum == std::numeric_limits<int16_t>::min() * (int)vec_size);
|
||||
}
|
||||
} // namespace quicktex::tests
|
26
quicktex/tests/TestSIMD.h
Normal file
26
quicktex/tests/TestSIMD.h
Normal file
@ -0,0 +1,26 @@
|
||||
/* Quicktex Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
namespace quicktex::tests {

/// Runs assert-based checks of WideningSumS16 on a set of fixed input vectors.
/// Takes no arguments and returns nothing; defined in TestSIMD.cpp.
void TestWidenSumS16();

}  // namespace quicktex::tests
|
44
quicktex/tests/_bindings.cpp
Normal file
44
quicktex/tests/_bindings.cpp
Normal file
@ -0,0 +1,44 @@
|
||||
/* Quicktex Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "../_bindings.h"
|
||||
|
||||
#include <pybind11/pybind11.h>
|
||||
#include <pybind11/stl.h>
|
||||
|
||||
#include <array>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <stdexcept>
|
||||
#include <string>
|
||||
|
||||
#include "TestSIMD.h"
|
||||
|
||||
namespace py = pybind11;
|
||||
namespace quicktex::bindings {
|
||||
|
||||
using namespace pybind11::literals;
|
||||
using namespace quicktex::tests;
|
||||
|
||||
/// Registers the `_ctests` submodule on the given module and exposes the C-level test functions in it.
/// \param quicktex Parent module that receives the `_ctests` submodule
void InitCTests(py::module_ &quicktex) {
    // def_submodule returns the new submodule, so the test function is registered on it directly
    quicktex.def_submodule("_ctests", "Internal tests for C-level functions")
        .def("test_WidenSumS16", &TestWidenSumS16);
}
|
||||
} // namespace quicktex::bindings
|
8
tests/test_ctest.py
Normal file
8
tests/test_ctest.py
Normal file
@ -0,0 +1,8 @@
|
||||
import unittest
|
||||
|
||||
import _quicktex._ctests as c
|
||||
|
||||
|
||||
class TestCTest(unittest.TestCase):
    """Runs the C-level test functions exposed by the ``_quicktex._ctests`` submodule."""

    def test_WidenSumS16(self):
        """Call the native ``test_WidenSumS16`` function."""
        c.test_WidenSumS16()
|
@ -52,7 +52,7 @@ function(set_project_warnings project_name)
|
||||
-Wunused # warn on anything being unused
|
||||
-Woverloaded-virtual # warn if you overload (not override) a virtual
|
||||
# function
|
||||
-Wpedantic # warn if non-standard C++ is used
|
||||
# -Wpedantic # warn if non-standard C++ is used
|
||||
#-Wconversion # warn on type conversions that may lose data
|
||||
#-Wsign-conversion # warn on sign conversions
|
||||
-Wnull-dereference # warn if a null dereference is detected
|
||||
|
Loading…
Reference in New Issue
Block a user