23 Commits

Author SHA1 Message Date
a05c1e352e Release 0.2.1
### Fixed

- Fixed broken transparency on palettized PNG files

### Changed

- Changed which wheels are built by the CI. There are no changes to OS or Python version compatibility if you compile from source.
	- Stopped building Python 3.7 wheels
	- Stopped building macOS universal wheels
	- Wheels for macOS now require macOS 12 or later
	- Included macOS ARM wheels 
	- Included Python 3.12 wheels
2024-06-02 18:58:48 -07:00
1f7aad7218 skip python3.7 because it's EOL
Still supported for now, just no wheels provided
2024-06-02 17:56:13 -07:00
23133eb802 Fix build matrix and macOS target 2024-06-02 17:43:33 -07:00
0448dbe6e1 Update CIBuildWheel 2024-06-02 17:32:00 -07:00
ec7953dcff Release 0.2.0
### Changed

- Updated Pybind11 to version 2.10, adding Python 3.11 support
- Updated install instructions in readme to reflect availability on PyPI
- Encode now skips .dds files in its input to prevent needless re-encoding

### Added

- Added the `-n` option for bc3 encoding to perform a BC3nm swizzle
2023-06-21 15:46:13 -07:00
3280fc74be Merge branch 'dev' 2023-06-21 15:45:35 -07:00
cbec93ed55 Skip any dds files when encoding 2023-06-21 15:41:08 -07:00
8509384bff Add -n option to encode bc3 which performs a BC3nm swizzle 2023-06-21 15:32:24 -07:00
1c86b09ca0 Fix docs link 2023-06-21 15:31:47 -07:00
d4eada16f9 Update readme with easier install directions 2023-06-21 15:11:17 -07:00
aed575edc6 Release 0.1.4
### Changed

- Updated Pybind11 to version 2.10, adding Python 3.11 support
2022-10-29 23:32:05 -07:00
4cdcb65f3a Merge branch 'dev' 2022-10-29 23:31:45 -07:00
0a66fcca20 Merge pull request #35 from drewcassidy/dependabot/github_actions/dev/pypa/cibuildwheel-2.11.2
Bump pypa/cibuildwheel from 2.5.0 to 2.11.2
2022-10-29 22:13:20 -07:00
37f0673e95 Merge pull request #33 from drewcassidy/dependabot/github_actions/dev/actions/setup-python-4.3.0
Bump actions/setup-python from 4.0.0 to 4.3.0
2022-10-29 22:12:59 -07:00
b81df96990 track python dependencies 2022-10-29 21:55:53 -07:00
38beffef05 Bump pypa/cibuildwheel from 2.5.0 to 2.11.2
Bumps [pypa/cibuildwheel](https://github.com/pypa/cibuildwheel) from 2.5.0 to 2.11.2.
- [Release notes](https://github.com/pypa/cibuildwheel/releases)
- [Changelog](https://github.com/pypa/cibuildwheel/blob/main/docs/changelog.md)
- [Commits](https://github.com/pypa/cibuildwheel/compare/2.5.0...v2.11.2)

---
updated-dependencies:
- dependency-name: pypa/cibuildwheel
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-10-29 21:05:59 -07:00
0dccd1cd07 Update pybind to 2.10 to allow Python 3.11 support 2022-10-29 21:03:12 -07:00
7ea104f712 Bump actions/setup-python from 4.0.0 to 4.3.0
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 4.0.0 to 4.3.0.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v4.0.0...v4.3.0)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-10-10 12:46:28 +00:00
9cb60f0ce2 Merge pull request #21 from drewcassidy/dependabot/github_actions/dev/actions/setup-python-4.0.0
Bump actions/setup-python from 3.1.2 to 4.0.0
2022-06-12 18:02:11 -07:00
15e0c68df6 Merge branch 'dev' into dependabot/github_actions/dev/actions/setup-python-4.0.0 2022-06-12 17:18:18 -07:00
9f54349556 Specify python versions 2022-06-12 17:16:18 -07:00
71c069d30c Bump actions/setup-python from 3.1.2 to 4.0.0
Bumps [actions/setup-python](https://github.com/actions/setup-python) from 3.1.2 to 4.0.0.
- [Release notes](https://github.com/actions/setup-python/releases)
- [Commits](https://github.com/actions/setup-python/compare/v3.1.2...v4.0.0)

---
updated-dependencies:
- dependency-name: actions/setup-python
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2022-06-09 11:32:38 +00:00
661536e6f6 use scoped lock 2022-05-22 20:59:37 -07:00
83 changed files with 1059 additions and 3424 deletions

View File

@ -1,11 +1,10 @@
---
BasedOnStyle: google
IndentWidth: 4
ColumnLimit: 120
ColumnLimit: 160
AllowShortBlocksOnASingleLine: Always
AllowShortFunctionsOnASingleLine: All
AlwaysBreakTemplateDeclarations: MultiLine
#RequiresClausePositionStyle: SingleLine # requires Clang 15 :(
#AlignConsecutiveDeclarations: true
---

View File

@ -2,15 +2,15 @@ FormatStyle: google
Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,-misc-unused-parameters,-misc-non-private-member-variables-in-classes,readability-identifier-naming,cppcoreguidelines-narrowing-conversions'
CheckOptions:
- { key: readability-identifier-naming.NamespaceCase, value: lower_case }
- { key: readability-identifier-naming.ClassCase, value: CamelCase }
- { key: readability-identifier-naming.StructCase, value: CamelCase }
- { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase }
- { key: readability-identifier-naming.FunctionCase, value: lower_case }
- { key: readability-identifier-naming.VariableCase, value: lower_case }
- { key: readability-identifier-naming.MemberCase, value: lower_case }
- { key: readability-identifier-naming.PrivateMemberPrefix, value: _ }
- { key: readability-identifier-naming.ProtectedMemberPrefix, value: _ }
- { key: readability-identifier-naming.NamespaceCase, value: lower_case }
- { key: readability-identifier-naming.ClassCase, value: CamelCase }
- { key: readability-identifier-naming.StructCase, value: CamelCase }
- { key: readability-identifier-naming.TemplateParameterCase, value: CamelCase }
- { key: readability-identifier-naming.FunctionCase, value: aNy_CasE }
- { key: readability-identifier-naming.VariableCase, value: lower_case }
- { key: readability-identifier-naming.MemberCase, value: lower_case }
- { key: readability-identifier-naming.PrivateMemberPrefix, value: _ }
- { key: readability-identifier-naming.ProtectedMemberPrefix, value: _ }
- { key: readability-identifier-naming.EnumConstantCase, value: CamelCase }
- { key: readability-identifier-naming.ConstexprVariableCase, value: CamelCase }
- { key: readability-identifier-naming.GlobalConstantCase, value: CamelCase }

View File

@ -7,4 +7,10 @@ updates:
target-branch: "dev"
schedule:
# Check for updates to GitHub Actions every weekday
interval: "daily"
interval: "daily"
# Maintain dependencies for pip
- package-ecosystem: "pip"
directory: "/"
schedule:
interval: "weekly"

View File

@ -6,39 +6,25 @@ name: Python Package
on: [ push, pull_request ]
jobs:
test:
name: Run Unit Tests
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [ macos-12, windows-latest, ubuntu-latest ]
build-sdist:
name: Build SDist
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
- name: Set up GCC
if: runner.os == 'linux'
uses: egor-tensin/setup-gcc@v1
with:
version: 10
- name: Setup cmake
uses: jwlawson/actions-setup-cmake@v1.12
with:
cmake-version: 'latest'
github-api-token: ${{ secrets.GITHUB_TOKEN }}
- name: Set up Python
uses: actions/setup-python@v3.1.2
uses: actions/setup-python@v4.3.0
with:
python-version: '3.x'
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install flake8 pybind11
python -m pip install flake8
python -m pip install setuptools twine build
- name: Lint with flake8
run: |
@ -47,35 +33,6 @@ jobs:
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
- name: Build C code
run: |
ls
cmake -S . -B build -DQUICKTEX_NOPYTHON=TRUE -DCMAKE_BUILD_TYPE=Debug
cmake --build build
- name: Test C code
run: |
ctest -V --test-dir build -C Debug
build-sdist:
name: Build SDist
runs-on: ubuntu-latest
needs: test
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
- name: Set up Python
uses: actions/setup-python@v3.1.2
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install setuptools twine build
- name: Build SDist
run: python -m build --sdist
@ -88,22 +45,18 @@ jobs:
path: dist/*.tar.gz
build-wheels:
name: Build Wheels on ${{ matrix.os }} ${{ matrix.arch[0] }}
name: Build Wheels on ${{ matrix.os }}
runs-on: ${{ matrix.os }}
needs: test
strategy:
matrix:
os: [ macos-12, windows-latest, ubuntu-latest ]
arch: [ [ 'x86', 'x86_64', 'AMD64', 'x86_64' ] ] #[suffix, mac, windows, linux] arch names
include:
- os: ubuntu-latest
arch: [ 'ARM', 'arm64', 'ARM64', 'aarch64' ]
# macos-13 is an intel runner, macos-14 is apple silicon
os: [ubuntu-latest, windows-latest, macos-13, macos-14]
linux_arch: [ 'x86_64' ] #[suffix, mac, windows, linux] arch names
steps:
- uses: actions/checkout@v3
with:
fetch-depth: 0
submodules: recursive
- name: Install libomp
if: runner.os == 'macOS'
@ -116,17 +69,19 @@ jobs:
sudo tar fvxz openmp-*.tar.gz -C /
- name: Install QEMU
# install QEMU if building for arm linux
# install QEMU if building for linux
uses: docker/setup-qemu-action@v2
if: runner.os == 'linux' && matrix.arch[3] == 'aarch64'
if: runner.os == 'linux'
with:
platforms: arm64
- name: Build wheels
uses: pypa/cibuildwheel@2.5.0
uses: pypa/cibuildwheel@v2.18.1
env:
MACOSX_DEPLOYMENT_TARGET: "10.9"
CIBW_ARCHS_LINUX: ${{ matrix.arch[3] }}
MACOSX_DEPLOYMENT_TARGET: "12"
CIBW_ARCHS_LINUX: 'x86_64 aarch64'
CIBW_ARCHS_MACOS: 'native'
CIBW_SKIP: 'cp37*'
- name: Upload Wheels
uses: actions/upload-artifact@v3
@ -143,7 +98,9 @@ jobs:
- uses: actions/checkout@v3 # just need the changelog
- name: Set up Python
uses: actions/setup-python@v3.1.2
uses: actions/setup-python@v4.3.0
with:
python-version: '3.x'
- name: Install dependencies
run: |
@ -182,4 +139,4 @@ jobs:
name: ${{ env.VERSION_TITLE }}
body_path: RELEASE.md
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}

1
.gitignore vendored
View File

@ -33,4 +33,3 @@ compile_commands.json
CTestTestfile.cmake
_deps
cmake-build-*
*.a

3
.gitmodules vendored
View File

@ -1,3 +0,0 @@
[submodule "external/xsimd"]
path = external/xsimd
url = https://github.com/xtensor-stack/xsimd.git

View File

@ -2,11 +2,33 @@
All notable changes to this project will be documented in this file
## Unreleased
## 0.2.1 - 2024-06-03
### Fixed
- Fixed LeastSquares endpoint mode producing incorrect results
- Fixed broken transparency on palettized PNG files
### Changed
- Changed which wheels are built by the CI. There are no changes to OS or Python version compatibility if you compile from source.
- Stopped building Python 3.7 wheels
- Stopped building macOS universal wheels
- Wheels for macOS now require macOS 12 or later
- Included macOS ARM wheels
- Included Python 3.12 wheels
## 0.2.0 - 2023-06-21
### Changed
- Updated Pybind11 to version 2.10, adding Python 3.11 support
- Updated install instructions in readme to reflect availability on PyPI
- Encode now skips .dds files in its input to prevent needless re-encoding
### Added
- Added the `-n` option for bc3 encoding to perform a BC3nm swizzle
## 0.1.3 - 2022-04-13

View File

@ -1,14 +1,63 @@
cmake_minimum_required(VERSION 3.18)
include(tools/CompilerWarnings.cmake)
include(tools/SIMDFlags.cmake)
set(CMAKE_VERBOSE_MAKEFILE ON)
project(quicktex)
add_subdirectory(external/xsimd)
# Find dependencies
find_package(Python COMPONENTS Interpreter Development.Module)
find_package(pybind11 CONFIG REQUIRED)
find_package(OpenMP)
add_subdirectory(quicktex)
add_subdirectory(tests)
# Collect source files
file(GLOB SOURCE_FILES
"quicktex/*.cpp"
"quicktex/s3tc/*.cpp"
"quicktex/s3tc/bc1/*.cpp"
"quicktex/s3tc/bc3/*.cpp"
"quicktex/s3tc/bc4/*.cpp"
"quicktex/s3tc/bc5/*.cpp"
"quicktex/s3tc/interpolator/*.cpp"
)
enable_testing ()
add_test (NAME QuicktexTest COMMAND Test)
file(GLOB HEADER_FILES
"quicktex/*.h"
"quicktex/s3tc/*.h"
"quicktex/s3tc/bc1/*.h"
"quicktex/s3tc/bc3/*.h"
"quicktex/s3tc/bc4/*.h"
"quicktex/s3tc/bc5/*.h"
"quicktex/s3tc/interpolator/*.h"
)
file(GLOB_RECURSE PYTHON_FILES "src/**/*.py")
# Organize source files together for some IDEs
source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCE_FILES} ${HEADER_FILES} ${PYTHON_FILES})
# Add python module
pybind11_add_module(_quicktex
${SOURCE_FILES}
${HEADER_FILES})
# Set Quicktex version info
target_compile_definitions(_quicktex PRIVATE VERSION_INFO=${QUICKTEX_VERSION_INFO})
# enable openMP if available
if (OpenMP_CXX_FOUND)
target_link_libraries(_quicktex PUBLIC OpenMP::OpenMP_CXX)
endif ()
# Set module features, like C/C++ standards
target_compile_features(_quicktex PUBLIC cxx_std_17 c_std_11)
# Set compiler warnings
set_project_warnings(_quicktex)
# Clang-specific
if (CMAKE_CXX_COMPILER_ID MATCHES ".*Clang")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -stdlib=libc++ -lc++abi")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -stdlib=libc++ -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -stdlib=libc++ -fsanitize=undefined")
set(PROJECT_WARNINGS ${CLANG_WARNINGS})
endif ()

View File

@ -1,2 +0,0 @@
graft external
global-exclude *.afdesign # this is currently the vast majority of the repo size

View File

@ -9,15 +9,32 @@ comparable to the original library.
## Installation
To install, first clone this repo and cd into it, then run:
### From Wheel (Easiest)
To install, run
```shell
pip install quicktex
```
If you are on macOS, you need to install openMP from homebrew:
```shell
brew install libomp
```
### From Source
To build from source, first clone this repo and cd into it, then run:
```shell
git submodule update --init
pip install .
```
and setuptools will take care of any dependencies for you.
If you are on macOS, it is recommended to first install openMP from homebrew to enable
If you are on macOS, it is recommended to first install openMP from homebrew to enable
multithreading, since it is not included in the default Apple Clang install:
```shell
@ -31,8 +48,6 @@ required dependencies for them, install with options like so:
pip install .[tests,stubs,docs]
```
Quicktex will be available on Pypi once it is out of alpha.
## Usage
```

1
external/xsimd vendored

Submodule external/xsimd deleted from 1577b02d54

View File

@ -4,7 +4,7 @@ requires = [
"setuptools_scm>=6.2",
"wheel",
"cmake>=3.18",
"pybind11~=2.6.1",
"pybind11~=2.10",
"ninja; sys_platform != 'win32'",
]
build-backend = "setuptools.build_meta"
@ -46,7 +46,7 @@ docs = [
stubs = ["pybind11-stubgen"]
[project.urls]
Docs = "https://quicktex.readthedocs.io/en/"
Docs = "https://quicktex.readthedocs.io/en/latest/"
Source = "https://github.com/drewcassidy/quicktex"
Changelog = "https://github.com/drewcassidy/quicktex/blob/main/CHANGELOG.md"
@ -64,10 +64,8 @@ package-dir = { '' = '.' } # without this line, C++ source files get included in
[tool.cibuildwheel]
build = "cp*" # only build wheels for cpython.
build-frontend = "build"
test-command = "pytest {project}/tests --verbose --full-trace --capture=tee-sys"
test-command = "pytest {project}/tests --verbose"
test-extras = ["tests"]
test-skip = "*-macosx_arm64 *-macosx_universal2:arm64" # skip testing on arm macOS because CIBW doesnt support it
environment = { QUICKTEX_SIMD_MODE = "SSE4" } # SSE4 has a 99% market share and was released under the Bush administration
[tool.cibuildwheel.macos]
archs = ["x86_64", "universal2"] # build fat binaries, or x86-64 for python 3.7
@ -77,16 +75,11 @@ skip = ["cp{38,39,31*}-macosx_x86_64"] # skip x86-only builds where fat binaries
archs = ["auto64"] # arm64 windows builds not yet supported
[tool.cibuildwheel.linux]
skip = ["*musllinux*"]
skip = ["cp37-musllinux*", "*musllinux_aarch64*"] # skip targets without available Pillow wheels
manylinux-x86_64-image = "manylinux2014"
manylinux-aarch64-image = "manylinux2014"
[tool.black]
line-length = 120 # 80-column is stupid
target-version = ['py37', 'py38', 'py39', 'py310']
skip-string-normalization = true
[tool.pytest.ini_options]
minversion = "7.0"
addopts = ["--full-trace", "--capture=tee-sys"]
testpaths = ["tests"]
target-version = ['py37', 'py38', 'py39', 'py310', 'py310']
skip-string-normalization = true

View File

@ -1,71 +0,0 @@
# Find dependencies
if (NOT QUICKTEX_NOPYTHON)
find_package(Python COMPONENTS Interpreter Development.Module)
find_package(pybind11 CONFIG REQUIRED)
endif ()
find_package(OpenMP)
#Collect source files
set(SOURCE_FILES
Matrix4x4.cpp OldColor.cpp
s3tc/bc1/BC1Block.cpp s3tc/bc1/BC1Decoder.cpp
s3tc/bc1/BC1Encoder.cpp s3tc/bc1/OrderTable.cpp s3tc/bc1/OrderTable4.cpp
s3tc/bc3/BC3Decoder.cpp s3tc/bc3/BC3Encoder.cpp
s3tc/bc4/BC4Block.cpp s3tc/bc4/BC4Decoder.cpp s3tc/bc4/BC4Encoder.cpp
s3tc/bc5/BC5Decoder.cpp s3tc/bc5/BC5Encoder.cpp
s3tc/interpolator/Interpolator.cpp
texture/RawTexture.cpp texture/Window.cpp test.cpp)
set(BINDING_FILES
_bindings.cpp
s3tc/_bindings.cpp
s3tc/bc1/_bindings.cpp
s3tc/bc3/_bindings.cpp
s3tc/bc4/_bindings.cpp
s3tc/bc5/_bindings.cpp
s3tc/interpolator/_bindings.cpp)
file(GLOB_RECURSE HEADER_FILES "**.h")
file(GLOB_RECURSE PYTHON_FILES "**.py")
# Organize source files together for some IDEs
source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCE_FILES} ${BINDING_FILES} ${HEADER_FILES} ${PYTHON_FILES})
# Declare implementation module
add_library(quicktex STATIC ${SOURCE_FILES} ${HEADER_FILES})
# Link openMP if available
if (OpenMP_CXX_FOUND)
target_link_libraries(quicktex PUBLIC OpenMP::OpenMP_CXX)
endif ()
# Link XSimd
target_link_libraries(quicktex PUBLIC xsimd)
# Set library features, like C/C++ standards
target_compile_features(quicktex PUBLIC cxx_std_20 c_std_11)
set_property(TARGET quicktex PROPERTY CXX_VISIBILITY_PRESET hidden)
set_property(TARGET quicktex PROPERTY POSITION_INDEPENDENT_CODE ON)
# Include source root for project-relative includes
target_include_directories(quicktex PUBLIC .)
# Set compiler warnings and SIMD flags
set_project_warnings(quicktex)
set_simd_flags(quicktex)
if (NOT QUICKTEX_NOPYTHON)
# Declare python module
pybind11_add_module(_quicktex ${BINDING_FILES} ${HEADER_FILES})
target_compile_definitions(_quicktex PRIVATE VERSION_INFO=${QUICKTEX_VERSION_INFO})
# Link python module with implementation
target_link_libraries(_quicktex PUBLIC quicktex)
if ((NOT MSVC) AND (CMAKE_BUILD_TYPE MATCHES Debug) AND ($ENV{QUICKTEX_SANITIZE}))
target_compile_options(_quicktex PUBLIC -fsanitize=address,undefined -fno-sanitize-recover=address,undefined -fno-omit-frame-pointer)
target_link_options(_quicktex PUBLIC -fsanitize=address,undefined -fno-sanitize-recover=address,undefined -fno-omit-frame-pointer)
endif ()
endif ()

View File

@ -16,19 +16,18 @@
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "OldColor.h"
#include "Color.h"
#include <algorithm>
#include <stdexcept>
#include "Vector4.h"
#include "Vector4Int.h"
#include "util/bitbash.h"
#include "util/math.h" // for scale_to_8<5>, scale_from_8<5>, assert5bit, scale_to_8<6>
#include "util.h" // for scale5To8, scale8To5, assert5bit, scale6To8
namespace quicktex {
OldColor::OldColor(Vector4Int v) {
Color::Color(Vector4Int v) {
if (v.MaxAbs() > 0xFF) throw std::invalid_argument("Vector members out of range");
for (int i = 0; i < 4; i++) {
if (v[i] < 0) throw std::range_error("Color members cannot be negative");
@ -40,42 +39,40 @@ OldColor::OldColor(Vector4Int v) {
a = static_cast<uint8_t>(v[3]);
}
uint16_t OldColor::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) {
uint16_t Color::Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b) {
assert5bit(r);
assert6bit(g);
assert5bit(b);
return static_cast<uint16_t>(b | (g << 5) | (r << 11));
}
uint16_t OldColor::Pack565(uint8_t r, uint8_t g, uint8_t b) {
return Pack565Unscaled(scale_from_8<5>(r), scale_from_8<6>(g), scale_from_8<5>(b));
}
uint16_t Color::Pack565(uint8_t r, uint8_t g, uint8_t b) { return Pack565Unscaled(scale8To5(r), scale8To6(g), scale8To5(b)); }
OldColor OldColor::Unpack565Unscaled(uint16_t Packed) {
Color Color::Unpack565Unscaled(uint16_t Packed) {
uint8_t r = (Packed >> 11) & 0x1F;
uint8_t g = (Packed >> 5) & 0x3F;
uint8_t b = Packed & 0x1F;
return OldColor(r, g, b);
return Color(r, g, b);
}
OldColor OldColor::Unpack565(uint16_t Packed) {
uint8_t r = static_cast<uint8_t>(scale_to_8<5>((Packed >> 11) & 0x1FU));
uint8_t g = static_cast<uint8_t>(scale_to_8<6>((Packed >> 5) & 0x3FU));
uint8_t b = static_cast<uint8_t>(scale_to_8<5>(Packed & 0x1FU));
Color Color::Unpack565(uint16_t Packed) {
uint8_t r = static_cast<uint8_t>(scale5To8((Packed >> 11) & 0x1FU));
uint8_t g = static_cast<uint8_t>(scale6To8((Packed >> 5) & 0x3FU));
uint8_t b = static_cast<uint8_t>(scale5To8(Packed & 0x1FU));
return OldColor(r, g, b);
return Color(r, g, b);
}
OldColor OldColor::PreciseRound565(Vector4 &v) {
Color Color::PreciseRound565(Vector4 &v) {
int trial_r = (int)(v[0] * UINT5_MAX);
int trial_g = (int)(v[1] * UINT6_MAX);
int trial_b = (int)(v[2] * UINT5_MAX);
// clamp to prevent weirdness with slightly out of bounds float values
uint8_t r = (uint8_t)clamp<int>(trial_r, 0, UINT5_MAX);
uint8_t g = (uint8_t)clamp<int>(trial_g, 0, UINT6_MAX);
uint8_t b = (uint8_t)clamp<int>(trial_b, 0, UINT5_MAX);
uint8_t r = (uint8_t)clampi(trial_r, 0, UINT5_MAX);
uint8_t g = (uint8_t)clampi(trial_g, 0, UINT6_MAX);
uint8_t b = (uint8_t)clampi(trial_b, 0, UINT5_MAX);
// increment each channel if above the rounding point
r += v[0] > Midpoints5bit[r];
@ -86,36 +83,46 @@ OldColor OldColor::PreciseRound565(Vector4 &v) {
assert6bit(g);
assert5bit(b);
return OldColor(r, g, b);
return Color(r, g, b);
}
void OldColor::SetRGB(uint8_t vr, uint8_t vg, uint8_t vb) {
void Color::SetRGB(uint8_t vr, uint8_t vg, uint8_t vb) {
r = vr;
g = vg;
b = vb;
}
size_t OldColor::MaxChannelRGB() {
size_t Color::MinChannelRGB() {
if (r <= g && r <= b) return 0;
if (g <= b && g <= r) return 1;
return 2;
}
size_t Color::MaxChannelRGB() {
if (r >= g && r >= b) return 0;
if (g >= b && g >= r) return 1;
return 2;
}
OldColor::operator Vector4() const { return Vector4(r, g, b, a); }
OldColor::operator Vector4Int() const { return Vector4Int(r, g, b, a); }
Vector4Int operator-(const OldColor &lhs, const OldColor &rhs) {
Color Color::Min(const Color &A, const Color &B) { return Color(std::min(A[0], B[0]), std::min(A[1], B[1]), std::min(A[2], B[2]), std::min(A[3], B[3])); }
Color Color::Max(const Color &a, const Color &b) { return Color(std::max(a[0], b[0]), std::max(a[1], b[1]), std::max(a[2], b[2]), std::max(a[3], b[3])); }
Color::operator Vector4() const { return Vector4(r, g, b, a); }
Color::operator Vector4Int() const { return Vector4Int(r, g, b, a); }
Vector4Int operator-(const Color &lhs, const Color &rhs) {
Vector4Int result;
for (unsigned i = 0; i < 4; i++) { result[i] = (int)lhs[i] - rhs[i]; }
return result;
}
uint16_t OldColor::Pack565() const { return Pack565(r, g, b); }
uint16_t OldColor::Pack565Unscaled() const { return Pack565Unscaled(r, g, b); }
uint16_t Color::Pack565() const { return Pack565(r, g, b); }
uint16_t Color::Pack565Unscaled() const { return Pack565Unscaled(r, g, b); }
OldColor OldColor::ScaleTo565() const { return OldColor(scale_from_8<5>(r), scale_from_8<6>(g), scale_from_8<5>(b)); }
OldColor OldColor::ScaleFrom565() const { return OldColor(scale_to_8<5>(r), scale_to_8<6>(g), scale_to_8<5>(b)); }
Color Color::ScaleTo565() const { return Color(scale8To5(r), scale8To6(g), scale8To5(b)); }
Color Color::ScaleFrom565() const { return Color(scale5To8(r), scale6To8(g), scale5To8(b)); }
bool OldColor::operator==(const OldColor &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }
bool OldColor::operator!=(const OldColor &Rhs) const { return !(Rhs == *this); }
bool Color::operator==(const Color &Rhs) const { return r == Rhs.r && g == Rhs.g && b == Rhs.b && a == Rhs.a; }
bool Color::operator!=(const Color &Rhs) const { return !(Rhs == *this); }
} // namespace quicktex

View File

@ -1,5 +1,5 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Copyright (C) 2021-2022 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
@ -18,60 +18,82 @@
*/
#pragma once
#include "Matrix.h"
#include "util/bitbash.h"
#include <cassert> // for assert
#include <cstddef> // for size_t
#include <cstdint> // for uint8_t, uint16_t
namespace quicktex {
class Vector4;
class Vector4Int;
using Color = Vec<uint8_t, 4>;
using ColorRGB = Vec<uint8_t, 3>;
constexpr size_t uint5_max = (1 << 5) - 1;
constexpr size_t uint6_max = (1 << 6) - 1;
template <size_t N> struct MidpointTable {
#pragma pack(push, 1)
class Color {
public:
constexpr MidpointTable() : _values() {
constexpr float fN = (float)N;
for (unsigned i = 0; i < N - 1; i++) { _values[i] = ((float)i / fN) + (0.5f / fN); }
_values[N - 1] = 1e+37f;
uint8_t r;
uint8_t g;
uint8_t b;
uint8_t a;
constexpr Color() : Color(0, 0, 0, 0xFF) {}
constexpr Color(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va = 0xFF) : r(vr), g(vg), b(vb), a(va) {}
Color(Vector4Int v);
static uint16_t Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b);
static uint16_t Pack565(uint8_t r, uint8_t g, uint8_t b);
static Color Unpack565Unscaled(uint16_t Packed);
static Color Unpack565(uint16_t Packed);
static Color PreciseRound565(Vector4 &v);
static Color Min(const Color &A, const Color &B);
static Color Max(const Color &A, const Color &B);
bool operator==(const Color &Rhs) const;
bool operator!=(const Color &Rhs) const;
uint8_t operator[](size_t index) const {
assert(index < 4);
return reinterpret_cast<const uint8_t *>(this)[index];
}
uint8_t &operator[](size_t index) {
assert(index < 4);
return reinterpret_cast<uint8_t *>(this)[index];
}
float operator[](size_t i) const {
assert(i < N);
return _values[i];
}
operator Vector4() const;
operator Vector4Int() const;
friend Vector4Int operator-(const Color &lhs, const Color &rhs);
void SetRGB(uint8_t vr, uint8_t vg, uint8_t vb);
void SetRGB(const Color &other) { SetRGB(other.r, other.g, other.b); }
uint16_t Pack565() const;
uint16_t Pack565Unscaled() const;
Color ScaleTo565() const;
Color ScaleFrom565() const;
size_t MinChannelRGB();
size_t MaxChannelRGB();
bool IsGrayscale() const { return ((r == g) && (r == b)); }
bool IsBlack() const { return (r | g | b) < 4; }
int GetLuma() const { return (13938U * r + 46869U * g + 4729U * b + 32768U) >> 16U; } // REC709 weightings
private:
float _values[N];
static constexpr float Midpoints5bit[32] = {.015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f,
.370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f,
.725490f, .758824f, .792157f, .823529f, .854902f, .888235f, .921569f, .952941f, .984314f, 1e+37f};
static constexpr float Midpoints6bit[64] = {.007843f, .023529f, .039216f, .054902f, .070588f, .086275f, .101961f, .117647f, .133333f, .149020f, .164706f,
.180392f, .196078f, .211765f, .227451f, .245098f, .262745f, .278431f, .294118f, .309804f, .325490f, .341176f,
.356863f, .372549f, .388235f, .403922f, .419608f, .435294f, .450980f, .466667f, .482353f, .500000f, .517647f,
.533333f, .549020f, .564706f, .580392f, .596078f, .611765f, .627451f, .643137f, .658824f, .674510f, .690196f,
.705882f, .721569f, .737255f, .754902f, .772549f, .788235f, .803922f, .819608f, .835294f, .850980f, .866667f,
.882353f, .898039f, .913725f, .929412f, .945098f, .960784f, .976471f, .992157f, 1e+37f};
};
constexpr MidpointTable<32> Midpoints5bit;
constexpr MidpointTable<64> Midpoints6bit;
template <typename T> Vec<T, 3> scale_to_565(Vec<T, 3> unscaled) {
return Vec<T, 3>{scale_from_8<T, 5>(unscaled.r()), scale_from_8<T, 6>(unscaled.g()),
scale_from_8<T, 5>(unscaled.b())};
}
template <typename T> Vec<T, 3> scale_from_565(Vec<T, 3> scaled) {
return Vec<T, 3>{scale_to_8<T, 5>(scaled.r()), scale_to_8<T, 6>(scaled.g()), scale_to_8<T, 5>(scaled.b())};
}
template <typename T = int16_t> Vec<T, 3> precise_round_565(Vec<float, 3> &v) {
auto scaled = v * Vec<float, 3>{uint5_max, uint6_max, uint5_max}; // rescale by from (0,1) to (0,int_max)
auto rounded = (Vec<T, 3>)scaled; // downcast to integral type
rounded = rounded.clamp({0, 0, 0}, {uint5_max, uint6_max, uint5_max}); // clamp to avoid out of bounds float errors
// increment each channel if above the rounding point
if (v.r() > Midpoints5bit[rounded.r()]) rounded.r()++;
if (v.g() > Midpoints6bit[rounded.g()]) rounded.g()++;
if (v.b() > Midpoints5bit[rounded.b()]) rounded.b()++;
assert(rounded.r() <= uint5_max);
assert(rounded.g() <= uint6_max);
assert(rounded.b() <= uint5_max);
return rounded;
}
#pragma pack(pop)
} // namespace quicktex

View File

@ -25,7 +25,7 @@
#include <cstring>
#include <stdexcept>
#include "OldColor.h"
#include "Color.h"
#include "Vector4Int.h"
namespace quicktex {
@ -34,9 +34,9 @@ using Coords = std::tuple<int, int>;
template <int N, int M> class ColorBlock {
public:
struct Metrics {
OldColor min;
OldColor max;
OldColor avg;
Color min;
Color max;
Color avg;
bool is_greyscale;
bool has_black;
Vector4Int sums;
@ -45,37 +45,37 @@ template <int N, int M> class ColorBlock {
static constexpr int Width = N;
static constexpr int Height = M;
constexpr OldColor Get(int x, int y) const {
constexpr Color Get(int x, int y) const {
if (x >= Width || x < 0) throw std::invalid_argument("x value out of range");
if (y >= Height || y < 0) throw std::invalid_argument("y value out of range");
return _pixels[x + (N * y)];
}
constexpr OldColor Get(int i) const {
constexpr Color Get(int i) const {
if (i >= N * M || i < 0) throw std::invalid_argument("i value out of range");
return _pixels[i];
}
void Set(int x, int y, const OldColor &value) {
void Set(int x, int y, const Color &value) {
if (x >= Width || x < 0) throw std::invalid_argument("x value out of range");
if (y >= Height || y < 0) throw std::invalid_argument("y value out of range");
_pixels[x + (N * y)] = value;
}
void Set(int i, const OldColor &value) {
void Set(int i, const Color &value) {
if (i >= N * M || i < 0) throw std::invalid_argument("i value out of range");
_pixels[i] = value;
}
void GetRow(int y, OldColor *dst) const {
void GetRow(int y, Color *dst) const {
if (y >= Height || y < 0) throw std::invalid_argument("y value out of range");
std::memcpy(dst, &_pixels[N * y], N * sizeof(OldColor));
std::memcpy(dst, &_pixels[N * y], N * sizeof(Color));
}
void SetRow(int y, const OldColor *src) {
void SetRow(int y, const Color *src) {
if (y >= Height || y < 0) throw std::invalid_argument("y value out of range");
std::memcpy(&_pixels[N * y], src, N * sizeof(OldColor));
std::memcpy(&_pixels[N * y], src, N * sizeof(Color));
}
bool IsSingleColor() const {
@ -88,8 +88,8 @@ template <int N, int M> class ColorBlock {
Metrics GetMetrics(bool ignore_black = false) const {
Metrics metrics;
metrics.min = OldColor(UINT8_MAX, UINT8_MAX, UINT8_MAX);
metrics.max = OldColor(0, 0, 0);
metrics.min = Color(UINT8_MAX, UINT8_MAX, UINT8_MAX);
metrics.max = Color(0, 0, 0);
metrics.has_black = false;
metrics.is_greyscale = true;
metrics.sums = {0, 0, 0};
@ -97,7 +97,7 @@ template <int N, int M> class ColorBlock {
unsigned total = 0;
for (unsigned i = 0; i < M * N; i++) {
OldColor val = Get(i);
Color val = Get(i);
bool is_black = val.IsBlack();
metrics.has_black |= is_black;
@ -118,7 +118,7 @@ template <int N, int M> class ColorBlock {
}
private:
std::array<OldColor, N * M> _pixels;
std::array<Color, N * M> _pixels;
};
} // namespace quicktex

View File

@ -22,7 +22,7 @@
#include <memory>
#include "ColorBlock.h"
#include "texture/RawTexture.h"
#include "Texture.h"
namespace quicktex {
@ -46,19 +46,19 @@ template <class T> class BlockDecoder : public Decoder<T> {
virtual DecodedBlock DecodeBlock(const EncodedBlock &block) const = 0;
virtual RawTexture Decode(const T &encoded) const override {
auto decoded = RawTexture(encoded.width, encoded.height);
auto decoded = RawTexture(encoded.Width(), encoded.Height());
int blocks_x = encoded.bwidth();
int blocks_y = encoded.bheight();
int blocks_x = encoded.BlocksX();
int blocks_y = encoded.BlocksY();
// from experimentation, multithreading this using OpenMP actually makes decoding slower
// due to thread creation/teardown taking longer than the decoding process itself.
// As a result, this is left as a serial operation despite being embarrassingly parallelizable
for (int y = 0; y < blocks_y; y++) {
for (int x = 0; x < blocks_x; x++) {
auto block = encoded.get_block(x, y);
auto block = encoded.GetBlock(x, y);
auto pixels = DecodeBlock(block);
decoded.set_block<BlockWidth, BlockHeight>(x, y, pixels);
decoded.SetBlock<BlockWidth, BlockHeight>(x, y, pixels);
}
}

View File

@ -22,7 +22,7 @@
#include <memory>
#include "ColorBlock.h"
#include "texture/RawTexture.h"
#include "Texture.h"
namespace quicktex {
@ -46,22 +46,21 @@ template <typename T> class BlockEncoder : public Encoder<T> {
virtual EncodedBlock EncodeBlock(const DecodedBlock &block) const = 0;
virtual T Encode(const RawTexture &decoded) const override {
auto encoded = T(decoded.width, decoded.height);
auto encoded = T(decoded.Width(), decoded.Height());
unsigned blocks_x = encoded.bwidth();
unsigned blocks_y = encoded.bheight();
int blocks_x = encoded.BlocksX();
int blocks_y = encoded.BlocksY();
// from experimentation, multithreading this using OpenMP sometimes actually makes encoding slower
// due to thread creation/teardown taking longer than the encoding process itself.
// As a result, this is sometimes left as a serial operation despite being embarrassingly parallelizable
// threshold for number of blocks before multithreading is set by overriding MTThreshold()
#pragma omp parallel for if (blocks_x * blocks_y >= MTThreshold())
for (int y = 0; y < (int)blocks_y; y++) {
for (int x = 0; x < (int)blocks_x; x++) {
// index variables have to be signed for MSVC for some reason
auto pixels = decoded.get_block<BlockWidth, BlockHeight>(x, y);
for (int y = 0; y < blocks_y; y++) {
for (int x = 0; x < blocks_x; x++) {
auto pixels = decoded.GetBlock<BlockWidth, BlockHeight>(x, y);
auto block = EncodeBlock(pixels);
encoded.set_block(x, y, block);
encoded.SetBlock(x, y, block);
}
}

View File

@ -1,457 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <algorithm>
#include <cstdint>
#include <numeric>
#include <xsimd/xsimd.hpp>
#include "util/iterator.h"
#include "util/map.h"
#include "util/math.h"
#include "util/ranges.h"
namespace quicktex {
// Forward declaration so the helper concepts and aliases below can name Matrix before it is defined.
// Template arguments are: scalar type, M (height), N (width).
template <typename T, int M, int N> class Matrix;
// A Vec of M elements is a Matrix with M rows and exactly one column.
template <typename T, int M> using Vec = Matrix<T, M, 1>;
// region helper concepts
/// Satisfied when a function object of type Op can be invoked as fn(lhs, rhs)
/// with an lvalue of type L as the first argument and an lvalue of type R as the second.
template <typename L, typename R, typename Op>
concept operable = requires(L &lhs, R &rhs, Op &fn) {
    fn(lhs, rhs);
};
/// Satisfied only when V, ignoring cv-qualifiers, is exactly the Matrix specialisation
/// described by its own value_type / height / width members.
///
/// Matrix exposes `width` and `height` as static data members (not functions) and
/// `value_type` as a nested type, so they are required here in that form. Requiring
/// `V::width()` or a non-type `V::value_type` can never be satisfied by a Matrix.
template <typename V>
concept is_matrix = requires {
    typename V::value_type;
    V::width;
    V::height;
} && std::same_as<Matrix<typename V::value_type, V::height, V::width>, std::remove_cvref_t<V>>;
template <typename V> struct vector_stats {
static constexpr int width = 1;
static constexpr int height = 1;
static constexpr int dims = 0;
};
template <typename V>
requires is_matrix<V>
struct vector_stats<V> {
static constexpr int width = V::width;
static constexpr int height = V::height;
static constexpr int dims = V::dims;
};
template <typename V> constexpr int vector_width = vector_stats<V>::width;
template <typename V> constexpr int vector_height = vector_stats<V>::height;
template <typename V> constexpr int vector_dims = vector_stats<V>::dims;
// endregion
/**
 * Fixed-size storage of N elements of type R, used as the base class of Matrix.
 * Element access is protected; derived classes decide how to expose it.
 * @tparam R Stored element type
 * @tparam T Scalar type every element is initialised from
 * @tparam N Number of stored elements
 */
template <typename R, typename T, int N> class VecBase {
   public:
    /// Initialise every element from the same scalar (a value-initialised T by default).
    constexpr VecBase(T scalar = T()) : _c{} {
        for (auto &slot : _c) { slot = scalar; }
    }

   protected:
    // Bounds-checked element access; std::array::at throws std::out_of_range on a bad index.
    const R &_at(int index) const { return _c.at(index); }
    R &_at(int index) { return _c.at(index); }

    // Raw pointers to the first element and one past the last element.
    constexpr auto _begin() const { return _c.data(); }
    constexpr auto _begin() { return _c.data(); }
    constexpr auto _end() const { return _begin() + N; }
    constexpr auto _end() { return _begin() + N; }

   private:
    std::array<R, N> _c;
};
// Type of one row of a matrix that is N wide: a bare scalar T when N <= 1, otherwise a Vec<T, N>.
// Note the argument order here is <T, N, M> (width first), unlike Matrix<T, M, N>.
template <typename T, int N, int M> using matrix_row_type = std::conditional_t<N <= 1, T, Vec<T, N>>;
// Type of one column of a matrix that is M high: a bare scalar T when M <= 1, otherwise a Vec<T, M>.
template <typename T, int N, int M> using matrix_column_type = std::conditional_t<M <= 1, T, Vec<T, M>>;
/**
 * A matrix of values that can be operated on.
 *
 * Storage is M rows; each row is a scalar when N == 1 and an N-element vector otherwise.
 * @tparam T Scalar type
 * @tparam M Height of the matrix (number of rows)
 * @tparam N Width of the matrix (number of columns)
 */
template <typename T, int M, int N>
class Matrix : public VecBase<std::conditional_t<N == 1, T, VecBase<T, T, N>>, T, M> {
   public:
    using base = VecBase<std::conditional_t<N == 1, T, VecBase<T, T, N>>, T, M>;
    using value_type = T;
    using row_type = matrix_row_type<T, N, M>;
    using column_type = matrix_column_type<T, N, M>;

    // Inherit VecBase's constructors, which provide construction from a single scalar.
    using base::base;

    // region constructors

    /**
     * Create a matrix from an initializer list of rows
     * @param il rows to populate with; expected to hold exactly M entries
     */
    Matrix(std::initializer_list<row_type> il) : base() {
        assert(il.size() == M);  // ensure il is of the right size
        // When asserts are compiled out, never read past the end of a short list;
        // rows that are not supplied keep the value-initialised contents from base().
        const int count = std::min(static_cast<int>(il.size()), M);
        std::copy_n(il.begin(), count, this->begin());
    }

    /**
     * Create a matrix from an iterator
     * @tparam II input iterator type
     * @param input_iterator iterator to copy M rows from
     */
    template <typename II>
        requires std::input_iterator<II> && std::convertible_to<std::iter_value_t<II>, const row_type>
    Matrix(const II input_iterator) : base() {
        std::copy_n(input_iterator, M, this->begin());
    }

    /**
     * Create a matrix from a range type
     * @tparam R Range type
     * @param input_range Range to copy from; expected to hold exactly M rows
     */
    template <typename R>
    Matrix(const R &input_range)
        requires range<R> && std::convertible_to<typename R::value_type, row_type>
        : Matrix(input_range.begin()) {
        assert(std::distance(input_range.begin(), input_range.end()) == M);
    }

    /// Square matrices only: zero everywhere except ones on the main diagonal.
    template <typename R = T>
        requires(N == M)
    static constexpr Matrix identity() {
        Matrix result = Matrix(0);
        for (int i = 0; i < N; i++) { result.element(i, i) = 1; }
        return result;
    }

    // endregion

    // region iterators and accessors

    /// Number of rows.
    static constexpr int size() { return M; }

    static constexpr int width = N;
    static constexpr int height = M;
    static constexpr int elements = N * M;
    // number of axes that are longer than one element (0 for 1x1, 1 for a vector, 2 otherwise)
    static constexpr int dims = ((width > 1) ? 1 : 0) + ((height > 1) ? 1 : 0);

    // row access; the stored base element is viewed as row_type
    const row_type &at(int index) const {
        assert(index >= 0 && index < M);
        return static_cast<const row_type &>(base::_at(index));
    }

    row_type &at(int index) {
        assert(index >= 0 && index < M);
        return static_cast<row_type &>(base::_at(index));
    }

    const row_type &operator[](int index) const { return at(index); }
    row_type &operator[](int index) { return at(index); }

    // row iteration
    const row_type *begin() const { return static_cast<const row_type *>(base::_begin()); }
    row_type *begin() { return static_cast<row_type *>(base::_begin()); }
    const row_type *end() const { return static_cast<const row_type *>(base::_end()); }
    row_type *end() { return static_cast<row_type *>(base::_end()); }

    // column iteration (columns are produced by value)
    auto column_begin() const { return column_iterator(this, 0); }
    auto column_end() const { return column_iterator(this, N); }

    // iteration over every scalar element, row by row
    auto all_begin() const { return linear_iterator<const Matrix>(this, 0); }
    auto all_begin() { return linear_iterator<Matrix>(this, 0); }
    auto all_end() const { return linear_iterator<const Matrix>(this, N * M); }
    auto all_end() { return linear_iterator<Matrix>(this, N * M); }

    const row_type &get_row(int m) const { return static_cast<const row_type &>(this->at(m)); }

    template <typename R> void set_row(int m, const R &value) { this->at(m) = value; }

    /// Copy column n out of the matrix.
    template <typename S = T> column_type get_column(int n) const {
        if constexpr (M == 1) {
            return element(0, n);
        } else {
            column_type ret;
            for (int m = 0; m < M; m++) { ret[m] = element(m, n); }
            return ret;
        }
    }

    /// Overwrite column n with the given values.
    void set_column(int n, const column_type &value) {
        if constexpr (M == 1) {
            element(0, n) = value;
        } else {
            for (int m = 0; m < M; m++) { element(m, n) = value[m]; }
        }
    }

    // m/n accessors: m is the row, n is the column
    const T &element(int m, int n) const {
        if constexpr (N == 1) {
            return this->at(m);
        } else {
            return this->at(m)[n];
        }
    }

    T &element(int m, int n) { return const_cast<T &>(static_cast<const Matrix &>(*this).element(m, n)); }

    // linear accessors: i counts elements row by row
    const T &element(int i) const { return element(i / N, i % N); }
    T &element(int i) { return element(i / N, i % N); }

    // RGBA accessors
    const T &r() const { return (*this)[0]; }
    T &r() { return this->at(0); }
    template <typename S = T> std::enable_if_t<M >= 2, const S &> g() const { return this->at(1); }
    template <typename S = T> std::enable_if_t<M >= 2, S &> g() { return this->at(1); }
    template <typename S = T> std::enable_if_t<M >= 3, const S &> b() const { return this->at(2); }
    template <typename S = T> std::enable_if_t<M >= 3, S &> b() { return this->at(2); }
    template <typename S = T> std::enable_if_t<M >= 4, const S &> a() const { return this->at(3); }
    template <typename S = T> std::enable_if_t<M >= 4, S &> a() { return this->at(3); }

    // XYZW accessors
    const T &x() const { return this->at(0); }
    T &x() { return this->at(0); }
    template <typename S = T> std::enable_if_t<M >= 2, const S &> y() const { return this->at(1); }
    template <typename S = T> std::enable_if_t<M >= 2, S &> y() { return this->at(1); }
    template <typename S = T> std::enable_if_t<M >= 3, const S &> z() const { return this->at(2); }
    template <typename S = T> std::enable_if_t<M >= 3, S &> z() { return this->at(2); }
    template <typename S = T> std::enable_if_t<M >= 4, const S &> w() const { return this->at(3); }
    template <typename S = T> std::enable_if_t<M >= 4, S &> w() { return this->at(3); }

    // endregion

    // row-by-row equality with a matrix of the same shape
    template <typename R>
        requires std::equality_comparable_with<T, R>
    bool operator==(const Matrix<R, M, N> &rhs) const {
        return size() == rhs.size() && std::equal(this->begin(), this->end(), rhs.begin());
    }

    // unary vector negation
    template <typename S = T>
        requires(!std::unsigned_integral<T>) && requires(T &t) { -t; }
    Matrix operator-() const {
        return map(std::negate(), *this);
    }

    // add vectors
    template <typename R>
        requires operable<R, T, std::plus<>>
    Matrix operator+(const Matrix<R, M, N> &rhs) const {
        return map(std::plus(), *this, rhs);
    }

    // subtract vectors
    template <typename R>
        requires operable<R, T, std::minus<>>
    Matrix operator-(const Matrix<R, M, N> &rhs) const {
        // we can't just add the negation because that's invalid for unsigned int types
        return map(std::minus(), *this, rhs);
    }

    // multiply matrix with a matrix or column vector
    template <typename R, int P>
        requires(P == 1 || P == N) && operable<R, T, std::multiplies<>>
    Matrix operator*(const Matrix<R, M, P> &rhs) const {
        return map(std::multiplies(), *this, rhs);
    }

    // multiply matrix with a scalar
    template <typename R>
        requires operable<R, T, std::multiplies<>>
    Matrix operator*(const R &rhs) const {
        return map(std::multiplies(), *this, rhs);
    }

    // divides a matrix by a matrix or column vector
    template <typename R, int NN>
        requires(NN == 1 || NN == N) && operable<R, T, std::divides<>>
    Matrix operator/(const Matrix<R, M, NN> &rhs) const {
        return map(std::divides(), *this, rhs);
    }

    // divides a matrix by a scalar
    template <typename R>
        requires operable<R, T, std::divides<>>
    Matrix operator/(const R &rhs) const {
        return map(std::divides(), *this, rhs);
    }

    // add-assigns a matrix with a matrix
    template <typename R>
        requires operable<Matrix, R, std::plus<>>
    Matrix &operator+=(const R &rhs) {
        return *this = *this + rhs;
    }

    // subtract-assigns a matrix with a matrix
    template <typename R>
        requires operable<Matrix, R, std::minus<>>
    Matrix &operator-=(const R &rhs) {
        return *this = *this - rhs;
    }

    // multiply-assigns a matrix with a matrix, column vector, or a scalar
    template <typename R>
        requires operable<Matrix, R, std::multiplies<>>
    Matrix &operator*=(const R &rhs) {
        return *this = *this * rhs;
    }

    // divide-assigns a matrix by a matrix, column vector, or a scalar
    template <typename R>
        requires operable<Matrix, R, std::divides<>>
    Matrix &operator/=(const R &rhs) {
        return *this = *this / rhs;
    }

    // decay a 1x1 matrix to a scalar on demand
    template <typename S = T>
        requires(N == 1 && M == 1)
    operator S &() {
        return this->at(0);
    }

    template <typename S = T>
        requires(N == 1 && M == 1)
    operator const S &() const {
        return this->at(0);
    }

    // sum up all columns
    column_type hsum() const {
        // The branches are chained with else so that only the one matching this
        // shape is instantiated.
        if constexpr (N == 1) {
            return *this;
        } else if constexpr (M == 1) {
            return sum();
        } else {
            return _map<column_type>([](auto row) { return quicktex::sum(row); }, *this);
        }
    }

    // sum up all rows
    row_type vsum() const {
        if constexpr (N == 1) {
            return sum();
        } else if constexpr (M == 1) {
            return *this;
        } else {
            return std::accumulate(begin(), end(), row_type{});
        }
    }

    // sum up all values
    T sum() const {
        // TODO: reintroduce SIMDing for this
        return std::accumulate(all_begin(), all_end(), T(0));
    }

    /// Matrix product of this (M x N) with rhs (N x P), giving an M x P result.
    template <typename R, int P>
        requires operable<R, T, std::multiplies<>>
    Matrix<T, M, P> mult(const Matrix<R, N, P> &rhs) const {
        Matrix<T, M, P> res(0);
        for (int p = 0; p < P; p++) {
            // for each column of the RHS/Result
            for (int m = 0; m < M; m++) {
                // for each row of the LHS/Result
                for (int n = 0; n < N; n++) { res.element(m, p) += element(m, n) * rhs.element(n, p); }
            }
        }
        return res;
    }

    /// Row m of this matrix becomes column m of the result.
    Matrix<T, N, M> transpose() const {
        Matrix<T, N, M> res;
        for (int m = 0; m < M; m++) { res.set_column(m, get_row(m)); }
        return res;
    }

    /// Square matrices only: copy the part above the main diagonal onto the part below it.
    template <typename R = T>
        requires(N == M)
    Matrix mirror() const {
        Matrix result = *this;
        for (int n = 0; n < N - 1; n++) {
            for (int m = (n + 1); m < M; m++) { result.element(m, n) = result.element(n, m); }
        }
        return result;
    }

    // dot product of two compatible matrices
    template <typename R>
        requires(N == 1) && operable<T, R, std::multiplies<>> && operable<T, T, std::plus<>>
    inline row_type dot(const Matrix<R, M, N> &rhs) const {
        // technically this is Lt * R, but the vsum method is probably faster/more readable
        // than allocating a new transpose matrix
        Matrix product = *this * rhs;
        return product.vsum();
    }

    // squared magnitude: the dot product of this with itself
    inline row_type sqr_mag() const { return dot(*this); }

    // element-wise absolute value
    inline Matrix abs() const {
        return map([](auto c) { return quicktex::abs(c); }, *this);
    }

    // element-wise clamp against scalar bounds; does not modify this matrix
    inline Matrix clamp(T low, T high) const {
        return map([low, high](auto c) { return quicktex::clamp(c, low, high); }, *this);
    }

    // element-wise clamp against per-element bounds; does not modify this matrix
    inline Matrix clamp(const Matrix &low, const Matrix &high) const {
        return map([](auto c, auto l, auto h) { return quicktex::clamp(c, l, h); }, *this, low, high);
    }

   protected:
    /// Iterator over the columns of a matrix. Columns are built on demand and yielded by value.
    class column_iterator : public index_iterator_base<column_iterator, column_type> {
       public:
        using value_type = column_type;
        using base = index_iterator_base<column_iterator, column_type>;

        column_iterator(const Matrix *matrix = nullptr, int index = 0) : base(index), _matrix(matrix) {}

        column_type operator*() const { return _matrix->get_column(this->_index); }

        // A column is a temporary, so there is no stored object whose address could be
        // returned. This proxy owns the temporary for the duration of the `it->member`
        // expression and forwards operator-> to it.
        struct arrow_proxy {
            column_type value;
            const column_type *operator->() const { return &value; }
        };

        arrow_proxy operator->() const { return arrow_proxy{_matrix->get_column(this->_index)}; }

        friend bool operator==(const column_iterator &lhs, const column_iterator &rhs) {
            return (lhs._matrix == rhs._matrix) && (lhs._index == rhs._index);
        }

       private:
        const Matrix *_matrix;
    };

    /// Iterator over every scalar element of a matrix, using the linear element(i) accessor.
    template <typename V> class linear_iterator : public index_iterator_base<linear_iterator<V>, T> {
       public:
        using value_type = T;
        using base = index_iterator_base<linear_iterator<V>, T>;

        linear_iterator(V *matrix = nullptr, int index = 0) : base(index), _matrix(matrix) {}

        // dereferencing does not modify the iterator itself, so both forms are const
        auto &operator*() const { return _matrix->element(this->_index); }
        auto *operator->() const { return &(_matrix->element(this->_index)); }

        friend bool operator==(const linear_iterator &lhs, const linear_iterator &rhs) {
            return (lhs._matrix == rhs._matrix) && (lhs._index == rhs._index);
        }

       private:
        V *_matrix;
    };
};
} // namespace quicktex

View File

@ -1,114 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021-2022 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cassert> // for assert
#include <cstddef> // for size_t
#include <cstdint> // for uint8_t, uint16_t
#include "Matrix.h"
namespace quicktex {
class Vector4;
class Vector4Int;
#pragma pack(push, 1)
/// Four 8-bit channels stored in r, g, b, a order. Alpha defaults to 0xFF.
class OldColor {
   public:
    uint8_t r;
    uint8_t g;
    uint8_t b;
    uint8_t a;

    /// Black with alpha 0xFF.
    constexpr OldColor() : r(0), g(0), b(0), a(0xFF) {}

    constexpr OldColor(uint8_t vr, uint8_t vg, uint8_t vb, uint8_t va = 0xFF) : r(vr), g(vg), b(vb), a(va) {}

    OldColor(Vector4Int v);

    // 5:6:5 packing helpers (defined out of line)
    static uint16_t Pack565Unscaled(uint8_t r, uint8_t g, uint8_t b);
    static uint16_t Pack565(uint8_t r, uint8_t g, uint8_t b);

    static OldColor Unpack565Unscaled(uint16_t Packed);
    static OldColor Unpack565(uint16_t Packed);

    static OldColor PreciseRound565(Vector4 &v);

    static OldColor Min(const OldColor &A, const OldColor &B);
    static OldColor Max(const OldColor &A, const OldColor &B);

    bool operator==(const OldColor &Rhs) const;
    bool operator!=(const OldColor &Rhs) const;

    /// Read channel `index` (0 = r, 1 = g, 2 = b, 3 = a) by viewing the object as a byte array.
    uint8_t operator[](size_t index) const {
        assert(index < 4);
        const auto *channels = reinterpret_cast<const uint8_t *>(this);
        return channels[index];
    }

    /// Mutable access to channel `index` (0 = r, 1 = g, 2 = b, 3 = a).
    uint8_t &operator[](size_t index) {
        assert(index < 4);
        auto *channels = reinterpret_cast<uint8_t *>(this);
        return channels[index];
    }

    operator Vector4() const;
    operator Vector4Int() const;

    friend Vector4Int operator-(const OldColor &lhs, const OldColor &rhs);

    void SetRGB(uint8_t vr, uint8_t vg, uint8_t vb);

    /// Copy the r, g and b channels of `other`; alpha is not passed on.
    void SetRGB(const OldColor &other) { SetRGB(other.r, other.g, other.b); }

    uint16_t Pack565() const;
    uint16_t Pack565Unscaled() const;

    OldColor ScaleTo565() const;
    OldColor ScaleFrom565() const;

    size_t MinChannelRGB();
    size_t MaxChannelRGB();

    /// True when the three colour channels are all equal.
    bool IsGrayscale() const { return r == g && g == b; }

    /// True when every colour channel is below 4.
    bool IsBlack() const { return r < 4 && g < 4 && b < 4; }

    /// Weighted luma using REC709 weightings in 16-bit fixed point (the weights sum to 65536).
    int GetLuma() const {
        const unsigned weighted = 13938U * r + 46869U * g + 4729U * b;
        // adding 32768 (half of 65536) rounds to nearest before the shift
        return (weighted + 32768U) >> 16U;
    }

    operator Vec<uint8_t, 4>() const { return Vec<uint8_t, 4>{r, g, b, a}; }

    OldColor(const Vec<uint8_t, 4> v) : r(v.r()), g(v.g()), b(v.b()), a(v.a()) {}

   private:
    static constexpr float Midpoints5bit[32] = {
        .015686f, .047059f, .078431f, .111765f, .145098f, .176471f, .207843f, .241176f, .274510f, .305882f, .337255f,
        .370588f, .403922f, .435294f, .466667f, .5f, .533333f, .564706f, .596078f, .629412f, .662745f, .694118f,
        .725490f, .758824f, .792157f, .823529f, .854902f, .888235f, .921569f, .952941f, .984314f, 1e+37f};

    static constexpr float Midpoints6bit[64] = {
        .007843f, .023529f, .039216f, .054902f, .070588f, .086275f, .101961f, .117647f, .133333f, .149020f, .164706f,
        .180392f, .196078f, .211765f, .227451f, .245098f, .262745f, .278431f, .294118f, .309804f, .325490f, .341176f,
        .356863f, .372549f, .388235f, .403922f, .419608f, .435294f, .450980f, .466667f, .482353f, .500000f, .517647f,
        .533333f, .549020f, .564706f, .580392f, .596078f, .611765f, .627451f, .643137f, .658824f, .674510f, .690196f,
        .705882f, .721569f, .737255f, .754902f, .772549f, .788235f, .803922f, .819608f, .835294f, .850980f, .866667f,
        .882353f, .898039f, .913725f, .929412f, .945098f, .960784f, .976471f, .992157f, 1e+37f};
};
#pragma pack(pop)
} // namespace quicktex

187
quicktex/Texture.h Normal file
View File

@ -0,0 +1,187 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021-2022 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <vector>
#include "Color.h"
#include "ColorBlock.h"
namespace quicktex {
/**
 * Abstract base for textures: remembers the dimensions it was created with and
 * leaves byte-level storage access to subclasses.
 */
class Texture {
   public:
    virtual ~Texture() = default;

    /// Width the texture was constructed with.
    virtual int Width() const { return _width; }

    /// Height the texture was constructed with.
    virtual int Height() const { return _height; }

    /// Width and height together, in that order.
    virtual std::tuple<int, int> Size() const { return std::make_tuple(_width, _height); }

    /**
     * The texture's total size
     * @return The size of the texture in bytes.
     */
    virtual size_t NBytes() const noexcept = 0;

    /// Pointer to the first byte of the subclass's storage.
    virtual const uint8_t *Data() const noexcept = 0;
    virtual uint8_t *Data() noexcept = 0;

   protected:
    /**
     * Store the dimensions, rejecting anything that is not strictly positive.
     * @throws std::invalid_argument if width or height is less than 1 (width is checked first)
     */
    Texture(int width, int height) : _width{width}, _height{height} {
        if (_width < 1) { throw std::invalid_argument("Texture width must be greater than 0"); }
        if (_height < 1) { throw std::invalid_argument("Texture height must be greater than 0"); }
    }

    int _width;
    int _height;
};
/**
 * An uncompressed texture: one Color per pixel, stored row by row.
 */
class RawTexture : public Texture {
    using Base = Texture;

   public:
    /**
     * Create a new RawTexture
     * @param width width of the texture in pixels
     * @param height height of the texture in pixels
     * @throws std::invalid_argument (from the base class) if either dimension is not positive
     */
    RawTexture(int width, int height)
        // Base has already rejected non-positive sizes; multiply in size_t so large textures cannot overflow int
        : Base(width, height), _pixels(static_cast<size_t>(_width) * static_cast<size_t>(_height)) {}

    /**
     * Read one pixel
     * @throws std::invalid_argument if x or y lies outside the texture
     */
    Color GetPixel(int x, int y) const {
        if (x < 0 || x >= _width) throw std::invalid_argument("x value out of range.");
        if (y < 0 || y >= _height) throw std::invalid_argument("y value out of range.");
        return _pixels.at(x + (y * _width));
    }

    /**
     * Write one pixel
     * @throws std::invalid_argument if x or y lies outside the texture
     */
    void SetPixel(int x, int y, Color val) {
        if (x < 0 || x >= _width) throw std::invalid_argument("x value out of range.");
        if (y < 0 || y >= _height) throw std::invalid_argument("y value out of range.");
        _pixels.at(x + (y * _width)) = val;
    }

    /// Size of the pixel data in bytes. Each factor is widened to size_t before multiplying.
    size_t NBytes() const noexcept override {
        return static_cast<size_t>(Width()) * static_cast<size_t>(Height()) * sizeof(Color);
    }

    /**
     * Copy an N-wide, M-high block of pixels out of the texture
     * @param block_x block column (in units of N pixels)
     * @param block_y block row (in units of M pixels)
     * @return the block; pixels that would fall outside the texture are taken from
     *         coordinates wrapped modulo the texture size
     * @throws std::out_of_range if either block coordinate is negative
     */
    template <int N, int M> ColorBlock<N, M> GetBlock(int block_x, int block_y) const {
        if (block_x < 0) throw std::out_of_range("x value out of range.");
        if (block_y < 0) throw std::out_of_range("y value out of range.");

        ColorBlock<N, M> block;

        // coordinates in the image of the top-left pixel of the selected block
        int pixel_x = block_x * N;
        int pixel_y = block_y * M;

        // <= rather than <: a block whose last row/column is exactly the texture's last
        // row/column is still entirely inside and can use the fast path
        if (pixel_x + N <= _width && pixel_y + M <= _height) {
            // fast memcpy if the block is entirely inside the bounds of the texture
            for (int y = 0; y < M; y++) {
                // copy each row into the ColorBlock
                block.SetRow(y, &_pixels[pixel_x + (_width * (pixel_y + y))]);
            }
        } else {
            // slower pixel-wise copy if the block goes over the edges
            for (int x = 0; x < N; x++) {
                for (int y = 0; y < M; y++) {
                    block.Set(x, y, GetPixel((pixel_x + x) % _width, (pixel_y + y) % _height));
                }
            }
        }

        return block;
    }

    /**
     * Copy an N-wide, M-high block of pixels into the texture
     * @param block_x block column (in units of N pixels)
     * @param block_y block row (in units of M pixels)
     * @param block pixels to write; pixels that would fall outside the texture are
     *        written to coordinates wrapped modulo the texture size
     * @throws std::out_of_range if either block coordinate is negative
     */
    template <int N, int M> void SetBlock(int block_x, int block_y, const ColorBlock<N, M> &block) {
        if (block_x < 0) throw std::out_of_range("x value out of range.");
        if (block_y < 0) throw std::out_of_range("y value out of range.");

        // coordinates in the image of the top-left pixel of the selected block
        int pixel_x = block_x * N;
        int pixel_y = block_y * M;

        // <= rather than <: blocks flush with the right/bottom edge are fully in bounds
        if (pixel_x + N <= _width && pixel_y + M <= _height) {
            // fast row-wise memcpy if the block is entirely inside the bounds of the texture
            for (int y = 0; y < M; y++) {
                // copy each row out of the ColorBlock
                block.GetRow(y, &_pixels[pixel_x + (_width * (pixel_y + y))]);
            }
        } else {
            // slower pixel-wise copy if the block goes over the edges
            for (int x = 0; x < N; x++) {
                for (int y = 0; y < M; y++) {
                    SetPixel((pixel_x + x) % _width, (pixel_y + y) % _height, block.Get(x, y));
                }
            }
        }
    }

    /// Pixel storage viewed as raw bytes.
    virtual const uint8_t *Data() const noexcept override { return reinterpret_cast<const uint8_t *>(_pixels.data()); }
    virtual uint8_t *Data() noexcept override { return reinterpret_cast<uint8_t *>(_pixels.data()); }

   protected:
    std::vector<Color> _pixels;
};
/**
 * A texture stored as a grid of fixed-size blocks of type B.
 * @tparam B block type; must provide B::Width and B::Height
 */
template <typename B> class BlockTexture final : public Texture {
   private:
    std::vector<B> _blocks;
    int _width_b;   // number of blocks per row
    int _height_b;  // number of block rows

    // Position of block (x, y) in _blocks, computed in size_t so the product cannot overflow int.
    size_t BlockIndex(int x, int y) const {
        return static_cast<size_t>(x) + (static_cast<size_t>(y) * static_cast<size_t>(_width_b));
    }

   public:
    using BlockType = B;
    using Base = Texture;

    /**
     * Create a new BlockTexture
     * @param width width of the texture in pixels. Need not be a multiple of B::Width;
     *        the number of blocks per row is rounded up to cover the full width
     * @param height height of the texture in pixels. Need not be a multiple of B::Height;
     *        the number of block rows is rounded up to cover the full height
     * @throws std::invalid_argument (from the base class) if either dimension is not positive
     */
    BlockTexture(int width, int height) : Base(width, height) {
        _width_b = (_width + B::Width - 1) / B::Width;
        _height_b = (_height + B::Height - 1) / B::Height;
        _blocks = std::vector<B>(static_cast<size_t>(_width_b) * static_cast<size_t>(_height_b));
    }

    /// Number of blocks per row.
    constexpr int BlocksX() const { return _width_b; }

    /// Number of block rows.
    constexpr int BlocksY() const { return _height_b; }

    /// Block counts as (per row, rows).
    constexpr std::tuple<int, int> BlocksXY() const { return std::tuple<int, int>(_width_b, _height_b); }

    /**
     * Read the block at block coordinates (x, y)
     * @throws std::out_of_range if x or y lies outside the block grid
     */
    B GetBlock(int x, int y) const {
        if (x < 0 || x >= _width_b) throw std::out_of_range("x value out of range.");
        if (y < 0 || y >= _height_b) throw std::out_of_range("y value out of range.");
        return _blocks.at(BlockIndex(x, y));
    }

    /**
     * Overwrite the block at block coordinates (x, y)
     * @throws std::out_of_range if x or y lies outside the block grid
     */
    void SetBlock(int x, int y, const B &val) {
        if (x < 0 || x >= _width_b) throw std::out_of_range("x value out of range.");
        if (y < 0 || y >= _height_b) throw std::out_of_range("y value out of range.");
        _blocks.at(BlockIndex(x, y)) = val;
    }

    /// Size of the block data in bytes.
    size_t NBytes() const noexcept override { return _blocks.size() * sizeof(B); }

    /// Block storage viewed as raw bytes.
    const uint8_t *Data() const noexcept override { return reinterpret_cast<const uint8_t *>(_blocks.data()); }
    uint8_t *Data() noexcept override { return reinterpret_cast<uint8_t *>(_blocks.data()); }
};
} // namespace quicktex

View File

@ -23,7 +23,7 @@
#include <cmath>
#include <functional>
#include "OldColor.h"
#include "Color.h"
namespace quicktex {
@ -45,11 +45,11 @@ class Vector4 {
_c[3] = scalar;
}
Vector4(const OldColor &c) : Vector4(c.r, c.g, c.b, c.a) {}
Vector4(const Color &c) : Vector4(c.r, c.g, c.b, c.a) {}
static Vector4 FromColor(const OldColor &c) { return Vector4(c); }
static Vector4 FromColor(const Color &c) { return Vector4(c); }
static Vector4 FromColorRGB(const OldColor &c) { return Vector4(c.r, c.g, c.b); }
static Vector4 FromColorRGB(const Color &c) { return Vector4(c.r, c.g, c.b); }
static float Dot(const Vector4 &lhs, const Vector4 &rhs) {
float sum = 0;

View File

@ -22,7 +22,7 @@
#include <array>
#include <functional>
#include "OldColor.h"
#include "Color.h"
#include "Vector4.h"
namespace quicktex {
@ -45,11 +45,11 @@ class Vector4Int {
_c[3] = scalar;
}
Vector4Int(const OldColor &c) : Vector4Int(c.r, c.g, c.b, c.a) {}
Vector4Int(const Color &c) : Vector4Int(c.r, c.g, c.b, c.a) {}
static Vector4Int FromColor(const OldColor &c) { return Vector4Int(c); }
static Vector4Int FromColor(const Color &c) { return Vector4Int(c); }
static Vector4Int FromColorRGB(const OldColor &c) { return Vector4Int(c.r, c.g, c.b); }
static Vector4Int FromColorRGB(const Color &c) { return Vector4Int(c.r, c.g, c.b); }
static int Dot(const Vector4Int &lhs, const Vector4Int &rhs) {
int sum = 0;

View File

@ -21,12 +21,11 @@
#include <pybind11/pybind11.h>
#include "Color.h"
#include "Decoder.h"
#include "Encoder.h"
#include "OldColor.h"
#include "Texture.h"
#include "_bindings.h"
#include "texture/RawTexture.h"
#include "texture/Texture.h"
#define STRINGIFY(x) #x
#define MACRO_STRINGIFY(x) STRINGIFY(x)
@ -46,26 +45,19 @@ PYBIND11_MODULE(_quicktex, m) {
m.attr("__version__") = "dev";
#endif
#ifdef NDEBUG
m.attr("_debug_build") = false;
#else
m.attr("_debug_build") = true;
#endif
py::options options;
// Texture
py::class_<Texture> texture(m, "Texture", py::buffer_protocol());
texture.def_property_readonly("nbytes", &Texture::nbytes);
texture.def_property_readonly("nbytes", &Texture::NBytes);
texture.def_property_readonly("size", &Texture::Size);
texture.def_readonly("width", &Texture::width);
texture.def_readonly("height", &Texture::height);
texture.def_property_readonly("width", &Texture::Width);
texture.def_property_readonly("height", &Texture::Height);
texture.def_buffer([](Texture &t) { return py::buffer_info(t.data(), t.nbytes()); });
texture.def("tobytes",
[](const Texture &t) { return py::bytes(reinterpret_cast<const char *>(t.data()), t.nbytes()); });
texture.def_buffer([](Texture &t) { return py::buffer_info(t.Data(), t.NBytes()); });
texture.def("tobytes", [](const Texture &t) { return py::bytes(reinterpret_cast<const char *>(t.Data()), t.NBytes()); });
// RawTexture
@ -74,9 +66,7 @@ PYBIND11_MODULE(_quicktex, m) {
raw_texture.def(py::init<int, int>(), "width"_a, "height"_a);
raw_texture.def_static("frombytes", &BufferToTexture<RawTexture>, "data"_a, "width"_a, "height"_a);
DefSubscript2DRef(
raw_texture, [](RawTexture &self, int x, int y) -> Color { return self.pixel(x, y); },
[](RawTexture &self, int x, int y, Color val) { self.pixel(x, y) = val; }, &RawTexture::Size);
DefSubscript2D(raw_texture, &RawTexture::GetPixel, &RawTexture::SetPixel, &RawTexture::Size);
InitS3TC(m);
}

View File

@ -24,66 +24,18 @@
#include <cstdint>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <string>
#include <tuple>
#include <type_traits>
#include <utility>
#include <vector>
#include "OldColor.h"
#include "texture/BlockTexture.h"
#include "util/math.h"
#include "Color.h"
#include "ColorBlock.h"
#include "Texture.h"
#include "util.h"
namespace pybind11::detail {
using namespace quicktex;
/// Type caster for color class to allow it to be converted to and from a python tuple
template <> struct type_caster<OldColor> {
   public:
    PYBIND11_TYPE_CASTER(OldColor, _("Color"));

    /**
     * Python -> C++: accept any sequence of 3 or 4 integers in the range 0..255.
     * A missing fourth channel leaves alpha at 0xFF.
     * @return true when `value` was filled in, false when the object cannot be converted
     */
    bool load(handle src, bool) {
        PyObject* source = src.ptr();

        // PySequence_Tuple returns a new reference, which must be released on every exit path
        PyObject* tmp = PySequence_Tuple(source);

        // if the object is not a tuple, return false
        if (!tmp) { return false; }  // incorrect type

        // check the size
        Py_ssize_t size = PyTuple_Size(tmp);
        if (size < 3 || size > 4) {
            Py_DECREF(tmp);
            return false;  // incorrect size
        }

        value.a = 0xFF;

        // now we get the contents
        for (int i = 0; i < size; i++) {
            PyObject* src_chan = PyTuple_GetItem(tmp, i);  // borrowed reference, nothing to release
            PyObject* tmp_chan = PyNumber_Long(src_chan);  // new reference

            if (!tmp_chan) {
                Py_DECREF(tmp);
                return false;  // incorrect channel type
            }

            auto chan = PyLong_AsLong(tmp_chan);
            // the integer object is no longer needed once its value has been read
            Py_DECREF(tmp_chan);

            if (chan > 0xFF || chan < 0) {
                Py_DECREF(tmp);
                return false;  // item out of range
            }

            value[static_cast<unsigned>(i)] = static_cast<uint8_t>(chan);
        }

        Py_DECREF(tmp);

        return !PyErr_Occurred();
    }

    /// C++ -> Python: always produces a 4-tuple of integers, one per channel.
    static handle cast(OldColor src, return_value_policy, handle) {
        PyObject* val = PyTuple_New(4);

        for (int i = 0; i < 4; i++) {
            PyObject* chan = PyLong_FromLong(src[static_cast<unsigned>(i)]);
            // PyTuple_SetItem takes over the reference to chan
            PyTuple_SetItem(val, i, chan);
        }

        return val;
    }
};
template <> struct type_caster<Color> {
public:
PYBIND11_TYPE_CASTER(Color, _("Color"));
@ -100,7 +52,7 @@ template <> struct type_caster<Color> {
Py_ssize_t size = PyTuple_Size(tmp);
if (size < 3 || size > 4) { return false; } // incorrect size
value.a() = 0xFF;
value.a = 0xFF;
// now we get the contents
for (int i = 0; i < size; i++) {
PyObject* src_chan = PyTuple_GetItem(tmp, i);
@ -133,49 +85,26 @@ template <> struct type_caster<Color> {
namespace py = pybind11;
namespace quicktex::bindings {
using namespace pybind11::literals;
/**
 * Minimal positional string formatter: every occurrence of "{N}" in @p str is replaced
 * by the N-th argument (zero-based).
 *
 * Placeholders are substituted one index at a time, in ascending order. Text inserted for
 * a placeholder is never rescanned for that same placeholder, so a value that itself
 * contains "{N}" cannot cause an endless loop. With no arguments the template is returned
 * unchanged.
 *
 * @param str  template text containing "{0}", "{1}", ... placeholders
 * @param args values to insert; each must be convertible to std::string
 * @return the formatted string
 */
template <typename... Args> std::string Format(const char* str, const Args&... args) {
    std::string output(str);

    // Convert each argument explicitly: a nested braced list could otherwise select
    // std::string's iterator-pair constructor when given two const char* arguments.
    const std::vector<std::string> values = {std::string(args)...};

    for (size_t i = 0; i < values.size(); i++) {
        const std::string key = "{" + std::to_string(i) + "}";
        const std::string& value = values[i];

        // resume searching after the inserted text so the replacement is never re-matched
        for (size_t where = output.find(key); where != std::string::npos; where = output.find(key, where + value.length())) {
            output.replace(where, key.length(), value);
        }
    }

    return output;
}
template <typename T> T BufferToTexture(py::buffer buf, int width, int height) {
static_assert(std::is_base_of<Texture, T>::value);
static_assert(std::is_constructible<T, int, int>::value);
auto info = buf.request(false);
auto output = T(width, height);
auto dst_size = output.nbytes();
auto dst_size = output.NBytes();
if (info.format != py::format_descriptor<uint8_t>::format())
throw std::runtime_error("Incompatible format in python buffer: expected a byte array.");
if (info.size < (Py_ssize_t)dst_size)
std::runtime_error("Incompatible format in python buffer: Input data is smaller than texture size.");
if (info.format != py::format_descriptor<uint8_t>::format()) throw std::runtime_error("Incompatible format in python buffer: expected a byte array.");
if (info.size < (Py_ssize_t)dst_size) std::runtime_error("Incompatible format in python buffer: Input data is smaller than texture size.");
if (info.ndim == 1) {
if (info.shape[0] < (Py_ssize_t)dst_size)
throw std::runtime_error("Incompatible format in python buffer: 1-D buffer has incorrect length.");
if (info.strides[0] != 1)
throw std::runtime_error("Incompatible format in python buffer: 1-D buffer is not contiguous.");
if (info.shape[0] < (Py_ssize_t)dst_size) throw std::runtime_error("Incompatible format in python buffer: 1-D buffer has incorrect length.");
if (info.strides[0] != 1) throw std::runtime_error("Incompatible format in python buffer: 1-D buffer is not contiguous.");
} else {
throw std::runtime_error("Incompatible format in python buffer: Incorrect number of dimensions.");
}
std::memcpy(output.data(), info.ptr, dst_size);
std::memcpy(output.Data(), info.ptr, dst_size);
return output;
}
@ -185,15 +114,11 @@ template <typename T> T BufferToPOD(py::buffer buf) {
auto info = buf.request(false);
if (info.format != py::format_descriptor<uint8_t>::format())
throw std::runtime_error("Incompatible format in python buffer: expected a byte array.");
if (info.size < (Py_ssize_t)sizeof(T))
std::runtime_error("Incompatible format in python buffer: Input data is smaller than texture size.");
if (info.format != py::format_descriptor<uint8_t>::format()) throw std::runtime_error("Incompatible format in python buffer: expected a byte array.");
if (info.size < (Py_ssize_t)sizeof(T)) std::runtime_error("Incompatible format in python buffer: Input data is smaller than texture size.");
if (info.ndim == 1) {
if (info.shape[0] < (Py_ssize_t)sizeof(T))
throw std::runtime_error("Incompatible format in python buffer: 1-D buffer has incorrect length.");
if (info.strides[0] != 1)
throw std::runtime_error("Incompatible format in python buffer: 1-D buffer is not contiguous.");
if (info.shape[0] < (Py_ssize_t)sizeof(T)) throw std::runtime_error("Incompatible format in python buffer: 1-D buffer has incorrect length.");
if (info.strides[0] != 1) throw std::runtime_error("Incompatible format in python buffer: 1-D buffer is not contiguous.");
} else {
throw std::runtime_error("Incompatible format in python buffer: Incorrect number of dimensions.");
}
@ -208,18 +133,15 @@ inline int PyIndex(int val, int size, std::string name = "index") {
return val;
}
template <typename T, typename Getter, typename Setter, typename Extent>
void DefSubscript(py::class_<T> t, Getter&& get, Setter&& set, Extent&& ext) {
template <typename T, typename Getter, typename Setter, typename Extent> void DefSubscript(py::class_<T> t, Getter&& get, Setter&& set, Extent&& ext) {
using V = typename std::invoke_result<Getter, T*, int>::type;
t.def(
"__getitem__", [get, ext](T& self, int index) { return (self.*get)(PyIndex(index, (self.*ext)())); }, "key"_a);
t.def(
"__setitem__", [set, ext](T& self, int index, V val) { (self.*set)(PyIndex(index, (self.*ext)()), val); },
"key"_a, "value"_a);
"__setitem__", [set, ext](T& self, int index, V val) { (self.*set)(PyIndex(index, (self.*ext)()), val); }, "key"_a, "value"_a);
}
template <typename Tpy, typename Getter, typename Setter, typename Extent>
void DefSubscript2D(Tpy t, Getter&& get, Setter&& set, Extent&& ext) {
template <typename Tpy, typename Getter, typename Setter, typename Extent> void DefSubscript2D(Tpy t, Getter&& get, Setter&& set, Extent&& ext) {
using T = typename Tpy::type;
using V = typename std::invoke_result<Getter, T*, int, int>::type;
using Coords = std::tuple<int, int>;
@ -243,32 +165,6 @@ void DefSubscript2D(Tpy t, Getter&& get, Setter&& set, Extent&& ext) {
"key"_a, "value"_a);
}
// TODO: untangle this mess
/**
 * Register 2D subscript access (obj[x, y]) on a bound class.
 *
 * @param t   the pybind11 class object to add __getitem__ / __setitem__ to
 * @param get callable invoked as get(self, x, y) to read an element
 * @param set callable invoked as set(self, x, y, value) to write an element
 * @param ext pointer to a member function of T whose result converts to (width, height)
 *
 * Each coordinate is passed through PyIndex together with the matching extent
 * before the accessor is called.
 */
template <typename Tpy, typename Getter, typename Setter, typename Extent>
void DefSubscript2DRef(Tpy t, Getter&& get, Setter&& set, Extent&& ext) {
    using T = typename Tpy::type;
    using V = typename std::remove_cvref_t<std::invoke_result_t<Getter, T&, int, int>>;
    using Coords = std::tuple<int, int>;

    // read access: obj[x, y]
    t.def(
        "__getitem__",
        [get, ext](T& self, Coords pnt) {
            const Coords extent = (self.*ext)();
            const auto [raw_x, raw_y] = pnt;
            const auto [size_x, size_y] = extent;
            const int col = PyIndex(raw_x, size_x, "x");
            const int row = PyIndex(raw_y, size_y, "y");
            return get(self, col, row);
        },
        "key"_a);

    // write access: obj[x, y] = value
    t.def(
        "__setitem__",
        [set, ext](T& self, Coords pnt, const V& val) {
            const Coords extent = (self.*ext)();
            const auto [raw_x, raw_y] = pnt;
            const auto [size_x, size_y] = extent;
            const int col = PyIndex(raw_x, size_x, "x");
            const int row = PyIndex(raw_y, size_y, "y");
            set(self, col, row, val);
        },
        "key"_a, "value"_a);
}
template <typename B> py::class_<B> BindBlock(py::module_& m, const char* name) {
const char* frombytes_doc = R"doc(
Create a new {0} by copying a bytes-like object.
@ -288,8 +184,7 @@ template <typename B> py::class_<B> BindBlock(py::module_& m, const char* name)
block.def_readonly_static("width", &B::Width, "The width of the block in pixels.");
block.def_readonly_static("height", &B::Height, "The height of the block in pixels.");
block.def_property_readonly_static(
"size", [](py::object) { return std::make_tuple(B::Width, B::Height); },
"The dimensions of the block in pixels.");
"size", [](py::object) { return std::make_tuple(B::Width, B::Height); }, "The dimensions of the block in pixels.");
block.def_property_readonly_static(
"nbytes", [](py::object) { return sizeof(B); }, "The size of the block in bytes.");
@ -300,7 +195,7 @@ template <typename B> py::class_<B> BindBlock(py::module_& m, const char* name)
"tobytes", [](const B& b) { return py::bytes(reinterpret_cast<const char*>(&b), sizeof(B)); },
Format(tobytes_doc, name, std::to_string(sizeof(B))).c_str());
return block;
return std::move(block);
}
template <typename B> py::class_<BlockTexture<B>> BindBlockTexture(py::module_& m, const char* name) {
@ -328,15 +223,14 @@ template <typename B> py::class_<BlockTexture<B>> BindBlockTexture(py::module_&
py::class_<BTex, Texture> block_texture(m, name);
block_texture.def(py::init<int, int>(), "width"_a, "height"_a, Format(constructor_str, name).c_str());
block_texture.def_static("from_bytes", &BufferToTexture<BTex>, "data"_a, "width"_a, "height"_a,
Format(from_bytes_str, name).c_str());
block_texture.def_static("from_bytes", &BufferToTexture<BTex>, "data"_a, "width"_a, "height"_a, Format(from_bytes_str, name).c_str());
block_texture.def_property_readonly("width_blocks", &BTex::bwidth, "The width of the texture in blocks.");
block_texture.def_property_readonly("height_blocks", &BTex::bheight, "The height of the texture in blocks.");
block_texture.def_property_readonly("size_blocks", &BTex::bsize, "The dimensions of the texture in blocks.");
block_texture.def_property_readonly("width_blocks", &BTex::BlocksX, "The width of the texture in blocks.");
block_texture.def_property_readonly("height_blocks", &BTex::BlocksY, "The height of the texture in blocks.");
block_texture.def_property_readonly("size_blocks", &BTex::BlocksXY, "The dimensions of the texture in blocks.");
DefSubscript2D(block_texture, &BTex::get_block, &BTex::set_block, &BTex::bsize);
DefSubscript2D(block_texture, &BTex::GetBlock, &BTex::SetBlock, &BTex::BlocksXY);
return block_texture;
return std::move(block_texture);
}
} // namespace quicktex::bindings

View File

@ -21,48 +21,38 @@
#include <type_traits>
namespace quicktex {
// Thanks dkavolis
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator~(E a) noexcept -> E {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator~(E a) noexcept -> E {
using Base = std::underlying_type_t<E>;
return static_cast<E>(~static_cast<Base>(a));
}
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator|(E a, E b) noexcept -> E {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator|(E a, E b) noexcept -> E {
using Base = std::underlying_type_t<E>;
return static_cast<E>(static_cast<Base>(a) | static_cast<Base>(b));
}
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator&(E a, E b) noexcept -> E {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator&(E a, E b) noexcept -> E {
using Base = std::underlying_type_t<E>;
return static_cast<E>(static_cast<Base>(a) & static_cast<Base>(b));
}
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator^(E a, E b) noexcept -> E {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator^(E a, E b) noexcept -> E {
using Base = std::underlying_type_t<E>;
return static_cast<E>(static_cast<Base>(a) ^ static_cast<Base>(b));
}
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator|=(E& a, E b) noexcept -> E& {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator|=(E& a, E b) noexcept -> E& {
a = a | b;
return a;
}
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator&=(E& a, E b) noexcept -> E& {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator&=(E& a, E b) noexcept -> E& {
a = a & b;
return a;
}
template <typename E> requires std::is_enum_v<E>
constexpr inline auto operator^=(E& a, E b) noexcept -> E& {
template <typename E, typename = std::enable_if_t<std::is_enum_v<E>>> constexpr inline auto operator^=(E& a, E b) noexcept -> E& {
a = a ^ b;
return a;
}
} // namespace quicktex
}

View File

@ -40,7 +40,7 @@ def path_pairs(inputs, output, suffix, extension):
"""
if len(inputs) < 1:
raise click.BadArgumentUsage('No input files were provided.')
raise click.BadArgumentUsage('No valid input files were provided.')
inpaths = [pathlib.Path(i) for i in inputs]

View File

@ -36,7 +36,8 @@ def encode():
help="Output file or directory. If outputting to a file, input filenames must be only a single item. By default, files are decoded in place.",
)
@click.argument('filenames', nargs=-1, type=click.Path(exists=True, readable=True, dir_okay=False))
def encode_format(encoder, four_cc, flip, remove, suffix, output, filenames):
def encode_format(encoder, four_cc, flip, remove, suffix, output, filenames, swizzle=False):
filenames = [f for f in filenames if not f.endswith('.dds')]
path_pairs = common.path_pairs(filenames, output, suffix, '.dds')
with click.progressbar(
@ -48,6 +49,11 @@ def encode_format(encoder, four_cc, flip, remove, suffix, output, filenames):
if flip:
image = image.transpose(Image.FLIP_TOP_BOTTOM)
if swizzle:
bands = image.split()
one = Image.new('L', image.size, 0xFF)
image = Image.merge('RGBA', (one, bands[1], bands[1], bands[0]))
dds.encode(image, encoder, four_cc).save(outpath)
if remove:
@ -107,8 +113,11 @@ def encode_auto(level, black, threecolor, flip, remove, suffix, output, filename
bc1_encoder = quicktex.s3tc.bc1.BC1Encoder(level, mode)
bc3_encoder = quicktex.s3tc.bc3.BC3Encoder(level)
filenames = [f for f in filenames if not f.endswith('.dds')]
path_pairs = common.path_pairs(filenames, output, suffix, '.dds')
assert len(filenames) > 0
with click.progressbar(
path_pairs, show_eta=False, show_pos=True, item_show_func=lambda x: str(x[0]) if x else ''
) as bar:
@ -175,9 +184,16 @@ def encode_bc1(level, black, threecolor, **kwargs):
default=18,
help='Quality level to use. Higher values = higher quality, but slower.',
)
def encode_bc3(level, **kwargs):
@click.option(
'-n/-N',
'--normal/--no-normal',
type=bool,
default=False,
help='Perform a BC3nm swizzle, copying the red channel into the alpha [default: no-normal]',
)
def encode_bc3(level, normal, **kwargs):
"""Encode images to BC4 (RGBA, 8-bit interpolated alpha)."""
encode_format.callback(quicktex.s3tc.bc3.BC3Encoder(level), 'DXT5', **kwargs)
encode_format.callback(quicktex.s3tc.bc3.BC3Encoder(level), 'DXT5', swizzle=normal, **kwargs)
@click.command('bc4')

View File

@ -275,6 +275,7 @@ def read(path: os.PathLike) -> DDSFile:
def encode(image: Image.Image, encoder, four_cc: str, mip_count: typing.Optional[int] = None) -> DDSFile:
if image.mode != 'RGBA' or image.mode != 'RGBX':
mode = 'RGBA' if 'A' in image.mode else 'RGBX'
image.apply_transparency() # why is this necessary what
image = image.convert(mode)
sizes = quicktex.image_utils.mip_sizes(image.size, mip_count)

View File

@ -20,35 +20,28 @@
#include "BC1Block.h"
#include <stdexcept>
#include <algorithm>
#include "util/bitbash.h"
#include "util/map.h"
#include "util/math.h"
#include "util/ranges.h"
#include "../../util.h"
namespace quicktex::s3tc {
uint16_t BC1Block::GetColor0Raw() const { return Pack<uint8_t, uint16_t, 8, EndpointSize>(_color0); }
uint16_t BC1Block::GetColor1Raw() const { return Pack<uint8_t, uint16_t, 8, EndpointSize>(_color1); }
uint16_t BC1Block::GetColor0Raw() const { return pack<uint16_t>(_color0, 8); }
uint16_t BC1Block::GetColor1Raw() const { return pack<uint16_t>(_color1, 8); }
void BC1Block::SetColor0Raw(uint16_t c) { _color0 = Unpack<uint16_t, uint8_t, 8, EndpointSize>(c); }
void BC1Block::SetColor1Raw(uint16_t c) { _color1 = Unpack<uint16_t, uint8_t, 8, EndpointSize>(c); }
void BC1Block::SetColor0Raw(uint16_t c) { _color0 = unpack<uint8_t, EndpointSize>(c, 8); }
void BC1Block::SetColor1Raw(uint16_t c) { _color1 = unpack<uint8_t, EndpointSize>(c, 8); }
BC1Block::SelectorArray BC1Block::GetSelectors() const {
return map([](auto row) { return unpack<uint8_t, Width>(row, SelectorBits); }, _selectors);
}
BC1Block::SelectorArray BC1Block::GetSelectors() const { return MapArray(_selectors, Unpack<uint8_t, uint8_t, SelectorBits, Width>); }
void BC1Block::SetSelectors(const BC1Block::SelectorArray& unpacked) {
for (unsigned y = 0; y < (unsigned)Height; y++) {
if (std::any_of(unpacked[y].begin(), unpacked[y].end(), [](uint8_t i) { return i > SelectorMax; }))
throw std::invalid_argument("Selector value out of bounds.");
}
_selectors = map([](auto row) { return pack<uint8_t>(row, SelectorBits, true); }, unpacked);
_selectors = MapArray(unpacked, Pack<uint8_t, uint8_t, SelectorBits, Width>);
}
bool BC1Block::operator==(const BC1Block& Rhs) const {
return _color0 == Rhs._color0 && _color1 == Rhs._color1 && _selectors == Rhs._selectors;
}
bool BC1Block::operator==(const BC1Block& Rhs) const { return _color0 == Rhs._color0 && _color1 == Rhs._color1 && _selectors == Rhs._selectors; }
bool BC1Block::operator!=(const BC1Block& Rhs) const { return !(Rhs == *this); }
} // namespace quicktex::s3tc

View File

@ -24,7 +24,7 @@
#include <cstdlib>
#include <utility>
#include "OldColor.h"
#include "../../Color.h"
namespace quicktex::s3tc {
@ -39,7 +39,7 @@ class alignas(8) BC1Block {
static constexpr uint8_t SelectorMax = (1 << SelectorBits) - 1; // maximum value of a selector
using SelectorArray = std::array<std::array<uint8_t, Width>, Height>;
using ColorPair = std::pair<OldColor, OldColor>;
using ColorPair = std::pair<Color, Color>;
private:
std::array<uint8_t, EndpointSize> _color0;
@ -60,7 +60,7 @@ class alignas(8) BC1Block {
* @param color1 second endpoint color
* @param selectors the selectors as a 4x4 list of integers, between 0 and 3 inclusive.
*/
BC1Block(OldColor color0, OldColor color1, const SelectorArray& selectors) {
BC1Block(Color color0, Color color1, const SelectorArray& selectors) {
SetColor0(color0);
SetColor1(color1);
SetSelectors(selectors);
@ -96,12 +96,12 @@ class alignas(8) BC1Block {
void SetColor0Raw(uint16_t c);
void SetColor1Raw(uint16_t c);
OldColor GetColor0() const { return OldColor::Unpack565(GetColor0Raw()); }
OldColor GetColor1() const { return OldColor::Unpack565(GetColor1Raw()); }
Color GetColor0() const { return Color::Unpack565(GetColor0Raw()); }
Color GetColor1() const { return Color::Unpack565(GetColor1Raw()); }
ColorPair GetColors() const { return {GetColor0(), GetColor1()}; }
void SetColor0(OldColor c) { SetColor0Raw(c.Pack565()); }
void SetColor1(OldColor c) { SetColor1Raw(c.Pack565()); }
void SetColor0(Color c) { SetColor0Raw(c.Pack565()); }
void SetColor1(Color c) { SetColor1Raw(c.Pack565()); }
void SetColors(ColorPair cs) {
SetColor0(cs.first);
SetColor1(cs.second);

View File

@ -23,9 +23,9 @@
#include <cassert>
#include <cstdint>
#include "ColorBlock.h"
#include "OldColor.h"
#include "s3tc/bc1/BC1Block.h"
#include "../../Color.h"
#include "../../ColorBlock.h"
#include "BC1Block.h"
namespace quicktex::s3tc {

View File

@ -21,19 +21,18 @@
#include <memory>
#include "ColorBlock.h"
#include "Decoder.h"
#include "s3tc/bc1/BC1Block.h"
#include "s3tc/interpolator/Interpolator.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Decoder.h"
#include "../../Texture.h"
#include "../interpolator/Interpolator.h"
#include "BC1Block.h"
namespace quicktex::s3tc {
class BC1Decoder final : public BlockDecoder<BlockTexture<BC1Block>> {
public:
using InterpolatorPtr = std::shared_ptr<Interpolator>;
BC1Decoder(bool vwrite_alpha, InterpolatorPtr interpolator)
: write_alpha(vwrite_alpha), _interpolator(interpolator) {}
BC1Decoder(bool vwrite_alpha, InterpolatorPtr interpolator) : write_alpha(vwrite_alpha), _interpolator(interpolator) {}
BC1Decoder(bool vwrite_alpha = false) : BC1Decoder(vwrite_alpha, std::make_shared<Interpolator>()) {}

View File

@ -29,28 +29,24 @@
#include <stdexcept>
#include <type_traits>
#include "ColorBlock.h"
#include "../../Color.h"
#include "../../ColorBlock.h"
#include "../../Matrix4x4.h"
#include "../../Texture.h"
#include "../../Vector4.h"
#include "../../Vector4Int.h"
#include "../../bitwiseEnums.h"
#include "../../util.h"
#include "Histogram.h"
#include "Matrix4x4.h"
#include "OldColor.h"
#include "Vector4.h"
#include "Vector4Int.h"
#include "s3tc/bc1/BC1Block.h"
#include "s3tc/bc1/OrderTable.h"
#include "s3tc/bc1/SingleColorTable.h"
#include "texture/Texture.h"
#include "util/bitbash.h"
#include "util/bitwiseEnums.h"
#include "util/math.h"
#include "OrderTable.h"
#include "SingleColorTable.h"
namespace quicktex::s3tc {
// constructors
BC1Encoder::BC1Encoder(unsigned int level, ColorMode color_mode, InterpolatorPtr interpolator)
: _interpolator(interpolator), _color_mode(color_mode) {
if (color_mode != ColorMode::FourColor && color_mode != ColorMode::ThreeColor &&
color_mode != ColorMode::ThreeColorBlack) {
BC1Encoder::BC1Encoder(unsigned int level, ColorMode color_mode, InterpolatorPtr interpolator) : _interpolator(interpolator), _color_mode(color_mode) {
if (color_mode != ColorMode::FourColor && color_mode != ColorMode::ThreeColor && color_mode != ColorMode::ThreeColorBlack) {
throw std::invalid_argument("Encoder color mode must be FourColor, ThreeColor, or ThreeColorBlack");
}
@ -77,9 +73,7 @@ BC1Encoder::BC1Encoder(unsigned int level, ColorMode color_mode, InterpolatorPtr
// Getters and Setters
void BC1Encoder::SetLevel(unsigned level) {
if (level > 19)
throw std::invalid_argument(
"Level out of range, bust be between 0 and 18 inclusive"); // theres a secret level 19 but shhhhhh
if (level > 19) throw std::invalid_argument("Level out of range, bust be between 0 and 18 inclusive"); // theres a secret level 19 but shhhhhh
two_ls_passes = false;
two_ep_passes = false;
@ -255,20 +249,14 @@ void BC1Encoder::SetLevel(unsigned level) {
_orderings3 = clamp(_orderings3, 1U, OrderTable<3>::BestOrderCount);
}
void BC1Encoder::SetOrderings4(unsigned orderings4) {
_orderings4 = clamp(orderings4, 1U, OrderTable<4>::BestOrderCount);
}
void BC1Encoder::SetOrderings3(unsigned orderings3) {
_orderings3 = clamp(orderings3, 1U, OrderTable<3>::BestOrderCount);
}
void BC1Encoder::SetOrderings4(unsigned orderings4) { _orderings4 = clamp(orderings4, 1U, OrderTable<4>::BestOrderCount); }
void BC1Encoder::SetOrderings3(unsigned orderings3) { _orderings3 = clamp(orderings3, 1U, OrderTable<3>::BestOrderCount); }
void BC1Encoder::SetOrderings(OrderingPair orderings) {
SetOrderings4(std::get<0>(orderings));
SetOrderings3(std::get<1>(orderings));
}
void BC1Encoder::SetPowerIterations(unsigned int power_iters) {
_power_iterations = clamp(power_iters, min_power_iterations, max_power_iterations);
}
void BC1Encoder::SetPowerIterations(unsigned int power_iters) { _power_iterations = clamp(power_iters, min_power_iterations, max_power_iterations); }
// Public methods
BC1Block BC1Encoder::EncodeBlock(const ColorBlock<4, 4> &pixels) const {
@ -316,9 +304,7 @@ BC1Block BC1Encoder::EncodeBlock(const ColorBlock<4, 4> &pixels) const {
// First refinement pass using ordered cluster fit
if (result.error > 0 && use_likely_orderings) {
for (unsigned iter = 0; iter < total_cf_passes; iter++) {
RefineBlockCF<ColorMode::FourColor>(result, pixels, metrics, _error_mode, _orderings4);
}
for (unsigned iter = 0; iter < total_cf_passes; iter++) { RefineBlockCF<ColorMode::FourColor>(result, pixels, metrics, _error_mode, _orderings4); }
}
// try for 3-color block
@ -339,15 +325,13 @@ BC1Block BC1Encoder::EncodeBlock(const ColorBlock<4, 4> &pixels) const {
}
// try for 3-color block with black
if (result.error > 0 && (_color_mode == ColorMode::ThreeColorBlack) && metrics.has_black &&
!metrics.max.IsBlack()) {
if (result.error > 0 && (_color_mode == ColorMode::ThreeColorBlack) && metrics.has_black && !metrics.max.IsBlack()) {
EncodeResults trial_result;
BlockMetrics metrics_no_black = pixels.GetMetrics(true);
FindEndpoints(trial_result, pixels, metrics_no_black, EndpointMode::PCA, true);
FindSelectors<ColorMode::ThreeColorBlack>(trial_result, pixels, ErrorMode::Full);
RefineBlockLS<ColorMode::ThreeColorBlack>(trial_result, pixels, metrics_no_black, ErrorMode::Full,
total_ls_passes);
RefineBlockLS<ColorMode::ThreeColorBlack>(trial_result, pixels, metrics_no_black, ErrorMode::Full, total_ls_passes);
if (trial_result.error < result.error) { result = trial_result; }
}
@ -359,7 +343,7 @@ BC1Block BC1Encoder::EncodeBlock(const ColorBlock<4, 4> &pixels) const {
}
// Private methods
BC1Block BC1Encoder::WriteBlockSolid(OldColor color) const {
BC1Block BC1Encoder::WriteBlockSolid(Color color) const {
uint8_t mask = 0xAA; // 2222
uint16_t min16, max16;
@ -457,7 +441,7 @@ BC1Block BC1Encoder::WriteBlock(EncodeResults &result) const {
return BC1Block(ep0, ep1, selectors);
}
void BC1Encoder::FindEndpointsSingleColor(EncodeResults &result, OldColor color, bool is_3color) const {
void BC1Encoder::FindEndpointsSingleColor(EncodeResults &result, Color color, bool is_3color) const {
auto &match5 = is_3color ? _single_match5_half : _single_match5;
auto &match6 = is_3color ? _single_match6_half : _single_match6;
@ -467,14 +451,13 @@ void BC1Encoder::FindEndpointsSingleColor(EncodeResults &result, OldColor color,
result.color_mode = is_3color ? ColorMode::ThreeColor : ColorMode::FourColor;
result.error = match_r.error + match_g.error + match_b.error;
result.low = OldColor(match_r.low, match_g.low, match_b.low);
result.high = OldColor(match_r.high, match_g.high, match_b.high);
result.low = Color(match_r.low, match_g.low, match_b.low);
result.high = Color(match_r.high, match_g.high, match_b.high);
// selectors decided when writing, no point deciding them now
}
void BC1Encoder::FindEndpointsSingleColor(EncodeResults &result, const CBlock &pixels, OldColor color,
bool is_3color) const {
std::array<OldColor, 4> colors = _interpolator->InterpolateBC1(result.low, result.high, is_3color);
void BC1Encoder::FindEndpointsSingleColor(EncodeResults &result, const CBlock &pixels, Color color, bool is_3color) const {
std::array<Color, 4> colors = _interpolator->InterpolateBC1(result.low, result.high, is_3color);
Vector4Int result_vector = (Vector4Int)colors[2];
FindEndpointsSingleColor(result, color, is_3color);
@ -488,43 +471,40 @@ void BC1Encoder::FindEndpointsSingleColor(EncodeResults &result, const CBlock &p
}
}
void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics,
EndpointMode endpoint_mode, bool ignore_black) const {
void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, EndpointMode endpoint_mode, bool ignore_black) const {
if (metrics.is_greyscale) {
// specialized greyscale case
const unsigned fr = pixels.Get(0, 0).r;
if (metrics.max.r - metrics.min.r < 2) {
// single color block
uint8_t fr5 = (uint8_t)scale_from_8<5>(fr);
uint8_t fr6 = (uint8_t)scale_from_8<6>(fr);
uint8_t fr5 = (uint8_t)scale8To5(fr);
uint8_t fr6 = (uint8_t)scale8To6(fr);
result.low = OldColor(fr5, fr6, fr5);
result.low = Color(fr5, fr6, fr5);
result.high = result.low;
} else {
uint8_t lr5 = scale_from_8<5>(metrics.min.r);
uint8_t lr6 = scale_from_8<6>(metrics.min.r);
uint8_t lr5 = scale8To5(metrics.min.r);
uint8_t lr6 = scale8To6(metrics.min.r);
uint8_t hr5 = scale_from_8<5>(metrics.max.r);
uint8_t hr6 = scale_from_8<6>(metrics.max.r);
uint8_t hr5 = scale8To5(metrics.max.r);
uint8_t hr6 = scale8To6(metrics.max.r);
result.low = OldColor(lr5, lr6, lr5);
result.high = OldColor(hr5, hr6, hr5);
result.low = Color(lr5, lr6, lr5);
result.high = Color(hr5, hr6, hr5);
}
} else if (endpoint_mode == EndpointMode::LeastSquares) {
// 2D Least Squares approach from Humus's example, with added inset and optimal rounding.
OldColor diff =
OldColor(metrics.max.r - metrics.min.r, metrics.max.g - metrics.min.g, metrics.max.b - metrics.min.b);
Color diff = Color(metrics.max.r - metrics.min.r, metrics.max.g - metrics.min.g, metrics.max.b - metrics.min.b);
Vector4 l = {0, 0, 0};
Vector4 h = {0, 0, 0};
auto &sums = metrics.sums;
auto &min = metrics.min;
auto &max = metrics.max;
unsigned chan0 = (unsigned)diff.MaxChannelRGB(); // primary axis of the bounding box
l[chan0] = (float)min[chan0];
h[chan0] = (float)max[chan0];
h[chan0] = (float)min[chan0];
assert((diff[chan0] >= diff[(chan0 + 1) % 3]) && (diff[chan0] >= diff[(chan0 + 2) % 3]));
@ -541,7 +521,7 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
float denominator = (float)(16 * sum_xx) - (float)(sum_x * sum_x);
// once per secondary axis, calculate high and low using least squares
if (abs(denominator) > 1e-8f) {
if (fabs(denominator) > 1e-8f) {
for (unsigned i = 1; i < 3; i++) {
/* each secondary axis is fitted with a linear formula of the form
* y = ax + b
@ -569,8 +549,8 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
h[c] = ((h[c] - inset) / 255.0f);
}
result.low = OldColor::PreciseRound565(l);
result.high = OldColor::PreciseRound565(h);
result.low = Color::PreciseRound565(l);
result.high = Color::PreciseRound565(h);
} else if (endpoint_mode == EndpointMode::BoundingBox) {
// Algorithm from icbc.h compress_dxt1_fast()
Vector4 l, h;
@ -597,20 +577,19 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
if (icov_xz < 0) std::swap(l[0], h[0]);
if (icov_yz < 0) std::swap(l[1], h[1]);
result.low = OldColor::PreciseRound565(l);
result.high = OldColor::PreciseRound565(h);
result.low = Color::PreciseRound565(l);
result.high = Color::PreciseRound565(h);
} else if (endpoint_mode == EndpointMode::BoundingBoxInt) {
// Algorithm from icbc.h compress_dxt1_fast(), but converted to integer.
// TODO: handle constant blue channel better
OldColor min, max;
Color min, max;
// rescale and inset values
for (unsigned c = 0; c < 3; c++) {
int inset = ((int)(metrics.max[c] - metrics.min[c]) - 8) >> 4; // 1/16 of delta, with bias
min[c] = clamp(metrics.min[c] + inset, 0, 255);
max[c] = clamp(metrics.max[c] - inset, 0, 255);
min[c] = clamp255(metrics.min[c] + inset);
max[c] = clamp255(metrics.max[c] - inset);
}
int icov_xz = 0, icov_yz = 0;
@ -628,21 +607,19 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
} else if (endpoint_mode == EndpointMode::PCA) {
// the slow way
// Select 2 colors along the principal axis. (There must be a faster/simpler way.)
auto min = Vector4::FromColorRGB(metrics.min);
auto max = Vector4::FromColorRGB(metrics.max);
auto avg = Vector4::FromColorRGB(metrics.avg);
// TODO: handle constant blue channel better
Color min = metrics.min;
Color max = metrics.max;
Color avg = metrics.avg;
Vec<float, 4> axis = {306, 601, 117, 0}; // Luma vector
auto covariance = Matrix<float, 4, 4>::identity();
Vector4 axis = {306, 601, 117}; // Luma vector
Matrix4x4 covariance = Matrix4x4::Identity();
for (int i = 0; i < 16; i++) {
auto val = pixels.Get(i);
if (ignore_black && val.IsBlack()) continue;
auto diff = val - avg;
auto color_vec = Vector4::FromColorRGB(val);
Vector4 diff = color_vec - avg;
for (unsigned c1 = 0; c1 < 3; c1++) {
for (unsigned c2 = c1; c2 < 3; c2++) {
covariance[c1][c2] += (diff[c1] * diff[c2]);
@ -652,24 +629,20 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
}
covariance /= 255.0f;
covariance = covariance.mirror();
covariance.Mirror();
Vec<float, 4> delta = max - min;
Vector4 delta = max - min;
// realign r and g axes to match
if (covariance[0][2] < 0) delta[0] = -delta[0]; // r vs b
if (covariance[1][2] < 0) delta[1] = -delta[1]; // g vs b
// using the covariance matrix, stretch the delta vector towards the primary axis of the data using power
// iteration the end result of this may actually be the same as the least squares approach, will have to do more
// research
for (unsigned power_iter = 0; power_iter < _power_iterations; power_iter++) {
delta = covariance.mult(delta);
}
// using the covariance matrix, stretch the delta vector towards the primary axis of the data using power iteration
// the end result of this may actually be the same as the least squares approach, will have to do more research
for (unsigned power_iter = 0; power_iter < _power_iterations; power_iter++) { delta = covariance * delta; }
// if we found any correlation, then this is our new axis. otherwise we fallback to the luma vector
auto delta_abs = delta.abs();
float k = *std::max_element(delta_abs.begin(), delta_abs.end());
float k = delta.MaxAbs(3);
if (k >= 2) { axis = delta * (2048.0f / k); }
axis *= 16;
@ -680,12 +653,13 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
int min_index = 0, max_index = 0;
for (int i = 0; i < 16; i++) {
Color val = pixels.Get(i); //todo: fix this mess
if (ignore_black && (val.r() | val.g() | val.b()) < 4) continue;
auto val = pixels.Get(i);
if (ignore_black && val.IsBlack()) continue;
auto color_vec = Vector4::FromColorRGB(val);
// since axis is constant here, I dont think its magnitude actually matters,
// since we only care about the min or max dot product
float dot = (Vec<float,4>(val)).dot(axis);
float dot = color_vec.Dot(axis);
if (dot > max_dot) {
max_dot = dot;
max_index = i;
@ -703,21 +677,20 @@ void BC1Encoder::FindEndpoints(EncodeResults &result, const CBlock &pixels, cons
result.color_mode = ColorMode::Incomplete;
}
template <BC1Encoder::ColorMode M>
void BC1Encoder::FindSelectors(EncodeResults &result, const CBlock &pixels, ErrorMode error_mode) const {
template <BC1Encoder::ColorMode M> void BC1Encoder::FindSelectors(EncodeResults &result, const CBlock &pixels, ErrorMode error_mode) const {
assert(!((error_mode != ErrorMode::Full) && (bool)(M & ColorMode::ThreeColor)));
const int color_count = (unsigned)M & 0x0F;
std::array<OldColor, 4> colors = _interpolator->InterpolateBC1(result.low, result.high, color_count == 3);
std::array<Color, 4> colors = _interpolator->InterpolateBC1(result.low, result.high, color_count == 3);
std::array<Vector4Int, 4> color_vectors;
if (color_count == 4) {
color_vectors = {Vector4Int::FromColorRGB(colors[0]), Vector4Int::FromColorRGB(colors[2]),
Vector4Int::FromColorRGB(colors[3]), Vector4Int::FromColorRGB(colors[1])};
color_vectors = {Vector4Int::FromColorRGB(colors[0]), Vector4Int::FromColorRGB(colors[2]), Vector4Int::FromColorRGB(colors[3]),
Vector4Int::FromColorRGB(colors[1])};
} else {
color_vectors = {Vector4Int::FromColorRGB(colors[0]), Vector4Int::FromColorRGB(colors[2]),
Vector4Int::FromColorRGB(colors[1]), Vector4Int::FromColorRGB(colors[3])};
color_vectors = {Vector4Int::FromColorRGB(colors[0]), Vector4Int::FromColorRGB(colors[2]), Vector4Int::FromColorRGB(colors[1]),
Vector4Int::FromColorRGB(colors[3])};
}
unsigned total_error = 0;
@ -741,8 +714,7 @@ void BC1Encoder::FindSelectors(EncodeResults &result, const CBlock &pixels, Erro
// llvm is just going to unswitch this anyways so its not an issue
auto diff = pixel_vector - color_vectors[selector];
total_error += diff.SqrMag();
if (i % 4 != 0 && total_error >= result.error)
break; // check only once per row if we're generating too much error
if (i % 4 != 0 && total_error >= result.error) break; // check only once per row if we're generating too much error
}
result.selectors[i] = selector;
@ -755,7 +727,7 @@ void BC1Encoder::FindSelectors(EncodeResults &result, const CBlock &pixels, Erro
Vector4Int pixel_vector = Vector4Int::FromColorRGB(pixels.Get(i));
auto diff = pixel_vector - color_vectors[0];
float sel_f = (float)diff.Dot(axis) * f + 0.5f;
uint8_t sel = (uint8_t)clamp<int>((int)sel_f, 1, 3);
uint8_t sel = (uint8_t)clampi((int)sel_f, 1, 3);
unsigned err0 = (color_vectors[sel - 1] - pixel_vector).SqrMag();
unsigned err1 = (color_vectors[sel] - pixel_vector).SqrMag();
@ -807,8 +779,7 @@ void BC1Encoder::FindSelectors(EncodeResults &result, const CBlock &pixels, Erro
result.color_mode = M;
}
template <BC1Encoder::ColorMode M>
bool BC1Encoder::RefineEndpointsLS(EncodeResults &result, const CBlock &pixels, BlockMetrics metrics) const {
template <BC1Encoder::ColorMode M> bool BC1Encoder::RefineEndpointsLS(EncodeResults &result, const CBlock &pixels, BlockMetrics metrics) const {
const int color_count = (unsigned)M & 0x0F;
static_assert(color_count == 3 || color_count == 4);
assert(result.color_mode != ColorMode::Incomplete);
@ -819,12 +790,11 @@ bool BC1Encoder::RefineEndpointsLS(EncodeResults &result, const CBlock &pixels,
Vector4 matrix = Vector4(0);
for (int i = 0; i < 16; i++) {
const OldColor color = pixels.Get(i);
const Color color = pixels.Get(i);
const uint8_t sel = result.selectors[i];
if ((bool)(M & ColorMode::ThreeColorBlack) && color.IsBlack()) continue;
if ((bool)(M & ColorMode::ThreeColor) && sel == 3U)
continue; // NOTE: selectors for 3-color are in linear order here, but not in original
if ((bool)(M & ColorMode::ThreeColor) && sel == 3U) continue; // NOTE: selectors for 3-color are in linear order here, but not in original
assert(sel < color_count);
const Vector4Int color_vector = Vector4Int::FromColorRGB(color);
@ -835,7 +805,7 @@ bool BC1Encoder::RefineEndpointsLS(EncodeResults &result, const CBlock &pixels,
// invert matrix
float det = matrix.Determinant2x2(); // z00 * z11 - z01 * z10;
if (abs(det) < 1e-8f) {
if (fabs(det) < 1e-8f) {
result.color_mode = ColorMode::Incomplete;
return false;
}
@ -850,14 +820,12 @@ bool BC1Encoder::RefineEndpointsLS(EncodeResults &result, const CBlock &pixels,
Vector4 high = (matrix[2] * q00) + (matrix[3] * q10);
result.color_mode = M;
result.low = OldColor::PreciseRound565(low);
result.high = OldColor::PreciseRound565(high);
result.low = Color::PreciseRound565(low);
result.high = Color::PreciseRound565(high);
return true;
}
template <BC1Encoder::ColorMode M>
void BC1Encoder::RefineEndpointsLS(EncodeResults &result, std::array<Vector4, 17> &sums, Vector4 &matrix,
Hash hash) const {
template <BC1Encoder::ColorMode M> void BC1Encoder::RefineEndpointsLS(EncodeResults &result, std::array<Vector4, 17> &sums, Vector4 &matrix, Hash hash) const {
const int color_count = (unsigned)M & 0x0F;
static_assert(color_count == 3 || color_count == 4);
assert(result.color_mode != ColorMode::Incomplete);
@ -878,13 +846,12 @@ void BC1Encoder::RefineEndpointsLS(EncodeResults &result, std::array<Vector4, 17
Vector4 high = (matrix[2] * q00) + (matrix[3] * q10);
result.color_mode = M;
result.low = OldColor::PreciseRound565(low);
result.high = OldColor::PreciseRound565(high);
result.low = Color::PreciseRound565(low);
result.high = Color::PreciseRound565(high);
}
template <BC1Encoder::ColorMode M>
void BC1Encoder::RefineBlockLS(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics,
ErrorMode error_mode, unsigned passes) const {
void BC1Encoder::RefineBlockLS(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, ErrorMode error_mode, unsigned passes) const {
assert(error_mode != ErrorMode::None || passes == 1);
for (unsigned pass = 0; pass < passes; pass++) {
@ -909,8 +876,7 @@ void BC1Encoder::RefineBlockLS(EncodeResults &result, const CBlock &pixels, cons
}
template <BC1Encoder::ColorMode M>
void BC1Encoder::RefineBlockCF(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics,
ErrorMode error_mode, unsigned orderings) const {
void BC1Encoder::RefineBlockCF(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, ErrorMode error_mode, unsigned orderings) const {
const int color_count = (unsigned)M & 0x0F;
static_assert(color_count == 3 || color_count == 4);
assert(result.color_mode != ColorMode::Incomplete);
@ -989,8 +955,7 @@ void BC1Encoder::EndpointSearch(EncodeResults &result, const CBlock &pixels) con
for (unsigned i = 0; i < _search_rounds; i++) {
const unsigned voxel_index = (unsigned)(i & 15);
assert((unsigned)Voxels[(unsigned)Voxels[voxel_index][3]][3] ==
voxel_index); // make sure voxels are symmetrical
assert((unsigned)Voxels[(unsigned)Voxels[voxel_index][3]][3] == voxel_index); // make sure voxels are symmetrical
if ((int)(i & 31) == forbidden_direction) continue;

View File

@ -26,13 +26,13 @@
#include <memory>
#include <tuple>
#include "ColorBlock.h"
#include "Encoder.h"
#include "OldColor.h"
#include "s3tc/bc1/BC1Block.h"
#include "s3tc/bc1/SingleColorTable.h"
#include "s3tc/interpolator/Interpolator.h"
#include "texture/BlockTexture.h"
#include "../../Color.h"
#include "../../ColorBlock.h"
#include "../../Encoder.h"
#include "../../Texture.h"
#include "../interpolator/Interpolator.h"
#include "BC1Block.h"
#include "SingleColorTable.h"
namespace quicktex {
class Vector4;
@ -79,8 +79,7 @@ class BC1Encoder final : public BlockEncoder<BlockTexture<BC1Block>> {
};
enum class EndpointMode {
// Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead
// of PCA.
// Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead of PCA.
// Around 18% faster, very slightly lower average quality to better (depends on the content).
LeastSquares,
@ -102,8 +101,7 @@ class BC1Encoder final : public BlockEncoder<BlockTexture<BC1Block>> {
BC1Encoder(unsigned level, ColorMode color_mode, InterpolatorPtr interpolator);
BC1Encoder(unsigned int level = 5, ColorMode color_mode = ColorMode::FourColor)
: BC1Encoder(level, color_mode, std::make_shared<Interpolator>()) {}
BC1Encoder(unsigned int level = 5, ColorMode color_mode = ColorMode::FourColor) : BC1Encoder(level, color_mode, std::make_shared<Interpolator>()) {}
// Getters and Setters
void SetLevel(unsigned level);
@ -143,8 +141,8 @@ class BC1Encoder final : public BlockEncoder<BlockTexture<BC1Block>> {
// Unpacked BC1 block with metadata
struct EncodeResults {
OldColor low;
OldColor high;
Color low;
Color high;
std::array<uint8_t, 16> selectors = {0};
ColorMode color_mode = ColorMode::Incomplete;
bool solid = false;
@ -171,29 +169,24 @@ class BC1Encoder final : public BlockEncoder<BlockTexture<BC1Block>> {
unsigned _orderings4;
unsigned _orderings3;
BC1Block WriteBlockSolid(OldColor color) const;
BC1Block WriteBlockSolid(Color color) const;
BC1Block WriteBlock(EncodeResults &result) const;
void FindEndpoints(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics,
EndpointMode endpoint_mode, bool ignore_black = false) const;
void FindEndpointsSingleColor(EncodeResults &result, OldColor color, bool is_3color = false) const;
void FindEndpointsSingleColor(EncodeResults &result, const CBlock &pixels, OldColor color, bool is_3color) const;
void FindEndpoints(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, EndpointMode endpoint_mode, bool ignore_black = false) const;
void FindEndpointsSingleColor(EncodeResults &result, Color color, bool is_3color = false) const;
void FindEndpointsSingleColor(EncodeResults &result, const CBlock &pixels, Color color, bool is_3color) const;
template <ColorMode M> void FindSelectors(EncodeResults &result, const CBlock &pixels, ErrorMode error_mode) const;
template <ColorMode M>
bool RefineEndpointsLS(EncodeResults &result, const CBlock &pixels, BlockMetrics metrics) const;
template <ColorMode M> bool RefineEndpointsLS(EncodeResults &result, const CBlock &pixels, BlockMetrics metrics) const;
template <ColorMode M> void RefineEndpointsLS(EncodeResults &result, std::array<Vector4, 17> &sums, Vector4 &matrix, Hash hash) const;
template <ColorMode M>
void RefineEndpointsLS(EncodeResults &result, std::array<Vector4, 17> &sums, Vector4 &matrix, Hash hash) const;
void RefineBlockLS(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, ErrorMode error_mode, unsigned passes) const;
template <ColorMode M>
void RefineBlockLS(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, ErrorMode error_mode,
unsigned passes) const;
template <ColorMode M>
void RefineBlockCF(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, ErrorMode error_mode,
unsigned orderings) const;
void RefineBlockCF(EncodeResults &result, const CBlock &pixels, const BlockMetrics &metrics, ErrorMode error_mode, unsigned orderings) const;
void EndpointSearch(EncodeResults &result, const CBlock &pixels) const;
};

View File

@ -27,10 +27,10 @@
#include <mutex>
#include <numeric>
#include "Vector4.h"
#include "util/math.h"
#include "../../Vector4.h"
#include "../../util.h"
namespace quicktex::s3tc {
namespace quicktex::s3tc {
template <size_t N> class Histogram {
public:
using Hash = uint16_t;
@ -71,7 +71,7 @@ template <size_t N> class Histogram {
unsigned GetPacked() const {
Hash packed = 0;
for (unsigned i = 0; i < (N - 1); i++) {
for (unsigned i = 0; i < (N-1); i++) {
assert(_bins[i] <= (1U << 4) - 1U);
packed |= static_cast<uint16_t>(_bins[i]) << (i * 4U);
}

View File

@ -21,7 +21,7 @@
#include <array>
#include "Vector4.h"
#include "../../Vector4.h"
namespace quicktex::s3tc {
using Hash = uint16_t;

View File

@ -29,9 +29,8 @@
#include <mutex>
#include <type_traits>
#include "../../Vector4.h"
#include "Histogram.h"
#include "Vector4.h"
#include "util/math.h"
namespace quicktex::s3tc {
template <size_t N> class OrderTable {
@ -59,7 +58,7 @@ template <size_t N> class OrderTable {
static bool Generate() {
static_assert(N == 4 || N == 3);
table_mutex.lock();
std::scoped_lock{table_mutex};
if (!generated) {
hashes = new std::array<Hash, HashCount>();
factors = new std::array<Vector4, OrderCount>();
@ -74,7 +73,7 @@ template <size_t N> class OrderTable {
for (unsigned sel = 0; sel < N; sel++) factor_matrix += (Weights[sel] * h[sel]);
float det = factor_matrix.Determinant2x2();
if (abs(det) < 1e-8f) {
if (fabs(det) < 1e-8f) {
factors->at(i) = Vector4(0);
} else {
std::swap(factor_matrix[0], factor_matrix[3]);
@ -86,8 +85,6 @@ template <size_t N> class OrderTable {
generated = true;
}
table_mutex.unlock();
assert(generated);
return true;
}
@ -114,9 +111,7 @@ template <size_t N> class OrderTable {
return factors->at(hash);
}
static bool IsSingleColor(Hash hash) {
return (std::find(SingleColorHashes.begin(), SingleColorHashes.end(), hash) != SingleColorHashes.end());
}
static bool IsSingleColor(Hash hash) { return (std::find(SingleColorHashes.begin(), SingleColorHashes.end(), hash) != SingleColorHashes.end()); }
private:
static std::mutex table_mutex;

View File

@ -23,11 +23,10 @@
#include <cstdint>
#include <memory>
#include "s3tc/interpolator/Interpolator.h"
#include "util/bitbash.h"
#include "util/math.h"
#include "../../util.h"
#include "../interpolator/Interpolator.h"
namespace quicktex::s3tc {
namespace quicktex::s3tc {
struct BC1MatchEntry {
uint8_t high;
@ -60,10 +59,10 @@ template <size_t B, size_t N> MatchListPtr SingleColorTable(InterpolatorPtr inte
// TODO: Can probably avoid testing for values that definitely wont yield good results,
// e.g. low8 and high8 both much smaller or larger than index
for (uint8_t low = 0; low < Size; low++) {
uint8_t low8 = scale_to_8<B>(low);
uint8_t low8 = (B == 5) ? scale5To8(low) : scale6To8(low);
for (uint8_t high = 0; high < Size; high++) {
uint8_t high8 = scale_to_8<B>(high);
uint8_t high8 = (B == 5) ? scale5To8(high) : scale6To8(high);
uint8_t value;
if (use_8bit) {
@ -72,10 +71,10 @@ template <size_t B, size_t N> MatchListPtr SingleColorTable(InterpolatorPtr inte
value = (B == 5) ? interpolator->Interpolate5(high, low) : interpolator->Interpolate6(high, low);
}
unsigned new_error = abs(value - (int)i);
unsigned new_error = iabs(value - (int)i);
// We only need to factor in 3% error in BC1 ideal mode.
if (ideal) new_error += (abs(high8 - (int)low8) * 3) / 100;
if (ideal) new_error += (iabs(high8 - (int)low8) * 3) / 100;
if ((new_error < error) || (new_error == error && low == high)) {
assert(new_error <= UINT8_MAX);

View File

@ -23,12 +23,16 @@
#include <pybind11/stl.h>
#include <array>
#include <memory>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include "s3tc/bc1/BC1Block.h"
#include "s3tc/bc1/BC1Decoder.h"
#include "s3tc/bc1/BC1Encoder.h"
#include "s3tc/interpolator/Interpolator.h"
#include "../../Decoder.h"
#include "../../Encoder.h"
#include "../interpolator/Interpolator.h"
#include "BC1Decoder.h"
#include "BC1Encoder.h"
namespace py = pybind11;
namespace quicktex::bindings {
@ -46,7 +50,7 @@ void InitBC1(py::module_ &s3tc) {
bc1_block.doc() = "A single BC1 block.";
bc1_block.def(py::init<>());
bc1_block.def(py::init<OldColor, OldColor, BC1Block::SelectorArray>(), "color0"_a, "color1"_a, "selectors"_a, R"doc(
bc1_block.def(py::init<Color, Color, BC1Block::SelectorArray>(), "color0"_a, "color1"_a, "selectors"_a, R"doc(
Create a new BC1Block with the specified endpoints and selectors
:param color0: The first endpoint
@ -54,8 +58,7 @@ void InitBC1(py::module_ &s3tc) {
:param selectors: the selectors as a 4x4 list of integers, between 0 and 3 inclusive.
)doc");
bc1_block.def_property("endpoints", &BC1Block::GetColors, &BC1Block::SetColors,
"The block's endpoint colors as a 2-tuple.");
bc1_block.def_property("endpoints", &BC1Block::GetColors, &BC1Block::SetColors, "The block's endpoint colors as a 2-tuple.");
bc1_block.def_property("selectors", &BC1Block::GetSelectors, &BC1Block::SetSelectors, R"doc(
The block's selectors as a 4x4 list of integers between 0 and 3 inclusive.
@ -80,42 +83,27 @@ void InitBC1(py::module_ &s3tc) {
// region BC1Encoder
py::class_<BC1Encoder> bc1_encoder(bc1, "BC1Encoder", "Encodes RGB textures to BC1.");
// Expose BC1Encoder::EndpointMode to Python as a nested enum of the BC1Encoder class,
// with a docstring for the enum and for each of its values.
py::enum_<BC1Encoder::EndpointMode>(bc1_encoder, "EndpointMode",
"Enum representing various methods of finding endpoints in a block.")
.value("LeastSquares", BC1Encoder::EndpointMode::LeastSquares,
"Find endpoints using a 2D least squares approach.")
.value("BoundingBox", BC1Encoder::EndpointMode::BoundingBox,
"Find endpoints using a simple bounding box. Fast but inaccurate.")
.value("BoundingBoxInt", BC1Encoder::EndpointMode::BoundingBoxInt,
"Same as BoundingBox but using integers, slightly faster.")
.value("PCA", BC1Encoder::EndpointMode::PCA,
"Find endpoints using Principal Component Analysis. Slowest but highest quality method.");
// Expose BC1Encoder::EndpointMode to Python as a nested enum of the BC1Encoder class,
// with a docstring for the enum and for each of its values.
py::enum_<BC1Encoder::EndpointMode>(bc1_encoder, "EndpointMode", "Enum representing various methods of finding endpoints in a block.")
.value("LeastSquares", BC1Encoder::EndpointMode::LeastSquares, "Find endpoints using a 2D least squares approach.")
.value("BoundingBox", BC1Encoder::EndpointMode::BoundingBox, "Find endpoints using a simple bounding box. Fast but inaccurate.")
.value("BoundingBoxInt", BC1Encoder::EndpointMode::BoundingBoxInt, "Same as BoundingBox but using integers, slightly faster.")
.value("PCA", BC1Encoder::EndpointMode::PCA, "Find endpoints using Principal Component Analysis. Slowest but highest quality method.");
py::enum_<BC1Encoder::ErrorMode>(bc1_encoder, "ErrorMode",
"Enum representing various methods of finding selectors in a block.")
.value("None", BC1Encoder::ErrorMode::None,
"The same as Faster but error is not calculated. This disables any cluster-fit options")
.value("Faster", BC1Encoder::ErrorMode::Faster,
"Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks.")
py::enum_<BC1Encoder::ErrorMode>(bc1_encoder, "ErrorMode", "Enum representing various methods of finding selectors in a block.")
.value("None", BC1Encoder::ErrorMode::None, "The same as Faster but error is not calculated. This disables any cluster-fit options")
.value("Faster", BC1Encoder::ErrorMode::Faster, "Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks.")
.value("Check2", BC1Encoder::ErrorMode::Check2, "Default error-checking method.")
.value("Full", BC1Encoder::ErrorMode::Full,
"Examine all colors to compute selectors/MSE. Slower but slightly higher quality.");
.value("Full", BC1Encoder::ErrorMode::Full, "Examine all colors to compute selectors/MSE. Slower but slightly higher quality.");
py::enum_<BC1Encoder::ColorMode>(bc1_encoder, "ColorMode",
"Enum representing various methods of writing BC1 blocks.")
.value("FourColor", BC1Encoder::ColorMode::FourColor,
"Default color mode. Only 4-color blocks will be output, where color0 > color1")
.value("ThreeColor", BC1Encoder::ColorMode::ThreeColor,
"Additionally use 3-color blocks when they have a lower error, where color0 <= color1")
py::enum_<BC1Encoder::ColorMode>(bc1_encoder, "ColorMode", "Enum representing various methods of writing BC1 blocks.")
.value("FourColor", BC1Encoder::ColorMode::FourColor, "Default color mode. Only 4-color blocks will be output, where color0 > color1")
.value("ThreeColor", BC1Encoder::ColorMode::ThreeColor, "Additionally use 3-color blocks when they have a lower error, where color0 <= color1")
.value("ThreeColorBlack", BC1Encoder::ColorMode::ThreeColorBlack,
"Additionally use 3-color blocks with black pixels (selector 3). Note that this requires your "
"shader/engine to not sample the alpha channel "
"Additionally use 3-color blocks with black pixels (selector 3). Note that this requires your shader/engine to not sample the alpha channel "
"when using a BC1 texture.");
bc1_encoder.def(py::init<unsigned, BC1Encoder::ColorMode>(), "level"_a = 5,
"color_mode"_a = BC1Encoder::ColorMode::FourColor);
bc1_encoder.def(py::init<unsigned, BC1Encoder::ColorMode, InterpolatorPtr>(), "level"_a, "color_mode"_a,
"interpolator"_a, R"doc(
bc1_encoder.def(py::init<unsigned, BC1Encoder::ColorMode>(), "level"_a = 5, "color_mode"_a = BC1Encoder::ColorMode::FourColor);
bc1_encoder.def(py::init<unsigned, BC1Encoder::ColorMode, InterpolatorPtr>(), "level"_a, "color_mode"_a, "interpolator"_a, R"doc(
Create a new BC1 encoder with the specified preset level, color mode, and interpolator.
:param int level: The preset level of the resulting encoder, between 0 and 18 inclusive. See :py:meth:`set_level` for more information. Default: 5.
@ -137,56 +125,44 @@ void InitBC1(py::module_ &s3tc) {
:param int level: The preset level of the resulting encoder, between 0 and 18 inclusive. Default: 5.
)doc");
bc1_encoder.def_property_readonly(
"interpolator", &BC1Encoder::GetInterpolator,
"The :py:class:`~quicktex.s3tc.interpolator.Interpolator` used by this encoder. This is a readonly property.");
bc1_encoder.def_property_readonly(
"color_mode", &BC1Encoder::GetColorMode,
"The :py:class:`~quicktex.s3tc.bc1.BC1Encoder.ColorMode` used by this encoder. This is a readonly property.");
bc1_encoder.def_property_readonly("interpolator", &BC1Encoder::GetInterpolator,
"The :py:class:`~quicktex.s3tc.interpolator.Interpolator` used by this encoder. This is a readonly property.");
bc1_encoder.def_property_readonly("color_mode", &BC1Encoder::GetColorMode,
"The :py:class:`~quicktex.s3tc.bc1.BC1Encoder.ColorMode` used by this encoder. This is a readonly property.");
// Advanced API
bc1_encoder.def_property("error_mode", &BC1Encoder::GetErrorMode, &BC1Encoder::SetErrorMode,
"The error mode used by this encoder for finding selectors.");
bc1_encoder.def_property("endpoint_mode", &BC1Encoder::GetEndpointMode, &BC1Encoder::SetEndpointMode,
"The endpoint mode used by this encoder.");
bc1_encoder.def_property("error_mode", &BC1Encoder::GetErrorMode, &BC1Encoder::SetErrorMode, "The error mode used by this encoder for finding selectors.");
bc1_encoder.def_property("endpoint_mode", &BC1Encoder::GetEndpointMode, &BC1Encoder::SetEndpointMode, "The endpoint mode used by this encoder.");
bc1_encoder.def_readwrite("two_ls_passes", &BC1Encoder::two_ls_passes,
"Use 2 least squares pass, instead of one (same as stb_dxt's HIGHQUAL option).\n"
"Recommended if you're setting the orderings settings greater than 0.");
bc1_encoder.def_readwrite("two_ep_passes", &BC1Encoder::two_ep_passes,
"Try 2 different ways of choosing the initial endpoints.");
bc1_encoder.def_readwrite("two_ep_passes", &BC1Encoder::two_ep_passes, "Try 2 different ways of choosing the initial endpoints.");
bc1_encoder.def_readwrite(
"two_cf_passes", &BC1Encoder::two_cf_passes,
"Greatly increase encode time, with very slightly higher quality.\n"
"Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, "
"unless you just don't care about performance at all.");
bc1_encoder.def_readwrite("two_cf_passes", &BC1Encoder::two_cf_passes,
"Greatly increase encode time, with very slightly higher quality.\n"
"Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, "
"unless you just don't care about performance at all.");
bc1_encoder.def_readwrite(
"exhaustive", &BC1Encoder::exhaustive,
"Check all total orderings - *very* slow. The encoder is not designed to be used in this way");
bc1_encoder.def_readwrite("exhaustive", &BC1Encoder::exhaustive,
"Check all total orderings - *very* slow. The encoder is not designed to be used in this way");
bc1_encoder.def_property("search_rounds", &BC1Encoder::GetSearchRounds, &BC1Encoder::SetSearchRounds,
"Setting search rounds > 0 enables refining the final endpoints by examining nearby "
"colors. A higher value has a higher quality "
"Setting search rounds > 0 enables refining the final endpoints by examining nearby colors. A higher value has a higher quality "
"at the expense of performance.");
bc1_encoder.def_property(
"orderings", &BC1Encoder::GetOrderings, &BC1Encoder::SetOrderings,
"setting the orderings > 0 enables ordered cluster fit using a lookup table of similar blocks. Value is a "
"tuple of (4 color "
"orders, 3 color orders), where higher values have a higher quality at the expense of performance.");
bc1_encoder.def_property("orderings", &BC1Encoder::GetOrderings, &BC1Encoder::SetOrderings,
"setting the orderings > 0 enables ordered cluster fit using a lookup table of similar blocks. Value is a tuple of (4 color "
"orders, 3 color orders), where higher values have a higher quality at the expense of performance.");
bc1_encoder.def_readonly_static("max_power_iterations", &BC1Encoder::max_power_iterations);
bc1_encoder.def_readonly_static("min_power_iterations", &BC1Encoder::min_power_iterations);
bc1_encoder.def_property(
"power_iterations", &BC1Encoder::GetPowerIterations, &BC1Encoder::SetPowerIterations,
"Number of power iterations used with the PCA endpoint mode. Value should be around 4 to 6. "
"Automatically clamped to between :py:const:`BC1Encoder.min_power_iterations` and "
":py:const:`BC1Encoder.max_power_iterations`");
bc1_encoder.def_property("power_iterations", &BC1Encoder::GetPowerIterations, &BC1Encoder::SetPowerIterations,
"Number of power iterations used with the PCA endpoint mode. Value should be around 4 to 6. "
"Automatically clamped to between :py:const:`BC1Encoder.min_power_iterations` and :py:const:`BC1Encoder.max_power_iterations`");
// endregion
// region BC1Decoder
@ -209,10 +185,8 @@ void InitBC1(py::module_ &s3tc) {
:returns: A new RawTexture with the same dimensions as the input
)doc");
bc1_decoder.def_property_readonly("interpolator", &BC1Decoder::GetInterpolator,
"The interpolator used by this decoder. This is a readonly property.");
bc1_decoder.def_readwrite("write_alpha", &BC1Decoder::write_alpha,
"Determines if the alpha channel of the output is written to.");
bc1_decoder.def_property_readonly("interpolator", &BC1Decoder::GetInterpolator, "The interpolator used by this decoder. This is a readonly property.");
bc1_decoder.def_readwrite("write_alpha", &BC1Decoder::write_alpha, "Determines if the alpha channel of the output is written to.");
// endregion
}
} // namespace quicktex::bindings

View File

@ -21,8 +21,8 @@
#include <utility>
#include "s3tc/bc1/BC1Block.h"
#include "s3tc/bc4/BC4Block.h"
#include "../bc1/BC1Block.h"
#include "../bc4/BC4Block.h"
namespace quicktex::s3tc {
@ -54,9 +54,7 @@ class alignas(8) BC3Block {
color_block = blocks.second;
}
bool operator==(const BC3Block &Rhs) const {
return alpha_block == Rhs.alpha_block && color_block == Rhs.color_block;
}
bool operator==(const BC3Block &Rhs) const { return alpha_block == Rhs.alpha_block && color_block == Rhs.color_block; }
bool operator!=(const BC3Block &Rhs) const { return !(Rhs == *this); }
};
} // namespace quicktex::s3tc

View File

@ -21,13 +21,13 @@
#include <memory>
#include "ColorBlock.h"
#include "Decoder.h"
#include "s3tc/bc1/BC1Decoder.h"
#include "s3tc/bc3/BC3Block.h"
#include "s3tc/bc4/BC4Decoder.h"
#include "s3tc/interpolator/Interpolator.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Decoder.h"
#include "../../Texture.h"
#include "../bc1/BC1Decoder.h"
#include "../bc4/BC4Decoder.h"
#include "../interpolator/Interpolator.h"
#include "BC3Block.h"
namespace quicktex::s3tc {
@ -37,8 +37,7 @@ class BC3Decoder : public BlockDecoder<BlockTexture<BC3Block>> {
using BC4DecoderPtr = std::shared_ptr<BC4Decoder>;
using InterpolatorPtr = std::shared_ptr<Interpolator>;
BC3Decoder(InterpolatorPtr interpolator)
: _bc1_decoder(std::make_shared<BC1Decoder>(interpolator)), _bc4_decoder(std::make_shared<BC4Decoder>(3)) {}
BC3Decoder(InterpolatorPtr interpolator) : _bc1_decoder(std::make_shared<BC1Decoder>(interpolator)), _bc4_decoder(std::make_shared<BC4Decoder>(3)) {}
BC3Decoder() : BC3Decoder(std::make_shared<Interpolator>()) {}

View File

@ -19,8 +19,10 @@
#include "BC3Encoder.h"
#include "ColorBlock.h"
#include "s3tc/bc3/BC3Block.h"
#include "../../ColorBlock.h"
#include "../bc1/BC1Block.h"
#include "../bc4/BC4Block.h"
#include "BC3Block.h"
namespace quicktex::s3tc {
BC3Block BC3Encoder::EncodeBlock(const ColorBlock<4, 4> &pixels) const {

View File

@ -21,13 +21,13 @@
#include <memory>
#include "ColorBlock.h"
#include "Encoder.h"
#include "s3tc/bc1/BC1Encoder.h"
#include "s3tc/bc3/BC3Block.h"
#include "s3tc/bc4/BC4Encoder.h"
#include "s3tc/interpolator/Interpolator.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Encoder.h"
#include "../../Texture.h"
#include "../bc1/BC1Encoder.h"
#include "../bc4/BC4Encoder.h"
#include "../interpolator/Interpolator.h"
#include "BC3Block.h"
namespace quicktex::s3tc {
@ -38,8 +38,7 @@ class BC3Encoder : public BlockEncoder<BlockTexture<BC3Block>> {
using InterpolatorPtr = std::shared_ptr<Interpolator>;
BC3Encoder(unsigned level, InterpolatorPtr interpolator)
: _bc1_encoder(std::make_shared<BC1Encoder>(level, BC1Encoder::ColorMode::FourColor, interpolator)),
_bc4_encoder(std::make_shared<BC4Encoder>(3)) {}
: _bc1_encoder(std::make_shared<BC1Encoder>(level, BC1Encoder::ColorMode::FourColor, interpolator)), _bc4_encoder(std::make_shared<BC4Encoder>(3)) {}
BC3Encoder(unsigned level = 5) : BC3Encoder(level, std::make_shared<Interpolator>()) {}

View File

@ -22,14 +22,16 @@
#include <pybind11/pybind11.h>
#include <array>
#include <memory>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include "s3tc/bc1/BC1Block.h"
#include "s3tc/bc3/BC3Block.h"
#include "s3tc/bc3/BC3Decoder.h"
#include "s3tc/bc3/BC3Encoder.h"
#include "s3tc/bc4/BC4Block.h"
#include "s3tc/interpolator/Interpolator.h"
#include "../../Decoder.h"
#include "../../Encoder.h"
#include "../interpolator/Interpolator.h"
#include "BC3Decoder.h"
#include "BC3Encoder.h"
namespace py = pybind11;
namespace quicktex::bindings {
@ -57,8 +59,7 @@ void InitBC3(py::module_ &s3tc) {
bc3_block.def_readwrite("alpha_block", &BC3Block::alpha_block, "The BC4 block used for alpha data.");
bc3_block.def_readwrite("color_block", &BC3Block::color_block, "The BC1 block used for rgb data.");
bc3_block.def_property("blocks", &BC3Block::GetBlocks, &BC3Block::SetBlocks,
"The BC4 and BC1 blocks that make up this block as a 2-tuple.");
bc3_block.def_property("blocks", &BC3Block::GetBlocks, &BC3Block::SetBlocks, "The BC4 and BC1 blocks that make up this block as a 2-tuple.");
// endregion
// region BC3Texture
@ -87,12 +88,10 @@ void InitBC3(py::module_ &s3tc) {
:returns: A new BC3Texture with the same dimension as the input.
)doc");
bc3_encoder.def_property_readonly(
"bc1_encoder", &BC3Encoder::GetBC1Encoder,
"Internal :py:class:`~quicktex.s3tc.bc1.BC1Encoder` used for RGB data. Readonly.");
bc3_encoder.def_property_readonly(
"bc4_encoder", &BC3Encoder::GetBC4Encoder,
"Internal :py:class:`~quicktex.s3tc.bc4.BC4Encoder` used for alpha data. Readonly.");
bc3_encoder.def_property_readonly("bc1_encoder", &BC3Encoder::GetBC1Encoder,
"Internal :py:class:`~quicktex.s3tc.bc1.BC1Encoder` used for RGB data. Readonly.");
bc3_encoder.def_property_readonly("bc4_encoder", &BC3Encoder::GetBC4Encoder,
"Internal :py:class:`~quicktex.s3tc.bc4.BC4Encoder` used for alpha data. Readonly.");
// endregion
// region BC3Decoder
@ -114,12 +113,10 @@ void InitBC3(py::module_ &s3tc) {
:returns: A new RawTexture with the same dimensions as the input
)doc");
bc3_decoder.def_property_readonly(
"bc1_decoder", &BC3Decoder::GetBC1Decoder,
"Internal :py:class:`~quicktex.s3tc.bc1.BC1Decoder` used for RGB data. Readonly.");
bc3_decoder.def_property_readonly(
"bc4_decoder", &BC3Decoder::GetBC4Decoder,
"Internal :py:class:`~quicktex.s3tc.bc4.BC4Decoder` used for alpha data. Readonly.");
bc3_decoder.def_property_readonly("bc1_decoder", &BC3Decoder::GetBC1Decoder,
"Internal :py:class:`~quicktex.s3tc.bc1.BC1Decoder` used for RGB data. Readonly.");
bc3_decoder.def_property_readonly("bc4_decoder", &BC3Decoder::GetBC4Decoder,
"Internal :py:class:`~quicktex.s3tc.bc4.BC4Decoder` used for alpha data. Readonly.");
// endregion
}
} // namespace quicktex::bindings

View File

@ -22,17 +22,14 @@
#include <algorithm>
#include <stdexcept>
#include "util/bitbash.h"
#include "util/map.h"
#include "util/math.h"
#include "util/ranges.h"
#include "../../util.h"
namespace quicktex::s3tc {
BC4Block::SelectorArray BC4Block::GetSelectors() const {
auto packed = pack<uint64_t>(_selectors, 8);
auto rows = unpack<uint16_t, Height>(packed, SelectorBits * Width);
return map([](auto row) { return unpack<uint8_t, Width>(row, SelectorBits); }, rows);
auto packed = Pack<uint8_t, uint64_t, 8, SelectorSize>(_selectors);
auto rows = Unpack<uint64_t, uint16_t, SelectorBits * Width, Height>(packed);
return MapArray(rows, Unpack<uint16_t, uint8_t, SelectorBits, Width>);
}
void BC4Block::SetSelectors(const BC4Block::SelectorArray& unpacked) {
@ -40,9 +37,9 @@ void BC4Block::SetSelectors(const BC4Block::SelectorArray& unpacked) {
if (std::any_of(unpacked[y].begin(), unpacked[y].end(), [](uint8_t i) { return i > SelectorMax; }))
throw std::invalid_argument("Selector value out of bounds.");
}
auto rows = map([](auto r) { return pack<uint16_t>(r, SelectorBits); }, unpacked);
auto packed = pack<uint64_t>(rows, SelectorBits * Width);
_selectors = unpack<uint8_t, SelectorSize>(packed, 8);
auto rows = MapArray(unpacked, Pack<uint8_t, uint16_t, SelectorBits, Width>);
auto packed = Pack<uint16_t, uint64_t, SelectorBits * Width, Height>(rows);
_selectors = Unpack<uint64_t, uint8_t, 8, SelectorSize>(packed);
}
std::array<uint8_t, 8> BC4Block::GetValues6() const {
@ -67,8 +64,6 @@ std::array<uint8_t, 8> BC4Block::GetValues8() const {
static_cast<uint8_t>((alpha0 + alpha1 * 6) / 7)};
}
bool BC4Block::operator==(const BC4Block& Rhs) const {
return alpha0 == Rhs.alpha0 && alpha1 == Rhs.alpha1 && _selectors == Rhs._selectors;
}
bool BC4Block::operator==(const BC4Block& Rhs) const { return alpha0 == Rhs.alpha0 && alpha1 == Rhs.alpha1 && _selectors == Rhs._selectors; }
bool BC4Block::operator!=(const BC4Block& Rhs) const { return !(Rhs == *this); }
} // namespace quicktex::s3tc

View File

@ -22,8 +22,8 @@
#include <array> // for array
#include <cassert> // for assert
#include "../../Color.h"
#include "../../ColorBlock.h"
#include "../../OldColor.h"
#include "BC4Block.h"
namespace quicktex::s3tc {

View File

@ -22,10 +22,10 @@
#include <cstdint>
#include <stdexcept>
#include "ColorBlock.h"
#include "Decoder.h"
#include "s3tc/bc4/BC4Block.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Decoder.h"
#include "../../Texture.h"
#include "BC4Block.h"
namespace quicktex::s3tc {

View File

@ -23,8 +23,8 @@
#include <array>
#include <cstdint>
#include "../../Color.h"
#include "../../ColorBlock.h"
#include "../../OldColor.h"
#include "BC4Block.h"
namespace quicktex::s3tc {

View File

@ -22,10 +22,10 @@
#include <cstdint>
#include <stdexcept>
#include "ColorBlock.h"
#include "Encoder.h"
#include "s3tc/bc4/BC4Block.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Encoder.h"
#include "../../Texture.h"
#include "BC4Block.h"
namespace quicktex::s3tc {

View File

@ -23,11 +23,15 @@
#include <pybind11/stl.h>
#include <array>
#include <cstddef>
#include <cstdint>
#include <stdexcept>
#include <string>
#include "s3tc/bc4/BC4Block.h"
#include "s3tc/bc4/BC4Decoder.h"
#include "s3tc/bc4/BC4Encoder.h"
#include "../../Decoder.h"
#include "../../Encoder.h"
#include "BC4Decoder.h"
#include "BC4Encoder.h"
namespace py = pybind11;
namespace quicktex::bindings {
@ -42,8 +46,7 @@ void InitBC4(py::module_ &s3tc) {
bc4_block.doc() = "A single BC4 block.";
bc4_block.def(py::init<>());
bc4_block.def(py::init<uint8_t, uint8_t, BC4Block::SelectorArray>(), "endpoint0"_a, "endpoint1"_a, "selectors"_a,
R"doc(
bc4_block.def(py::init<uint8_t, uint8_t, BC4Block::SelectorArray>(), "endpoint0"_a, "endpoint1"_a, "selectors"_a, R"doc(
Create a new BC4Block with the specified endpoints and selectors.
:param int endpoint0: The first endpoint.
@ -51,8 +54,7 @@ void InitBC4(py::module_ &s3tc) {
:param selectors: the selectors as a 4x4 list of integers, between 0 and 7 inclusive.
)doc");
bc4_block.def_property("endpoints", &BC4Block::GetAlphas, &BC4Block::SetAlphas,
"The block's endpoint values as a 2-tuple.");
bc4_block.def_property("endpoints", &BC4Block::GetAlphas, &BC4Block::SetAlphas, "The block's endpoint values as a 2-tuple.");
bc4_block.def_property("selectors", &BC4Block::GetSelectors, &BC4Block::SetSelectors, R"doc(
The block's selectors as a 4x4 list of integers between 0 and 7 inclusive.
@ -94,9 +96,8 @@ void InitBC4(py::module_ &s3tc) {
:param RawTexture texture: Input texture to encode.
:returns: A new BC4Texture with the same dimension as the input.
)doc");
bc4_encoder.def_property_readonly("channel", &BC4Encoder::GetChannel,
"The channel that will be read from. 0 to 3 inclusive. Readonly.");
bc4_encoder.def_property_readonly("channel", &BC4Encoder::GetChannel, "The channel that will be read from. 0 to 3 inclusive. Readonly.");
// endregion
// region BC4Decoder
@ -116,9 +117,8 @@ void InitBC4(py::module_ &s3tc) {
:param RawTexture texture: Input texture to encode.
:returns: A new RawTexture with the same dimensions as the input
)doc");
bc4_decoder.def_property_readonly("channel", &BC4Decoder::GetChannel,
"The channel that will be written to. 0 to 3 inclusive. Readonly.");
bc4_decoder.def_property_readonly("channel", &BC4Decoder::GetChannel, "The channel that will be written to. 0 to 3 inclusive. Readonly.");
// endregion
}

View File

@ -19,7 +19,9 @@
#pragma once
#include "s3tc/bc4/BC4Block.h"
#include <utility>
#include "../bc4/BC4Block.h"
namespace quicktex::s3tc {
@ -51,9 +53,7 @@ class alignas(8) BC5Block {
chan1_block = pair.second;
}
bool operator==(const BC5Block &Rhs) const {
return chan0_block == Rhs.chan0_block && chan1_block == Rhs.chan1_block;
}
bool operator==(const BC5Block &Rhs) const { return chan0_block == Rhs.chan0_block && chan1_block == Rhs.chan1_block; }
bool operator!=(const BC5Block &Rhs) const { return !(Rhs == *this); }
};
} // namespace quicktex::s3tc

View File

@ -19,8 +19,8 @@
#include "BC5Decoder.h"
#include "ColorBlock.h"
#include "s3tc/bc5/BC5Block.h"
#include "../../ColorBlock.h"
#include "BC5Block.h"
namespace quicktex::s3tc {
ColorBlock<4, 4> BC5Decoder::DecodeBlock(const BC5Block &block) const {

View File

@ -24,11 +24,11 @@
#include <tuple>
#include <type_traits>
#include "ColorBlock.h"
#include "Decoder.h"
#include "s3tc/bc4/BC4Decoder.h"
#include "s3tc/bc5/BC5Block.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Decoder.h"
#include "../../Texture.h"
#include "../bc4/BC4Decoder.h"
#include "BC5Block.h"
namespace quicktex::s3tc {
@ -38,10 +38,8 @@ class BC5Decoder : public BlockDecoder<BlockTexture<BC5Block>> {
using BC4DecoderPtr = std::shared_ptr<BC4Decoder>;
using BC4DecoderPair = std::tuple<BC4DecoderPtr, BC4DecoderPtr>;
BC5Decoder(uint8_t chan0 = 0, uint8_t chan1 = 1)
: BC5Decoder(std::make_shared<BC4Decoder>(chan0), std::make_shared<BC4Decoder>(chan1)) {}
BC5Decoder(BC4DecoderPtr chan0_decoder, BC4DecoderPtr chan1_decoder)
: _chan0_decoder(chan0_decoder), _chan1_decoder(chan1_decoder) {}
BC5Decoder(uint8_t chan0 = 0, uint8_t chan1 = 1) : BC5Decoder(std::make_shared<BC4Decoder>(chan0), std::make_shared<BC4Decoder>(chan1)) {}
BC5Decoder(BC4DecoderPtr chan0_decoder, BC4DecoderPtr chan1_decoder) : _chan0_decoder(chan0_decoder), _chan1_decoder(chan1_decoder) {}
ColorBlock<4, 4> DecodeBlock(const BC5Block &block) const override;

View File

@ -19,8 +19,8 @@
#include "BC5Encoder.h"
#include "ColorBlock.h"
#include "s3tc/bc4/BC4Block.h"
#include "../../ColorBlock.h"
#include "../bc4/BC4Block.h"
namespace quicktex::s3tc {
BC5Block BC5Encoder::EncodeBlock(const ColorBlock<4, 4> &pixels) const {

View File

@ -24,11 +24,11 @@
#include <tuple>
#include <type_traits>
#include "ColorBlock.h"
#include "Encoder.h"
#include "s3tc/bc4/BC4Encoder.h"
#include "s3tc/bc5/BC5Block.h"
#include "texture/BlockTexture.h"
#include "../../ColorBlock.h"
#include "../../Encoder.h"
#include "../../Texture.h"
#include "../bc4/BC4Encoder.h"
#include "BC5Block.h"
namespace quicktex::s3tc {
class BC5Encoder : public BlockEncoder<BlockTexture<BC5Block>> {
@ -37,10 +37,8 @@ class BC5Encoder : public BlockEncoder<BlockTexture<BC5Block>> {
using BC4EncoderPtr = std::shared_ptr<BC4Encoder>;
using BC4EncoderPair = std::tuple<BC4EncoderPtr, BC4EncoderPtr>;
BC5Encoder(uint8_t chan0 = 0, uint8_t chan1 = 1)
: BC5Encoder(std::make_shared<BC4Encoder>(chan0), std::make_shared<BC4Encoder>(chan1)) {}
BC5Encoder(BC4EncoderPtr chan0_encoder, BC4EncoderPtr chan1_encoder)
: _chan0_encoder(chan0_encoder), _chan1_encoder(chan1_encoder) {}
BC5Encoder(uint8_t chan0 = 0, uint8_t chan1 = 1) : BC5Encoder(std::make_shared<BC4Encoder>(chan0), std::make_shared<BC4Encoder>(chan1)) {}
BC5Encoder(BC4EncoderPtr chan0_encoder, BC4EncoderPtr chan1_encoder) : _chan0_encoder(chan0_encoder), _chan1_encoder(chan1_encoder) {}
BC5Block EncodeBlock(const ColorBlock<4, 4> &pixels) const override;

View File

@ -24,10 +24,10 @@
#include <array>
#include <cstdint>
#include "s3tc/bc4/BC4Block.h"
#include "s3tc/bc5/BC5Block.h"
#include "s3tc/bc5/BC5Decoder.h"
#include "s3tc/bc5/BC5Encoder.h"
#include "../../Decoder.h"
#include "../../Encoder.h"
#include "BC5Decoder.h"
#include "BC5Encoder.h"
namespace py = pybind11;
namespace quicktex::bindings {
@ -52,8 +52,7 @@ void InitBC5(py::module_ &s3tc) {
bc5_block.def_readwrite("chan0_block", &BC5Block::chan0_block, "The BC4 block used for the first channel.");
bc5_block.def_readwrite("chan1_block", &BC5Block::chan1_block, "The BC4 block used for the second channel.");
bc5_block.def_property("blocks", &BC5Block::GetBlocks, &BC5Block::SetBlocks,
"The BC4 and BC1 blocks that make up this block as a 2-tuple.");
bc5_block.def_property("blocks", &BC5Block::GetBlocks, &BC5Block::SetBlocks, "The BC4 and BC1 blocks that make up this block as a 2-tuple.");
// endregion
// region BC5Texture
@ -80,11 +79,9 @@ void InitBC5(py::module_ &s3tc) {
:returns: A new BC5Texture with the same dimension as the input.
)doc");
bc5_encoder.def_property_readonly("channels", &BC5Encoder::GetChannels,
"A 2-tuple of channels that will be read from. 0 to 3 inclusive. Readonly.");
bc5_encoder.def_property_readonly(
"bc4_encoders", &BC5Encoder::GetBC4Encoders,
"2-tuple of internal :py:class:`~quicktex.s3tc.bc4.BC4Encoder` s used for each channel. Readonly.");
bc5_encoder.def_property_readonly("channels", &BC5Encoder::GetChannels, "A 2-tuple of channels that will be read from. 0 to 3 inclusive. Readonly.");
bc5_encoder.def_property_readonly("bc4_encoders", &BC5Encoder::GetBC4Encoders,
"2-tuple of internal :py:class:`~quicktex.s3tc.bc4.BC4Encoder` s used for each channel. Readonly.");
// endregion
// region BC5Decoder
@ -106,11 +103,9 @@ void InitBC5(py::module_ &s3tc) {
:returns: A new RawTexture with the same dimensions as the input
)doc");
bc5_decoder.def_property_readonly("channels", &BC5Decoder::GetChannels,
"A 2-tuple of channels that will be written to. 0 to 3 inclusive. Readonly.");
bc5_decoder.def_property_readonly(
"bc4_decoders", &BC5Decoder::GetBC4Decoders,
"2-tuple of internal :py:class:`~quicktex.s3tc.bc4.BC4Decoder` s used for each channel. Readonly.");
bc5_decoder.def_property_readonly("channels", &BC5Decoder::GetChannels, "A 2-tuple of channels that will be written to. 0 to 3 inclusive. Readonly.");
bc5_decoder.def_property_readonly("bc4_decoders", &BC5Decoder::GetBC4Decoders,
"2-tuple of internal :py:class:`~quicktex.s3tc.bc4.BC4Decoder` s used for each channel. Readonly.");
// endregion
}
} // namespace quicktex::bindings

View File

@ -24,8 +24,8 @@
#include <cstdint>
#include <stdexcept>
#include "OldColor.h"
#include "util/bitbash.h"
#include "../../util.h"
#include "../../Color.h"
namespace quicktex::s3tc {
@ -45,33 +45,25 @@ std::unique_ptr<Interpolator> Interpolator::MakeInterpolator(Interpolator::Type
}
}
uint8_t Interpolator::Interpolate5(uint8_t v0, uint8_t v1) const {
return Interpolate8(scale_to_8<5>(v0), scale_to_8<5>(v1));
}
uint8_t Interpolator::Interpolate6(uint8_t v0, uint8_t v1) const {
return Interpolate8(scale_to_8<6>(v0), scale_to_8<6>(v1));
}
uint8_t Interpolator::InterpolateHalf5(uint8_t v0, uint8_t v1) const {
return InterpolateHalf8(scale_to_8<5>(v0), scale_to_8<5>(v1));
}
uint8_t Interpolator::InterpolateHalf6(uint8_t v0, uint8_t v1) const {
return InterpolateHalf8(scale_to_8<6>(v0), scale_to_8<6>(v1));
}
uint8_t Interpolator::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
uint8_t Interpolator::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
uint8_t Interpolator::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
uint8_t Interpolator::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }
std::array<OldColor, 4> Interpolator::Interpolate565BC1(uint16_t low, uint16_t high, bool allow_3color) const {
std::array<Color, 4> Interpolator::Interpolate565BC1(uint16_t low, uint16_t high, bool allow_3color) const {
bool use_3color = allow_3color && (high >= low);
return InterpolateBC1(OldColor::Unpack565Unscaled(low), OldColor::Unpack565Unscaled(high), use_3color);
return InterpolateBC1(Color::Unpack565Unscaled(low), Color::Unpack565Unscaled(high), use_3color);
}
std::array<OldColor, 4> Interpolator::InterpolateBC1(OldColor low, OldColor high, bool use_3color) const {
auto colors = std::array<OldColor, 4>();
std::array<Color, 4> Interpolator::InterpolateBC1(Color low, Color high, bool use_3color) const {
auto colors = std::array<Color, 4>();
colors[0] = low.ScaleFrom565();
colors[1] = high.ScaleFrom565();
if (use_3color) {
// 3-color mode
colors[2] = InterpolateHalfColor24(colors[0], colors[1]);
colors[3] = OldColor(0, 0, 0, 0); // transparent black
colors[3] = Color(0, 0, 0, 0); // transparent black
} else {
// 4-color mode
colors[2] = InterpolateColor24(colors[0], colors[1]);
@ -87,12 +79,8 @@ uint8_t Interpolator::InterpolateHalf8(uint8_t v0, uint8_t v1) const { return (v
// endregion
// region InterpolatorRound implementation
uint8_t InterpolatorRound::Interpolate5(uint8_t v0, uint8_t v1) const {
return Interpolate8(scale_to_8<5>(v0), scale_to_8<5>(v1));
}
uint8_t InterpolatorRound::Interpolate6(uint8_t v0, uint8_t v1) const {
return Interpolate8(scale_to_8<6>(v0), scale_to_8<6>(v1));
}
uint8_t InterpolatorRound::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
uint8_t InterpolatorRound::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
uint8_t InterpolatorRound::Interpolate8(uint8_t v0, uint8_t v1) const { return (v0 * 2 + v1 + 1) / 3; }
// endregion
@ -120,9 +108,9 @@ uint8_t InterpolatorNvidia::InterpolateHalf6(uint8_t v0, uint8_t v1) const {
return static_cast<uint8_t>((256 * v0 + gdiff / 4 + 128 + gdiff * 128) >> 8);
}
std::array<OldColor, 4> InterpolatorNvidia::InterpolateBC1(OldColor low, OldColor high, bool use_3color) const {
std::array<Color, 4> InterpolatorNvidia::InterpolateBC1(Color low, Color high, bool use_3color) const {
// Nvidia is special and interpolation cant be done with 8-bit values, so we need to override the default behavior
std::array<OldColor, 4> colors;
std::array<Color, 4> colors;
colors[0] = low.ScaleFrom565();
colors[1] = high.ScaleFrom565();
@ -133,7 +121,7 @@ std::array<OldColor, 4> InterpolatorNvidia::InterpolateBC1(OldColor low, OldColo
} else {
// 3-color mode
colors[2] = InterpolateHalfColor565(low, high);
colors[3] = OldColor(0, 0, 0, 0); // transparent black
colors[3] = Color(0, 0, 0, 0); // transparent black
}
return colors;
@ -141,18 +129,10 @@ std::array<OldColor, 4> InterpolatorNvidia::InterpolateBC1(OldColor low, OldColo
// endregion
// region InterpolatorAMD implementation
uint8_t InterpolatorAMD::Interpolate5(uint8_t v0, uint8_t v1) const {
return Interpolate8(scale_to_8<5>(v0), scale_to_8<5>(v1));
}
uint8_t InterpolatorAMD::Interpolate6(uint8_t v0, uint8_t v1) const {
return Interpolate8(scale_to_8<6>(v0), scale_to_8<6>(v1));
}
uint8_t InterpolatorAMD::InterpolateHalf5(uint8_t v0, uint8_t v1) const {
return InterpolateHalf8(scale_to_8<5>(v0), scale_to_8<5>(v1));
}
uint8_t InterpolatorAMD::InterpolateHalf6(uint8_t v0, uint8_t v1) const {
return InterpolateHalf8(scale_to_8<6>(v0), scale_to_8<6>(v1));
}
uint8_t InterpolatorAMD::Interpolate5(uint8_t v0, uint8_t v1) const { return Interpolate8(scale5To8(v0), scale5To8(v1)); }
uint8_t InterpolatorAMD::Interpolate6(uint8_t v0, uint8_t v1) const { return Interpolate8(scale6To8(v0), scale6To8(v1)); }
uint8_t InterpolatorAMD::InterpolateHalf5(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale5To8(v0), scale5To8(v1)); }
uint8_t InterpolatorAMD::InterpolateHalf6(uint8_t v0, uint8_t v1) const { return InterpolateHalf8(scale6To8(v0), scale6To8(v1)); }
uint8_t InterpolatorAMD::Interpolate8(uint8_t v0, uint8_t v1) const { return (v0 * 43 + v1 * 21 + 32) >> 6; }

View File

@ -22,7 +22,7 @@
#include <cstdint> // for uint8_t, uint16_t
#include <memory> // for unique_ptr
#include "OldColor.h" // for Color
#include "../../Color.h" // for Color
namespace quicktex::s3tc {
@ -97,7 +97,7 @@ class Interpolator {
* @param allow_3color if true, a different interpolation mode will be used if high >= low
* @return an array of 4 Color values, with indices matching BC1 selectors
*/
std::array<OldColor, 4> Interpolate565BC1(uint16_t low, uint16_t high, bool allow_3color = true) const;
std::array<Color, 4> Interpolate565BC1(uint16_t low, uint16_t high, bool allow_3color = true) const;
/**
* Generates the 4 colors for a BC1 block from the given
@ -106,7 +106,7 @@ class Interpolator {
* @param use_3color if the 3-color interpolation mode should be used
* @return an array of 4 Color values, with indices matching BC1 selectors
*/
virtual std::array<OldColor, 4> InterpolateBC1(OldColor low, OldColor high, bool use_3color) const;
virtual std::array<Color, 4> InterpolateBC1(Color low, Color high, bool use_3color) const;
/**
* Gets the type of an interpolator
@ -126,12 +126,12 @@ class Interpolator {
}
private:
OldColor InterpolateColor24(const OldColor &c0, const OldColor &c1) const {
return OldColor(Interpolate8(c0.r, c1.r), Interpolate8(c0.g, c1.g), Interpolate8(c0.b, c1.b));
Color InterpolateColor24(const Color &c0, const Color &c1) const {
return Color(Interpolate8(c0.r, c1.r), Interpolate8(c0.g, c1.g), Interpolate8(c0.b, c1.b));
}
OldColor InterpolateHalfColor24(const OldColor &c0, const OldColor &c1) const {
return OldColor(InterpolateHalf8(c0.r, c1.r), InterpolateHalf8(c0.g, c1.g), InterpolateHalf8(c0.b, c1.b));
Color InterpolateHalfColor24(const Color &c0, const Color &c1) const {
return Color(InterpolateHalf8(c0.r, c1.r), InterpolateHalf8(c0.g, c1.g), InterpolateHalf8(c0.b, c1.b));
}
};
@ -152,18 +152,18 @@ class InterpolatorNvidia final : public Interpolator {
virtual uint8_t InterpolateHalf5(uint8_t v0, uint8_t v1) const override;
virtual uint8_t InterpolateHalf6(uint8_t v0, uint8_t v1) const override;
virtual std::array<OldColor, 4> InterpolateBC1(OldColor low, OldColor high, bool use_3color) const override;
virtual std::array<Color, 4> InterpolateBC1(Color low, Color high, bool use_3color) const override;
virtual Type GetType() const noexcept override { return Type::Nvidia; }
virtual bool CanInterpolate8Bit() const noexcept override { return false; }
private:
OldColor InterpolateColor565(const OldColor &c0, const OldColor &c1) const {
return OldColor(Interpolate5(c0.r, c1.r), Interpolate6(c0.g, c1.g), Interpolate5(c0.b, c1.b));
Color InterpolateColor565(const Color &c0, const Color &c1) const {
return Color(Interpolate5(c0.r, c1.r), Interpolate6(c0.g, c1.g), Interpolate5(c0.b, c1.b));
}
OldColor InterpolateHalfColor565(const OldColor &c0, const OldColor &c1) const {
return OldColor(InterpolateHalf5(c0.r, c1.r), InterpolateHalf6(c0.g, c1.g), InterpolateHalf5(c0.b, c1.b));
Color InterpolateHalfColor565(const Color &c0, const Color &c1) const {
return Color(InterpolateHalf5(c0.r, c1.r), InterpolateHalf6(c0.g, c1.g), InterpolateHalf5(c0.b, c1.b));
}
};

View File

@ -1,31 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <array>
#include <cstdint>
#include <xsimd/xsimd.hpp>
#include "Matrix.h"
// Type your code here, or load an example.
namespace quicktex {
/**
 * Scratch function: scales the first column vector by the constant 7.
 * The second and third arguments are accepted but not used.
 * @param a 4x1 float matrix to scale
 * @param b unused
 * @param c unused
 * @return the result of a * 7
 */
auto test(Matrix<float, 4, 1> a, Matrix<float, 4, 1> b, Matrix<float, 4, 1> c) {
    auto scaled = a * 7;
    return scaled;
};
} // namespace quicktex

View File

@ -1,70 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <vector>
#include "Texture.h"
namespace quicktex {
/**
 * A texture stored as a row-major grid of blocks of type B.
 * @tparam B block type; must provide static Width and Height members giving the block size in pixels
 */
template <typename B> class BlockTexture final : public Texture {
   private:
    std::vector<B> _blocks;  // block (x, y) is stored at index x + y * _width_b
    unsigned _width_b;       // texture width in blocks, rounded up
    unsigned _height_b;      // texture height in blocks, rounded up

    // Flat index of block (x, y), computed in size_t so it cannot wrap in 32 bits
    size_t block_index(unsigned x, unsigned y) const { return static_cast<size_t>(y) * _width_b + x; }

   public:
    using BlockType = B;
    using Base = Texture;

    /**
     * Create a new BlockTexture
     * @param w width of the texture in pixels. If not a multiple of B::Width, the block count is rounded up.
     * @param h height of the texture in pixels. If not a multiple of B::Height, the block count is rounded up.
     */
    BlockTexture(int w, int h) : Base(w, h) {
        _width_b = (width + B::Width - 1) / B::Width;
        _height_b = (height + B::Height - 1) / B::Height;
        // widen before multiplying so large textures do not wrap the block count
        _blocks = std::vector<B>(static_cast<size_t>(_width_b) * static_cast<size_t>(_height_b));
    }

    /// Width of the texture in blocks
    constexpr unsigned bwidth() const { return _width_b; }

    /// Height of the texture in blocks
    constexpr unsigned bheight() const { return _height_b; }

    /// Size of the texture in blocks as a (width, height) tuple
    constexpr std::tuple<int, int> bsize() const { return std::tuple<int, int>(_width_b, _height_b); }

    /**
     * Get a copy of the block at the given block coordinates
     * @param x block column, must be less than bwidth()
     * @param y block row, must be less than bheight()
     * @return a copy of the stored block
     * @throws std::out_of_range if either coordinate is outside the block grid
     */
    B get_block(unsigned x, unsigned y) const {
        if (x >= _width_b) throw std::out_of_range("x value out of range.");
        if (y >= _height_b) throw std::out_of_range("y value out of range.");
        return _blocks.at(block_index(x, y));
    }

    /**
     * Overwrite the block at the given block coordinates
     * @param x block column, must be less than bwidth()
     * @param y block row, must be less than bheight()
     * @param val block value to store
     * @throws std::out_of_range if either coordinate is outside the block grid
     */
    void set_block(unsigned x, unsigned y, const B &val) {
        if (x >= _width_b) throw std::out_of_range("x value out of range.");
        if (y >= _height_b) throw std::out_of_range("y value out of range.");
        _blocks.at(block_index(x, y)) = val;
    }

    /// Size of the block storage in bytes
    size_t nbytes() const noexcept override { return _blocks.size() * sizeof(B); }

    /// Read-only pointer to the raw block storage
    const uint8_t *data() const noexcept override { return reinterpret_cast<const uint8_t *>(_blocks.data()); }

    /// Mutable pointer to the raw block storage
    uint8_t *data() noexcept override { return reinterpret_cast<uint8_t *>(_blocks.data()); }
};
} // namespace quicktex

View File

@ -1,33 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "RawTexture.h"
namespace quicktex {
// Read the pixel at (x, y) by value.
// Throws std::invalid_argument when either coordinate is outside the texture.
Color RawTexture::pixel(unsigned x, unsigned y) const {
    if (!(x < width)) { throw std::invalid_argument("x value out of range."); }
    if (!(y < height)) { throw std::invalid_argument("y value out of range."); }

    // pixels are stored row by row
    const auto offset = x + (y * width);
    return _pixels.at(offset);
}
// Access the pixel at (x, y) by mutable reference.
// Throws std::invalid_argument when either coordinate is outside the texture.
quicktex::Color& RawTexture::pixel(unsigned x, unsigned y) {
    if (!(x < width)) { throw std::invalid_argument("x value out of range."); }
    if (!(y < height)) { throw std::invalid_argument("y value out of range."); }

    // pixels are stored row by row
    const auto offset = x + (y * width);
    return _pixels.at(offset);
}
} // namespace quicktex

View File

@ -1,97 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <vector>
#include "Color.h"
#include "ColorBlock.h"
#include "OldColor.h"
#include "texture/Texture.h"
namespace quicktex {
class RawTexture : public Texture {
using Base = Texture;
public:
/**
* Create a new RawTexture
* @param width width of the texture in pixels
* @param height height of the texture in pixels
*/
RawTexture(int w, int h) : Base(w, h), _pixels(w* h) {}
quicktex::Color pixel(unsigned x, unsigned y) const;
quicktex::Color &pixel(unsigned x, unsigned y);
quicktex::Color pixel_wrapped(unsigned x, unsigned y) const { return pixel(x % width, y % height); }
quicktex::Color &pixel_wrapped(unsigned x, unsigned y) { return pixel(x % width, y % height); }
size_t nbytes() const noexcept override { return static_cast<size_t>(width * height) * sizeof(quicktex::Color); }
template <int N, int M> quicktex::ColorBlock<N, M> get_block(int block_x, int block_y) const {
if (block_x < 0) throw std::out_of_range("x value out of range.");
if (block_y < 0) throw std::out_of_range("y value out of range.");
// coordinates in the image of the top-left pixel of the selected block
quicktex::ColorBlock<N, M> block;
int pixel_x = block_x * N;
int pixel_y = block_y * M;
// slower pixel-wise copy if the block goes over the edges
for (int x = 0; x < N; x++) {
for (int y = 0; y < M; y++) { block.Set(x, y, pixel((pixel_x + x) % width, (pixel_y + y) % height)); }
}
return block;
}
template <int N, int M> void set_block(int block_x, int block_y, const quicktex::ColorBlock<N, M> &block) {
if (block_x < 0) throw std::out_of_range("x value out of range.");
if (block_y < 0) throw std::out_of_range("y value out of range.");
// coordinates in the image of the top-left pixel of the selected block
int pixel_x = block_x * N;
int pixel_y = block_y * M;
// slower pixel-wise copy if the block goes over the edges
for (int x = 0; x < N; x++) {
for (int y = 0; y < M; y++) { pixel((pixel_x + x) % width, (pixel_y + y) % height) = block.Get(x, y); }
}
}
virtual const uint8_t *data() const noexcept override { return reinterpret_cast<const uint8_t *>(_pixels.data()); }
virtual uint8_t *data() noexcept override { return reinterpret_cast<uint8_t *>(_pixels.data()); }
protected:
std::vector<quicktex::Color> _pixels;
};
} // namespace quicktex

View File

@ -1,62 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021-2022 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <climits>
#include <cstdint>
#include <cstdio>
#include <cstring>
#include <memory>
#include <stdexcept>
#include <tuple>
#include <type_traits>
#include <vector>
#include "Color.h"
#include "ColorBlock.h"
#include "OldColor.h"
#include "Window.h"
namespace quicktex {
/**
 * Abstract base for textures: holds the fixed pixel dimensions and
 * declares access to the underlying byte storage.
 */
class Texture {
   public:
    const unsigned width;   // width in pixels, fixed at construction
    const unsigned height;  // height in pixels, fixed at construction

    virtual ~Texture() = default;

    /// Dimensions of the texture as a (width, height) tuple
    virtual std::tuple<unsigned, unsigned> Size() const { return std::tuple<unsigned, unsigned>(width, height); }

    /**
     * The texture's total size
     * @return The size of the texture in bytes.
     */
    virtual size_t nbytes() const noexcept = 0;

    /// Read-only pointer to the texture's storage
    virtual const uint8_t *data() const noexcept = 0;

    /// Mutable pointer to the texture's storage
    virtual uint8_t *data() noexcept = 0;

   protected:
    /// Only subclasses may construct a Texture; stores the dimensions as given
    Texture(unsigned w, unsigned h) : width(w), height(h) {}
};
} // namespace quicktex

View File

@ -1,90 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Window.h"
#include "texture/RawTexture.h"
namespace quicktex {
// Window
// Create a w x h window whose top-left corner sits at (px, py) in the texture.
// The origin must lie inside the texture (checked by assert only).
Window::Window(RawTexture& texture, unsigned w, unsigned h, unsigned px, unsigned py)
    : width(w), height(h), x(px), y(py), _texture(texture) {
    assert(x < texture.width);
    assert(y < texture.height);
}

// Mutable access to the pixel at window-relative coordinates (px, py)
Color& Window::pixel(unsigned px, unsigned py) {
    assert(px < width && py < height);
    const unsigned texture_x = x + px;
    const unsigned texture_y = y + py;
    return _texture.pixel(texture_x, texture_y);
}

// Copy of the pixel at window-relative coordinates (px, py)
Color Window::pixel(unsigned px, unsigned py) const {
    assert(px < width && py < height);
    const unsigned texture_x = x + px;
    const unsigned texture_y = y + py;
    return _texture.pixel(texture_x, texture_y);
}

// Iterator to the first pixel of the given row
WindowIterator Window::row_begin(unsigned int row) { return WindowIterator(*this, 0, row); }

// One past the last pixel of a row is the first pixel of the next row
WindowIterator Window::row_end(unsigned int row) { return row_begin(row + 1); }

// Iteration starts at the first pixel of row 0
WindowIterator Window::begin() { return row_begin(0); }

// The end sentinel is column 0 of the row just past the last one
WindowIterator Window::end() { return row_begin(height); }

// Two windows are equal when they cover the same region of the same texture object
bool Window::operator==(const Window& rhs) const {
    if (&_texture != &rhs._texture) { return false; }
    if (x != rhs.x || y != rhs.y) { return false; }
    return width == rhs.width && height == rhs.height;
}
// WindowIterator
// Positions an iterator at (px, py) inside `view`.
WindowIterator::WindowIterator(Window& view, unsigned px, unsigned py) : x(px), y(py), _view(&view) {
    // (0, height) is the end-of-iteration sentinel: it may be constructed but never dereferenced
    [[maybe_unused]] const bool is_end_sentinel = (y == view.height) && (x == 0);
    assert(x < view.width);
    assert(y < view.height || is_end_sentinel);
}
// Prefix increment: step one pixel to the right, wrapping to column 0 of the next row.
WindowIterator& quicktex::WindowIterator::operator++() {
    ++x;
    const bool past_row_end = (x >= _view->width);
    if (past_row_end) {
        ++y;
        x = 0;
    }
    return *this;
}
// Postfix increment: advance, but hand back the position held before advancing.
WindowIterator WindowIterator::operator++(int) {
    WindowIterator previous(*this);
    operator++();
    return previous;
}
// Dereference: the pixel of the viewed window at the iterator's current (x, y).
Color& WindowIterator::operator*() const {
    assert(_view != nullptr);
    // the end sentinel and anything beyond it must not be dereferenced
    assert(x < _view->width);
    assert(y < _view->height);
    Window& window = *_view;
    return window.pixel(x, y);
}
// Member access: address of the pixel that operator* refers to.
Color* WindowIterator::operator->() {
    Color& current = operator*();
    return &current;
}
// Iterators compare equal when they point at the same position of the same Window object.
bool WindowIterator::operator==(const WindowIterator& rhs) const {
    if (_view != rhs._view) { return false; }
    return (x == rhs.x) && (y == rhs.y);
}
// Compile-time check that WindowIterator models the C++20 std::forward_iterator concept.
static_assert(std::forward_iterator<WindowIterator>);
// NOTE(review): the sized_range check below is disabled in the source; the reason is not visible here.
// static_assert(sized_range<Window>);
} // namespace quicktex

View File

@ -1,82 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "Color.h"
#include "util/ranges.h"
namespace quicktex {
// forward declarations
class WindowIterator;
class RawTexture;
/**
 * Class representing a window into a RawTexture
 *
 * A Window is a width x height rectangle whose top-left corner sits at (x, y) in the
 * referenced texture. It stores a reference to the texture, not a copy of its pixels.
 */
class Window {
   public:
    typedef Color value_type;

    // extent of the window in pixels
    const unsigned width, height;
    // position of the window's top-left corner inside the texture
    const unsigned x, y;

    /**
     * @param texture texture the window looks into
     * @param w window width in pixels
     * @param h window height in pixels
     * @param x horizontal offset of the window inside the texture
     * @param y vertical offset of the window inside the texture
     */
    Window(RawTexture &texture, unsigned w, unsigned h, unsigned x, unsigned y);

    // pixel access using window-relative coordinates
    Color &pixel(unsigned px, unsigned py);
    Color pixel(unsigned px, unsigned py) const;

    // iteration over the whole window, or over a single row
    WindowIterator begin();
    WindowIterator end();
    WindowIterator row_begin(unsigned row);
    WindowIterator row_end(unsigned row);

    // number of pixels covered; widened before multiplying so the product cannot wrap in `unsigned`
    size_t size() const { return static_cast<size_t>(width) * height; }

    bool operator==(const Window &rhs) const;

   private:
    RawTexture &_texture;
};
/**
 * Iterator returned by Window
 *
 * Holds a pointer to the Window it walks plus the current window-relative position.
 * A default-constructed iterator has a null view and must not be dereferenced.
 */
class WindowIterator {
public:
typedef long long difference_type;
typedef Color value_type;
// current position, relative to the window's origin
unsigned x, y;
WindowIterator(Window &view, unsigned x, unsigned y);
// default constructor: position (0, 0) with no view attached
WindowIterator() : x(0), y(0), _view(nullptr) {}
Color &operator*() const; // dereference
Color *operator->(); // member access
WindowIterator &operator++(); // prefix increment
WindowIterator operator++(int); // postfix increment
bool operator==(const WindowIterator &rhs) const;
private:
// window being iterated; nullptr for a default-constructed iterator
Window *_view;
};
} // namespace quicktex

178
quicktex/util.h Normal file
View File

@ -0,0 +1,178 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021-2022 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <limits>
#include <string>
#include <type_traits>
#include <utility>
#include <vector>
#define UINT5_MAX 0x1FU // 31
#define UINT6_MAX 0x3FU // 63
// The argument is parenthesized so that an expression argument (e.g. `a & b`) is compared
// as a whole instead of being re-associated by operator precedence.
#define assert5bit(x) assert((x) <= UINT5_MAX)
#define assert6bit(x) assert((x) <= UINT6_MAX)
/**
 * Absolute value of a signed integer, returned as the matching unsigned type.
 * @tparam S signed integral input type
 * @param i value to take the magnitude of
 * @return |i| as std::make_unsigned<S>::type; correct even for the most negative value of S
 */
template <typename S> constexpr auto iabs(S i) {
    static_assert(!std::is_unsigned<S>::value);
    using O = typename std::make_unsigned<S>::type;
    // negate in the unsigned domain: `-i` would overflow for the minimum value of S
    return (i < 0) ? static_cast<O>(O(0) - static_cast<O>(i)) : static_cast<O>(i);
}
/**
 * Unpacks an unsigned integer into an array of smaller integers.
 * The first output element is taken from the least significant bits of the input.
 * @tparam I Input data type. Must be an unsigned integral type large enough to hold C * S bits.
 * @tparam O Output data type. Must be an unsigned integral type large enough to hold S bits.
 * @tparam S Number of bits in each value.
 * @tparam C Number of values to unpack.
 * @param packed Packed integer input of type I.
 * @return Unpacked std::array of type O and size C.
 */
template <typename I, typename O, size_t S, size_t C> constexpr std::array<O, C> Unpack(I packed) {
    // type checking
    static_assert(std::is_unsigned<I>::value, "Packed input type must be unsigned");
    static_assert(std::is_unsigned<O>::value, "Unpacked output type must be unsigned");
    static_assert(std::numeric_limits<I>::digits >= (C * S), "Packed input type must be big enough to represent the number of bits multiplied by count");
    static_assert(std::numeric_limits<O>::digits >= S, "Unpacked output type must be big enough to represent the number of bits");

    // maximum value representable by S bits. Built by shifting O's all-ones value right, so the
    // shift count always stays below the width of O (a plain `1U << S` is undefined for S >= 32)
    constexpr O mask = (S == 0) ? O(0) : static_cast<O>(std::numeric_limits<O>::max() >> (std::numeric_limits<O>::digits - (S == 0 ? 1 : S)));

    std::array<O, C> vals{};  // output values array of size C
    for (size_t i = 0; i < C; i++) {
        vals[i] = static_cast<O>(static_cast<O>(packed >> (i * S)) & mask);
        assert(vals[i] <= mask);
    }
    return vals;
}
/**
 * Packs an array of unsigned integers into a single integer.
 * The first input element ends up in the least significant bits of the output.
 * @tparam I Input data type. Must be an unsigned integral type large enough to hold S bits.
 * @tparam O Output data type. Must be an unsigned integral type large enough to hold C * S bits.
 * @tparam S Number of bits in each value.
 * @tparam C Number of values to pack.
 * @param vals Unpacked std::array of type I and size C.
 * @return Packed integer of type O.
 */
template <typename I, typename O, size_t S, size_t C> constexpr O Pack(const std::array<I, C> &vals) {
    // type checking
    static_assert(std::is_unsigned<I>::value, "Unpacked input type must be unsigned");
    static_assert(std::is_unsigned<O>::value, "Packed output type must be unsigned");
    static_assert(std::numeric_limits<I>::digits >= S, "Unpacked input type must be big enough to represent the number of bits");
    static_assert(std::numeric_limits<O>::digits >= (C * S), "Packed output type must be big enough to represent the number of bits multiplied by count");

    // Upper bounds used by the sanity checks. They are built by shifting an all-ones value right,
    // so the shift count stays below the type width even when S or C * S equals the full width
    // (`1 << width` is undefined and made the original final assert misfire in that case).
    [[maybe_unused]] constexpr I val_max = (S == 0) ? I(0) : static_cast<I>(std::numeric_limits<I>::max() >> (std::numeric_limits<I>::digits - (S == 0 ? 1 : S)));
    constexpr size_t total_bits = C * S;
    [[maybe_unused]] constexpr O packed_max = (total_bits == 0) ? O(0) : static_cast<O>(std::numeric_limits<O>::max() >> (std::numeric_limits<O>::digits - (total_bits == 0 ? 1 : total_bits)));

    O packed = 0;  // output value of type O
    for (size_t i = 0; i < C; i++) {
        assert(vals[i] <= val_max);
        packed |= static_cast<O>(static_cast<O>(vals[i]) << (i * S));
    }
    assert(packed <= packed_max);
    return packed;
}
/**
 * Builds a lookup table by evaluating Op on every index.
 * @tparam Size number of table entries
 * @tparam Op function applied to each index in [0, Size)
 * @return array whose element i holds Op(i) converted to uint8_t
 */
template <size_t Size, int Op(int)> constexpr std::array<uint8_t, Size> ExpandArray() {
    // value-initialized so the function can also be evaluated in a constant expression
    std::array<uint8_t, Size> res{};
    // size_t index avoids the signed/unsigned comparison against Size
    for (size_t i = 0; i < Size; i++) { res[i] = static_cast<uint8_t>(Op(static_cast<int>(i))); }
    return res;
}
/**
 * Applies op to every element of a fixed-size sequence.
 * @param input sequence with a value_type and a std::tuple_size (e.g. std::array)
 * @param op callable invoked once per element, in index order
 * @return std::array of the results, same length as the input
 */
template <typename Seq, typename Fn> constexpr auto MapArray(const Seq &input, Fn op) {
    using In = typename Seq::value_type;
    using Out = decltype(op(std::declval<In>()));
    constexpr size_t Count = std::tuple_size<Seq>::value;

    std::array<Out, Count> output;
    unsigned idx = 0;
    while (idx < Count) {
        output[idx] = op(input[idx]);
        ++idx;
    }
    return output;
}
// Rescales an 8-bit value (0..255) to 5 bits (0..31) with rounding, using shifts instead of a division.
template <typename S> constexpr S scale8To5(S v) {
    auto scaled = v * 31 + 128;
    scaled += scaled >> 8;
    return static_cast<S>(scaled >> 8);
}
// Rescales an 8-bit value (0..255) to 6 bits (0..63) with rounding, using shifts instead of a division.
template <typename S> constexpr S scale8To6(S v) {
    auto scaled = v * 63 + 128;
    scaled += scaled >> 8;
    return static_cast<S>(scaled >> 8);
}
// Expands a 5-bit value to 8 bits by replicating its top bits into the low bits.
template <typename S> constexpr S scale5To8(S v) {
    assert5bit(v);
    const auto high_part = v << 3;
    const auto low_part = v >> 2;
    return static_cast<S>(high_part | low_part);
}
// Expands a 6-bit value to 8 bits by replicating its top bits into the low bits.
template <typename S> constexpr S scale6To8(S v) {
    assert6bit(v);
    const auto high_part = v << 2;
    const auto low_part = v >> 4;
    return static_cast<S>(high_part | low_part);
}
// Largest of two, three or four values. On ties (or unordered operands) the later argument wins.
template <typename S> constexpr S maximum(S a, S b) {
    if (a > b) { return a; }
    return b;
}
template <typename S> constexpr S maximum(S a, S b, S c) {
    const S first_two = maximum(a, b);
    return maximum(first_two, c);
}
template <typename S> constexpr S maximum(S a, S b, S c, S d) {
    const S first_three = maximum(maximum(a, b), c);
    return maximum(first_three, d);
}
// Smallest of two, three or four values. On ties (or unordered operands) the later argument wins.
template <typename S> constexpr S minimum(S a, S b) {
    if (a < b) { return a; }
    return b;
}
template <typename S> constexpr S minimum(S a, S b, S c) {
    const S first_two = minimum(a, b);
    return minimum(first_two, c);
}
template <typename S> constexpr S minimum(S a, S b, S c, S d) {
    const S first_three = minimum(minimum(a, b), c);
    return minimum(first_three, d);
}
// Returns a multiplied by itself.
template <typename T> constexpr T square(T a) {
    const T squared = a * a;
    return squared;
}
// Limits value to the interval [low, high]; the default interval is [0, 1].
constexpr float clampf(float value, float low = 0.0f, float high = 1.0f) {
    return (value < low) ? low : ((value > high) ? high : value);
}
// Saturates a 32-bit signed value to the range of uint8_t: negatives become 0, values above 255 become 255.
constexpr uint8_t clamp255(int32_t i) {
    if (i < 0) { return 0; }
    if (i > 255) { return 255; }
    return static_cast<uint8_t>(i);
}
// Limits value to the interval [low, high].
template <typename S> constexpr S clamp(S value, S low, S high) {
    if (value < low) { return low; }
    if (value > high) { return high; }
    return value;
}
// Limits a 32-bit signed value to the interval [low, high].
constexpr int32_t clampi(int32_t value, int32_t low, int32_t high) {
    return (value < low) ? low : ((value > high) ? high : value);
}
// Returns a multiplied by itself.
constexpr int squarei(int a) {
    const int product = a * a;
    return product;
}
// Absolute value of an int.
constexpr int absi(int a) {
    if (a < 0) { return -a; }
    return a;
}
// Linear interpolation: yields a for s == 0 and b for s == 1.
template <typename F> constexpr F lerp(F a, F b, F s) {
    const auto span = b - a;
    return a + span * s;
}
/**
 * Minimal positional string formatter: every occurrence of "{i}" in str is replaced by the i-th argument.
 * Placeholders are substituted in argument order, so text inserted for an earlier argument is
 * still subject to substitution by later arguments.
 * @param str format string containing placeholders such as {0}, {1}, ...
 * @param args values to insert; each must be convertible to std::string
 * @return the formatted string
 */
template <typename... Args> std::string Format(const char *str, const Args &...args) {
    std::string output(str);
    // one string per argument. Single braces on purpose: `{{a, b}}` with two const char*
    // arguments would be read as a single string built from an iterator pair.
    const std::vector<std::string> values = {std::string(args)...};

    for (size_t i = 0; i < values.size(); i++) {
        const std::string key = "{" + std::to_string(i) + "}";
        const std::string &value = values[i];

        size_t where = output.find(key);
        while (where != std::string::npos) {
            output.replace(where, key.length(), value);
            // resume after the inserted text, otherwise a value containing its own placeholder loops forever
            where = output.find(key, where + value.length());
        }
    }
    return output;
}

View File

@ -1,313 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <cassert>
#include <concepts>
#include <limits>
#include <numeric>
#include <type_traits>
#include "iterator.h"
#include "util/math.h"
#include "util/ranges.h"
#define UINT5_MAX 0x1FU // 31
#define UINT6_MAX 0x3FU // 63
// The argument is parenthesized so that an expression argument (e.g. `a & b`) is compared
// as a whole instead of being re-associated by operator precedence.
#define assert5bit(x) assert((x) <= UINT5_MAX)
#define assert6bit(x) assert((x) <= UINT6_MAX)
namespace quicktex {
/**
 * Rescales an 8-bit value to N bits with rounding.
 * @tparam N number of bits in the result, must be below 8
 * @param v value in [0, 255]
 * @return v mapped to the range [0, 2^N - 1]
 */
template <size_t N, typename S> S scale_from_8(S v) {
    static_assert(N < 8);
    assert(v < (1 << 8));

    const unsigned top = (1 << N) - 1;  // largest N-bit value
    unsigned scaled = (v * top) + 128;
    scaled += scaled >> 8;  // shift-based substitute for dividing by 255

    const auto result = static_cast<S>(scaled >> 8);
    assert(result < (1 << N));
    return result;
}
/**
 * Expands an N-bit value to 8 bits by replicating its most significant bits into the low bits.
 * @tparam N number of bits in the input, 4 <= N < 8
 * @param v value in [0, 2^N - 1]
 * @return v mapped to the range [0, 255]
 */
template <size_t N, typename S> S scale_to_8(S v) {
    static_assert(N < 8);
    // for N < 4 the right-shift amount below (N - (8 - N)) would wrap around
    static_assert(2 * N >= 8, "scale_to_8 needs N >= 4: a single replication must fill all 8 bits");
    assert(v < (1 << N));

    constexpr unsigned Lshift = 8 - N;
    constexpr unsigned Rshift = N - Lshift;

    S result = static_cast<S>((v << Lshift) | (v >> Rshift));
    assert(result < (1 << 8));  // check the output, not the input
    return result;
}
/**
 * Unpacks an unsigned integer into a range of smaller integers.
 * @param packed value to unpack
 * @param begin destination start iterator
 * @param end destination end iterator
 * @param widths widths iterator. values are in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return the total number of bits unpacked
 */
template <typename P, typename OI, typename WI>
    requires std::unsigned_integral<P> && std::output_iterator<OI, P> && std::forward_iterator<WI>
size_t unpack_into(P packed, OI begin, OI end, WI widths, bool little_endian = true) {
    using U = std::remove_cvref_t<decltype(*begin)>;

    // Mask with the least significant w bits set. It is computed in P rather than int so it is
    // correct for every width up to the full size of P (`1 << w` breaks from w == 31 on).
    auto low_bits = [](unsigned w) -> P {
        constexpr unsigned p_digits = std::numeric_limits<P>::digits;
        return (w >= p_digits) ? std::numeric_limits<P>::max() : static_cast<P>((P{1} << w) - 1);
    };

    if (little_endian) {
        // first element is in the least significant place of packed
        unsigned offset = 0;
        while (begin < end) {
            const unsigned w = static_cast<unsigned>(*(widths++));
            assert(w <= static_cast<unsigned>(std::numeric_limits<U>::digits));
            *(begin++) = (packed >> offset) & low_bits(w);  // write to output
            offset += w;                                    // increment offset
        }
        assert(offset <= std::numeric_limits<P>::digits);  // detect an overflow condition
        return offset;
    } else {
        // first element is in the most significant place of packed
        // with non-constant width, we either need to iterate backwards or
        // add up all the widths beforehand to know where to begin
        unsigned total_offset = std::accumulate(widths, widths + std::distance(begin, end), 0);
        assert(total_offset <= std::numeric_limits<P>::digits);  // detect an overflow condition

        unsigned offset = total_offset;
        while (begin < end) {
            const unsigned w = static_cast<unsigned>(*(widths++));
            offset -= w;  // decrement offset
            // same bound as the little-endian branch: a w-bit value fits U when w <= digits
            assert(w <= static_cast<unsigned>(std::numeric_limits<U>::digits));
            *(begin++) = (packed >> offset) & low_bits(w);  // write to output
        }
        return total_offset;
    }
}
/**
 * Range convenience overload: unpacks an unsigned integer into a destination range, taking one
 * width per destination element from a widths range of the same size.
 * @param packed value to unpack
 * @param dest destination range
 * @param widths widths range. values are in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return the total number of bits unpacked
 */
template <typename P, typename OR, typename WR>
    requires std::unsigned_integral<P> && range<OR> && range<WR>
size_t unpack_into(P packed, OR &dest, const WR &widths, bool little_endian = true) {
    assert(size(dest) == size(widths));
    auto out_first = dest.begin();
    auto out_last = dest.end();
    return unpack_into(packed, out_first, out_last, widths.begin(), little_endian);
}
/**
 * Fixed-width overload: unpacks an unsigned integer into an iterator range where every element
 * occupies the same number of bits.
 * @param packed value to unpack
 * @param begin destination start iterator
 * @param end destination end iterator
 * @param width width of each packed element in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return the total number of bits unpacked
 */
template <typename P, typename OI>
    requires std::unsigned_integral<P> && std::output_iterator<OI, P>
size_t unpack_into(P packed, OI begin, OI end, size_t width, bool little_endian = true) {
    // an iterator that yields `width` for every element stands in for the widths sequence
    auto every_width = const_iterator(width);
    return unpack_into(packed, begin, end, every_width, little_endian);
}
/**
 * Fixed-width range overload: unpacks an unsigned integer into a destination range where every
 * element occupies the same number of bits.
 * @param packed value to unpack
 * @param dest destination range
 * @param width width of each packed element in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return the total number of bits unpacked
 */
template <typename P, typename OR>
    requires std::unsigned_integral<P> && range<OR>
size_t unpack_into(P packed, OR &dest, size_t width, bool little_endian = true) {
    auto out_first = dest.begin();
    auto out_last = dest.end();
    // an iterator that yields `width` for every element stands in for the widths sequence
    return unpack_into(packed, out_first, out_last, const_iterator(width), little_endian);
}
/**
 * Unpacks an unsigned integer into an array of smaller integers
 * @tparam U unpacked data type
 * @tparam N number of values to unpack
 * @param packed value to unpack
 * @param widths widths iterator. values are in bits; it must provide at least N widths
 * @param little_endian if the input has the first element in the least significant place
 * @return an array of unpacked values
 */
template <typename U, size_t N, typename P, typename WI>
    requires std::unsigned_integral<P> && std::forward_iterator<WI>
std::array<U, N> unpack(P packed, WI widths, bool little_endian = true) {
    std::array<U, N> unpacked;
    // `widths` is an iterator here, so the iterator-based unpack_into has to be addressed
    // explicitly; passing the array itself matches none of the unpack_into overloads
    unpack_into(packed, unpacked.begin(), unpacked.end(), widths, little_endian);
    return unpacked;
}
/**
 * Unpacks an unsigned integer into an array of smaller integers, one width per element
 * @tparam U unpacked data type
 * @tparam N number of values to unpack
 * @param packed value to unpack
 * @param widths widths array. values are in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return an array of unpacked values
 */
template <typename U, size_t N, typename P>
    requires std::unsigned_integral<P>
std::array<U, N> unpack(P packed, const std::array<size_t, N> &widths, bool little_endian = true) {
    auto first_width = widths.begin();
    return unpack<U, N>(packed, first_width, little_endian);
}
/**
 * Unpacks an unsigned integer into an array of smaller integers, one width per element
 * @tparam U unpacked data type
 * @tparam N number of values to unpack
 * @param packed value to unpack
 * @param widths widths range holding exactly N values, in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return an array of unpacked values
 */
template <typename U, size_t N, typename P, typename WR>
    requires std::unsigned_integral<P> && range<WR>
std::array<U, N> unpack(P packed, const WR &widths, bool little_endian = true) {
    assert(N == size(widths));
    auto first_width = widths.begin();
    return unpack<U, N>(packed, first_width, little_endian);
}
/**
 * Unpacks an unsigned integer into an array of equally sized smaller integers
 * @tparam U unpacked data type
 * @tparam N number of values to unpack
 * @param packed value to unpack
 * @param width width of each packed element in bits
 * @param little_endian if the input has the first element in the least significant place
 * @return an array of unpacked values
 */
template <typename U, size_t N, typename P>
    requires std::unsigned_integral<P>
std::array<U, N> unpack(P packed, size_t width, bool little_endian = true) {
    std::array<U, N> fields;
    // the range overload of unpack_into fills every element of `fields`
    unpack_into(packed, fields, width, little_endian);
    return fields;
}
/**
 * Packs an iterable of integers into a single integer.
 * Each value is truncated to its width before it is packed.
 * @tparam II input iterator type
 * @tparam WI width iterator type
 * @tparam P Output data type. must be an unsigned integral type large enough to hold all input values
 * @param start start iterator
 * @param end end iterator
 * @param widths width iterator. must be at least as large as the input data
 * @param little_endian if the output value should have the first element in the least significant place
 * of the output or not
 * @return Packed integer of type P.
 */
template <typename P, typename II, typename WI>
    requires std::unsigned_integral<P> && std::input_iterator<II> && std::input_iterator<WI>
inline constexpr P pack(II start, II end, WI widths, bool little_endian = true) {
    constexpr unsigned p_digits = std::numeric_limits<P>::digits;

    P packed = 0;
    unsigned offset = 0;
    while (start < end) {
        const P raw = static_cast<P>(*(start++));
        const unsigned w = static_cast<unsigned>(*(widths++));

        // Mask of the low w bits, computed in P: `1 << w` is an int shift and is undefined
        // (in practice wrong) for widths of 31 bits and more.
        const P mask = (w >= p_digits) ? std::numeric_limits<P>::max() : static_cast<P>((P{1} << w) - 1);
        const P val = raw & mask;

        assert(offset + w <= p_digits);  // detect an overflow condition before shifting
        if (little_endian) {
            packed |= static_cast<P>(val << offset);  // first element is in the least significant place of packed
        } else {
            // first element is in the most significant place of packed; a full-width element
            // replaces everything, because shifting by the whole width of P is undefined
            packed = (w >= p_digits) ? val : static_cast<P>((packed << w) | val);
        }
        offset += w;  // increment offset
    }
    assert(offset <= p_digits);  // detect an overflow condition
    return packed;
}
/**
 * Packs an iterable of integers into a single integer.
 * @tparam IR input range type
 * @tparam WR width range type
 * @tparam P Output data type. must be an unsigned integral type large enough to hold all input values
 * @param r range of values to pack
 * @param widths range of widths to pack with. must have the same size as r
 * @param little_endian if the output value should have the first element in the least significant place
 * of the output or not
 * @return Packed integer of type P.
 */
template <typename P, typename IR, typename WR>
    requires std::unsigned_integral<P> && range<IR> && range<WR>
inline constexpr P pack(IR r, WR widths, bool little_endian = true) {
    assert(size(widths) == size(r));
    // ranges expose begin(), not start(): the original call could not compile once instantiated
    return pack<P>(r.begin(), r.end(), widths.begin(), little_endian);
}
/**
 * Packs an iterable of equally sized integers into a single integer.
 * @tparam II input iterator type
 * @tparam P Output data type. must be an unsigned integral type large enough to hold all input values
 * @param start start iterator
 * @param end end iterator
 * @param width Number of bits in each value
 * @param little_endian if the output value should have the first element in the least significant place
 * of the output or not
 * @return Packed integer of type P.
 */
template <typename P, typename II>
    requires std::unsigned_integral<P> && std::input_iterator<II>
inline constexpr P pack(II start, II end, size_t width, bool little_endian = true) {
    // an iterator that yields `width` for every element stands in for the widths sequence
    auto every_width = const_iterator(width);
    return pack<P>(start, end, every_width, little_endian);
}
/**
 * Packs a range of equally sized integers into a single integer.
 * @tparam IR range type
 * @tparam P Output data type. must be an unsigned integral type large enough to hold all input values
 * @param r range of values to pack
 * @param width Number of bits in each value
 * @param little_endian if the output value should have the first element in the least significant place
 * of the output or not
 * @return Packed integer of type P.
 */
template <typename P, typename IR>
    requires std::unsigned_integral<P> && range<IR>
inline constexpr P pack(IR r, size_t width, bool little_endian = true) {
    auto in_first = r.begin();
    auto in_last = r.end();
    // an iterator that yields `width` for every element stands in for the widths sequence
    return pack<P>(in_first, in_last, const_iterator(width), little_endian);
}
} // namespace quicktex

View File

@ -1,146 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
namespace quicktex {
namespace detail {
// Type obtained by subscripting an lvalue of R with an integer, with any reference removed.
template <class R>
using subs_value_t = typename std::remove_reference<decltype(std::declval<R &>()[0])>::type;
}  // namespace detail
/**
 * CRTP base for iterators whose whole state is an integer index.
 * It supplies the increment, decrement, offset, difference, ordering and subscript operators;
 * the derived class D supplies operator* (and equality).
 * @tparam D the derived iterator type
 * @tparam T the type referred to by the iterator
 */
template <typename D, typename T> class index_iterator_base {
   public:
    using value_type = T;
    using size_type = int;
    using difference_type = int;

    D &operator++() {
        _index++;
        return static_cast<D &>(*this);
    }
    D operator++(int) {
        D old = static_cast<D &>(*this);
        _index++;
        return old;
    }
    D &operator--() {
        _index--;
        return static_cast<D &>(*this);
    }
    D operator--(int) {
        D old = static_cast<D &>(*this);
        _index--;
        return old;
    }

    D operator+(difference_type rhs) const {
        D d = static_cast<const D &>(*this);
        d._index += rhs;
        return d;
    }
    D operator-(difference_type rhs) const {
        D d = static_cast<const D &>(*this);
        d._index -= rhs;
        return d;
    }

    // The compound assignments adjust the index in place and downcast explicitly: returning
    // `*this` (a base reference) as `D &` does not compile once these members are instantiated.
    D &operator+=(difference_type rhs) {
        _index += rhs;
        return static_cast<D &>(*this);
    }
    D &operator-=(difference_type rhs) {
        _index -= rhs;
        return static_cast<D &>(*this);
    }

    difference_type operator-(const D &rhs) const { return (difference_type)_index - rhs._index; }
    friend D operator+(difference_type lhs, const D &rhs) { return rhs + lhs; }
    friend auto operator<=>(const D &lhs, const D &rhs) { return lhs._index <=> rhs._index; }

    // NOTE(review): both subscript operators dereference a temporary iterator; that is only safe
    // when D::operator* refers to storage outside of the iterator itself -- confirm per derived type.
    T &operator[](difference_type i) { return *(static_cast<D &>(*this) + i); }
    T &operator[](difference_type i) const { return *(static_cast<const D &>(*this) + i); }

   protected:
    int _index;

   private:
    // only the derived class may construct the base
    friend D;
    index_iterator_base(size_t index = 0) : _index(index) {}
};
/**
 * Iterator over any subscriptable range: it stores a pointer to the range and an index,
 * and dereferences through the range's operator[].
 */
template <typename R>
    requires requires(const R &r) { r[0]; }
class index_iterator : public index_iterator_base<index_iterator<R>, detail::subs_value_t<R>> {
   public:
    using base = index_iterator_base<index_iterator<R>, detail::subs_value_t<R>>;
    using typename base::difference_type;
    using typename base::size_type;
    using typename base::value_type;

    // a default-constructed iterator refers to no range and must not be dereferenced
    index_iterator() : base(0), _range(nullptr) {}
    index_iterator(R &range, int index) : base(index), _range(&range) {}

    value_type &operator*() const {
        assert(_range != nullptr);
        const auto position = this->_index;
        assert(position >= 0);
        assert(position < (size_type)_range->size());
        return (*_range)[position];
    }
    value_type *operator->() const { return &**this; }

    // equal when both iterators sit at the same index of the same range object
    friend bool operator==(const index_iterator &lhs, const index_iterator &rhs) {
        if (lhs._range != rhs._range) { return false; }
        return lhs._index == rhs._index;
    }

   private:
    R *_range;
};
/**
 * Iterator that yields the same stored value at every position; only its index moves.
 */
template <typename T> class const_iterator : public index_iterator_base<const_iterator<T>, const T> {
   public:
    using base = index_iterator_base<const_iterator<T>, const T>;
    using typename base::difference_type;
    using typename base::size_type;
    using typename base::value_type;

    const_iterator() : base(0), _value(T{}) {}
    const_iterator(T value, int index = 0) : base(index), _value(value) {}

    // dereferencing never depends on the index
    value_type &operator*() const { return _value; }
    value_type *operator->() const { return &**this; }

    // equal when both the stored value and the position match
    friend bool operator==(const const_iterator &lhs, const const_iterator &rhs) {
        if (!(lhs._value == rhs._value)) { return false; }
        return lhs._index == rhs._index;
    }

   private:
    T _value;
};
// const_iterator is guaranteed to be a random access iterator. it is not writable for obvious reasons
static_assert(std::random_access_iterator<const_iterator<int>>);
// index_iterator over a subscriptable range (here std::array) satisfies random_access_iterator
static_assert(std::random_access_iterator<index_iterator<std::array<int, 4>>>);
} // namespace quicktex

View File

@ -1,178 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <tuple>
#include <xsimd/xsimd.hpp>
#include "util/ranges.h"
namespace quicktex {
namespace detail {
// Satisfied by random-access ranges whose begin() iterator is contiguous and whose value type is
// arithmetic. The chunker below loads and stores such ranges in xsimd batches.
template <typename T>
concept simdable = random_access_range<T> && std::contiguous_iterator<decltype(std::declval<T>().begin())> &&
std::is_arithmetic_v<range_value_t<T>>;
// Primary template, intentionally empty: the constrained partial specializations below provide
// steps, chunk_type, chunk_sizes, chunk_count, get_chunk and set_chunk.
template <typename T, bool serial = false> struct chunker_impl {};
// Specialization for simdable ranges when serial processing was not requested.
// The range is handled in two steps: step 0 covers it in whole xsimd batches,
// step 1 covers the remaining elements one scalar at a time.
template <typename T, bool serial>
requires simdable<T> && (!serial)
struct chunker_impl<T, serial> {
// range with contiguous, SIMDable data
static constexpr size_t steps = 2;
// chunk type per step: a batch for step 0, a single element for step 1
using chunk_types = std::tuple<xsimd::batch<range_value_t<T>>, range_value_t<T>>;
template <size_t step> using chunk_type = std::tuple_element_t<step, chunk_types>;
// number of elements per chunk in each step
static constexpr std::array<size_t, 2> chunk_sizes = {chunk_type<0>::size, 1};
// number of chunks in the given step: full batches for step 0, leftover elements otherwise
template <size_t step> static constexpr size_t chunk_count(const T& r) {
if constexpr (step == 0) {
return std::size(r) / chunk_sizes[0];
} else {
return std::size(r) % chunk_sizes[0];
}
}
// reads chunk i of the given step
template <size_t step> static constexpr auto get_chunk(const T& r, size_t i) {
assert(i < chunk_count<step>(r));
if constexpr (step == 0) {
return xsimd::load_unaligned(&r[chunk_sizes[0] * i]);
} else {
// leftover elements start right after the last full batch
return r[chunk_sizes[0] * chunk_count<0>(r) + i];
}
}
// writes chunk i of the given step
template <size_t step>
static constexpr void set_chunk(T& r, size_t i, const std::tuple_element_t<step, chunk_types>& c) {
assert(i < chunk_count<step>(r));
if constexpr (step == 0) {
xsimd::store_unaligned(&r[chunk_sizes[0] * i], c);
} else {
// leftover elements start right after the last full batch
r[chunk_sizes[0] * chunk_count<0>(r) + i] = c;
}
}
};
// Specialization for random-access ranges that are not simdable, or for which serial processing
// was requested: a single step in which every element is its own chunk.
template <typename T, bool serial>
requires random_access_range<T> && (!simdable<T> || serial)
struct chunker_impl<T, serial> {
// range with data that cant be SIMDed
static constexpr size_t steps = 1;
// the chunk type is the element type, whatever step is asked for
template <size_t step> using chunk_type = range_value_t<T>;
static constexpr std::array<size_t, 1> chunk_sizes = {1};
// the step parameter is ignored by all three accessors
template <size_t step> static constexpr size_t chunk_count(const T& r) { return r.size(); }
template <size_t step> static constexpr auto get_chunk(const T& r, size_t i) { return r[i]; }
template <size_t step> static constexpr void set_chunk(T& r, size_t i, const chunk_type<0>& c) { r[i] = c; }
};
// Specialization for types that are not sized ranges: the whole object is treated as the one and
// only chunk, and get_chunk returns it regardless of the requested index.
template <typename T, bool serial>
requires(!sized_range<T>)
struct chunker_impl<T, serial> {
static constexpr size_t steps = 1;
using chunk_types = std::tuple<T>;
template <size_t step> using chunk_type = T;
static constexpr std::array<size_t, 1> chunk_sizes = {1};
template <size_t step> static constexpr size_t chunk_count(const T&) { return 1; }
// the index argument is unused: every chunk is the object itself
template <size_t step> static constexpr auto get_chunk(const T& r, size_t) { return r; }
template <size_t step> static constexpr void set_chunk(T& r, size_t, const T& c) { r = c; }
};
// Shorthand for the chunk type that chunker_impl<T, serial> uses in the given step.
template <typename T, bool serial = false, size_t step = 0>
using chunk_type = typename chunker_impl<T, serial>::template chunk_type<step>;
// True when Op can be invoked with the step-th chunk type of every argument and its result is
// convertible to the step-th chunk type of T.
template <typename T, bool serial, typename Op, std::size_t step, typename... Args>
static constexpr bool callable_step() {
return std::is_invocable_r_v<typename chunker_impl<T, serial>::template chunk_type<step>, Op,
typename chunker_impl<Args, serial>::template chunk_type<step>...>;
}
// True when callable_step holds for every step listed in the index sequence.
template <typename T, bool serial, typename Op, typename... Args, std::size_t... steps>
static constexpr bool callable_steps(std::index_sequence<steps...>) {
return (callable_step<T, serial, Op, steps, Args...>() && ...);
}
// Entry point of the check. As written it only inspects step 0 (make_index_sequence<1>);
// the commented-out lines are an earlier variant that the source keeps disabled.
template <typename T, bool serial, typename Op, typename... Args> static constexpr bool callable() {
// if constexpr (!(std::same_as<T, Args> && ...)) return false;
// return callable_steps<T, serial, Op>(std::make_index_sequence<chunker_impl<T, serial>::steps>());
return callable_steps<T, serial, Op, Args...>(std::make_index_sequence<1>());
}
// Runs one chunking step: for every chunk index of `result` in this step, fetches the matching
// chunk of each argument, applies f and stores the outcome into `result`.
// Every argument must be either a scalar or of the same type as the result.
template <typename T, bool serial, size_t step, typename... Args>
requires((std::is_scalar_v<Args> || std::same_as<T, Args>) && ...)
inline void do_map_step(auto f, T& result, const Args&... args) {
using impl = chunker_impl<T, serial>;
using chunk_type = typename impl::template chunk_type<step>;
// the chunk count is taken from the result, not from the arguments
size_t chunk_count = impl::template chunk_count<step>(result);
for (unsigned i = 0; i < chunk_count; i++) {
chunk_type out_chunk = f(chunker_impl<Args, serial>::template get_chunk<step>(args, i)...);
impl::template set_chunk<step>(result, i, out_chunk);
}
}
// Runs do_map_step once for every step index in the sequence, in order (comma fold).
template <typename T, bool serial, typename Op, std::size_t... steps, typename... Args>
requires((std::is_scalar_v<Args> || std::same_as<T, Args>) && ...)
inline void do_map_steps(Op f, T& result, std::index_sequence<steps...>, const Args&... args) {
// static_assert(callable<T, serial, Op, Args...>());
(do_map_step<T, serial, steps>(f, result, args...), ...);
}
// Maps f over all chunks of the arguments into `result`.
// Falls back to serial (element-wise) processing when the caller asked for it, or when f cannot
// be invoked with the non-serial chunk types.
template <typename T, bool serial, typename Op, typename... Args>
    requires((std::is_scalar_v<Args> || std::same_as<T, Args>) && ...)
inline void do_map_all(Op f, T& result, const Args&... args) {
    constexpr bool must_serialize = serial || !callable<T, false, Op, Args...>();
    // The step count has to come from the chunker that actually runs. Taking it from
    // chunker_impl<T, serial> yields two steps for a simdable T even when the serial chunker is
    // used, and the serial chunker walks the whole range in every step, so f ran twice per element.
    using impl = chunker_impl<T, must_serialize>;
    do_map_steps<T, must_serialize>(f, result, std::make_index_sequence<impl::steps>(), args...);
}
} // namespace detail
// Element-wise map into a result of a different type R: result[i] = f(in[i], args[i]...).
// No chunking is attempted because the input and result types differ.
template <typename R, typename T, bool serial = false, typename Op, typename... Args>
    requires sized_range<T> && (sized_range<Args> && ...)
inline R map_to(Op f, const T& in, const Args&... args) {
    R result{};
    unsigned idx = 0;
    while (idx < in.size()) {
        result[idx] = f(in[idx], args[idx]...);
        ++idx;
    }
    return result;
}
// Applies f element-wise to `in` and the additional arguments.
// When every extra argument is a scalar or has the same type as `in`, and f is invocable on the
// serial chunk types with a result convertible to T's element type, the result is a T filled by
// the chunked implementation. Otherwise the result is a std::array of f's result type, filled
// element by element by map_to.
template <typename T, bool serial = false, typename Op, typename... Args>
requires sized_range<T>
inline auto map(Op f, const T& in, const Args&... args) {
// assert(((in.size() == args.size())) && ...);
if constexpr (((std::is_scalar_v<Args> || std::same_as<T, Args>)&&...) &&
(detail::callable<T, true, Op, T, Args...>())) {
// the input and result types are all the same type and size, so we can attempt chunking
T result{};
detail::do_map_all<T, serial>(f, result, in, args...);
return result;
} else {
// the element type of the result is whatever f returns for scalar elements
using result_type = std::invoke_result_t<Op, typename detail::chunk_type<T, true>, range_value_t<Args>...>;
return map_to<std::array<result_type, std::tuple_size_v<T>>, T, serial>(f, in, args...);
}
}
} // namespace quicktex

View File

@ -1,84 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021-2022 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cassert>
#include <cstdint>
#include <functional>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
#include <vector>
#include "util/ranges.h"
#include "xsimd/xsimd.hpp"
namespace quicktex {
using std::abs; // abs overload for builtin types
using xsimd::abs; // abs overload for xsimd buffers
// abs overload for any type that provides an abs() member function; forwards to that member
template <typename S>
requires requires(S &s) { s.abs(); }
constexpr S abs(S value) {
return value.abs();
}
// clamp overload for any type that provides a clamp(low, high) member function;
// both bounds share the type R and are passed straight to the member
template <typename S, typename R>
requires requires(S s, R r) { s.clamp(r, r); }
constexpr S clamp(S value, R low, R high) {
return value.clamp(low, high);
}
// Scalar clamp: returns low when value is below it, high when value is above it,
// and value itself otherwise. The bounds are asserted to be ordered (low <= high).
template <typename S>
    requires std::is_scalar_v<S>
constexpr S clamp(S value, S low, S high) {
    assert(low <= high);
    return (value < low) ? low : ((value > high) ? high : value);
}
// clamp overload for xsimd batches with batch-valued bounds; delegates to xsimd::clip
template <typename S, typename A>
constexpr xsimd::batch<S, A> clamp(xsimd::batch<S, A> value, const xsimd::batch<S, A> &low,
const xsimd::batch<S, A> &high) {
return xsimd::clip(value, low, high);
}
// clamp overload for xsimd batches with scalar bounds: each bound is broadcast to a full
// batch and the batch/batch overload does the work.
template <typename S, typename A>
constexpr xsimd::batch<S, A> clamp(xsimd::batch<S, A> value, const S &low, const S &high) {
    // Name the architecture explicitly: a bare xsimd::broadcast(low) yields a batch of the
    // default architecture, which does not match `value` when A is any other architecture.
    return clamp(value, xsimd::broadcast<S, A>(low), xsimd::broadcast<S, A>(high));
}
// sum overload for any type that provides a sum() member function; forwards to that member
template <typename S>
requires requires(S &s) { s.sum(); }
constexpr auto sum(S value) {
return value.sum();
}
// sum overload for plain scalars.
// Horizontally adding a single value is a no-op, so the input is handed back unchanged.
template <typename S>
    requires std::is_scalar_v<S>
constexpr auto sum(S value) {
    const S total = value;
    return total;
}
// sum overload for xsimd batches; delegates to xsimd::hadd
template <typename S, typename A> constexpr auto sum(xsimd::batch<S, A> value) { return xsimd::hadd(value); }
} // namespace quicktex

View File

@ -1,74 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <algorithm>
#include <array>
#include <cassert>
#include <cstdint>
#include <functional>
#include <iterator>
#include <limits>
#include <numeric>
#include <string>
#include <type_traits>
namespace quicktex {
// std::ranges is not usable by default in libc++ 13
// Minimal stand-in for std::ranges::range: satisfied by any type with begin() and end()
// member functions callable on an lvalue
template <class T>
concept range = requires(T &t) {
t.begin();
t.end();
};
// Make std::size visible in this namespace, and add an overload for any `range` that
// computes the size as the iterator distance from begin() to end()
using std::size;
template <range T> constexpr auto size(const T &range) { return std::distance(range.begin(), range.end()); }
// A range for which an unqualified size(t) call is well-formed.
// NOTE(review): the distance-based size() overload declared just above accepts every `range`,
// so this concept looks like it is satisfied by any range - confirm that is intended.
template <class T>
concept sized_range = range<T> && requires(T &t) { size(t); };
// Associated-type aliases for a range R, derived from its begin()/end() members and from
// the std::iter_* traits of its iterator type
template <class R> using iterator_t = decltype(std::declval<R &>().begin());
template <class R> using sentinel_t = decltype(std::declval<R &>().end());
template <class R> using range_size_t = decltype(size(std::declval<R &>()));
template <class R> using range_difference_t = std::iter_difference_t<iterator_t<R>>;
template <class R> using range_value_t = std::iter_value_t<iterator_t<R>>;
template <class R> using range_reference_t = std::iter_reference_t<iterator_t<R>>;
template <class R> using range_rvalue_reference_t = std::iter_rvalue_reference_t<iterator_t<R>>;
// Range refinements: each one is `range` plus the matching standard iterator concept
// applied to the range's iterator type
template <class R>
concept input_range = range<R> && std::input_iterator<iterator_t<R>>;
// output_range additionally names the value type T that can be written through the iterator
template <class R, typename T>
concept output_range = range<R> && (std::output_iterator<iterator_t<R>, T>);
template <class R>
concept forward_range = range<R> && std::forward_iterator<iterator_t<R>>;
template <class R>
concept bidirectional_range = range<R> && std::bidirectional_iterator<iterator_t<R>>;
template <class R>
concept random_access_range = range<R> && std::random_access_iterator<iterator_t<R>>;
template <class R>
concept contiguous_range = range<R> && std::contiguous_iterator<iterator_t<R>>;
} // namespace quicktex

View File

@ -1,97 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <array>
#include <type_traits>
#include "util/math.h"
#include "util/types.h"
#include "xsimd/xsimd.hpp"
// Shorthand for xsimd's architecture dispatch tag.
// NOTE(review): this alias is declared before the quicktex::simd namespace opens, so it is
// visible in the global namespace of every file that includes this header.
template <typename T> using requires_arch = xsimd::kernel::requires_arch<T>;
namespace quicktex::simd {
namespace kernel {
#if XSIMD_WITH_NEON64
// NEON64 kernels: one overload per 8/16/32-bit signed and unsigned lane type.
// Each forwards to the vaddlvq_* intrinsic for that lane type and returns the next wider
// integer type.
template <class A> inline int16_t whadd(xsimd::batch<int8_t, A> const& arg, requires_arch<xsimd::neon64>) {
return vaddlvq_s8(arg);
}
template <class A> inline int32_t whadd(xsimd::batch<int16_t, A> const& arg, requires_arch<xsimd::neon64>) {
return vaddlvq_s16(arg);
}
template <class A> inline int64_t whadd(xsimd::batch<int32_t, A> const& arg, requires_arch<xsimd::neon64>) {
return vaddlvq_s32(arg);
}
template <class A> inline uint16_t whadd(xsimd::batch<uint8_t, A> const& arg, requires_arch<xsimd::neon64>) {
return vaddlvq_u8(arg);
}
template <class A> inline uint32_t whadd(xsimd::batch<uint16_t, A> const& arg, requires_arch<xsimd::neon64>) {
return vaddlvq_u16(arg);
}
template <class A> inline uint64_t whadd(xsimd::batch<uint32_t, A> const& arg, requires_arch<xsimd::neon64>) {
return vaddlvq_u32(arg);
}
#endif
#if XSIMD_WITH_SSE2
// SSE2 kernel, provided for int16_t lanes only: returns the sum of all lanes as an int32_t
template <class A> inline int32_t whadd(xsimd::batch<int16_t, A> const& arg, requires_arch<xsimd::sse2>) {
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
xsimd::batch<int32_t, A> paired = _mm_madd_epi16(arg, _mm_set1_epi16(1));
return xsimd::hadd(paired);
}
#endif
#if XSIMD_WITH_AVX2
// AVX2 kernel, provided for int16_t lanes only: same approach as the SSE2 kernel, using the
// 256-bit _mm256_* intrinsics
template <class A> inline int32_t whadd(xsimd::batch<int16_t, A> const& arg, requires_arch<xsimd::avx2>) {
// Pairwise widening sum with multiply by 1, then sum all N/2 widened lanes
xsimd::batch<int32_t, A> paired = _mm256_madd_epi16(arg, _mm256_set1_epi16(1));
return xsimd::hadd(paired);
}
#endif
// Generic kernel: stores the batch into an aligned scratch array and accumulates the lanes
// one at a time in the next wider integer type. Intended to work on every architecture.
template <class A, class T> inline next_size_t<T> whadd(xsimd::batch<T, A> const& arg, requires_arch<xsimd::generic>) {
    using wide_t = next_size_t<T>;
    constexpr auto lane_count = xsimd::batch<T, A>::size;

    alignas(A::alignment()) T lanes[lane_count];
    arg.store_aligned(lanes);

    wide_t total = 0;
    for (std::size_t lane = 0; lane < lane_count; ++lane) { total += static_cast<wide_t>(lanes[lane]); }
    return total;
}
} // namespace kernel
// Public entry point: sums every lane of the batch and returns the total in the next wider
// integer type. An instance of the batch's architecture tag A selects the kernel overload.
template <class A, class T> inline next_size_t<T> whadd(xsimd::batch<T, A> const& arg) {
return kernel::whadd(arg, A{});
}
} // namespace quicktex::simd

View File

@ -1,97 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <concepts>
#include <iterator>
#include "util/ranges.h"
namespace quicktex {
// A non-owning view over the iterator pair [begin, end).
// Copying a subrange copies the two iterators only; the underlying elements are untouched.
template <std::input_or_output_iterator I, std::sentinel_for<I> S = I> struct subrange {
   public:
    using iterator_type = I;
    using sentinel_type = S;
    using value_type = std::iter_value_t<I>;
    using reference_type = std::iter_reference_t<I>;
    using difference_type = std::iter_difference_t<I>;

    constexpr subrange(const I& b, const S& e) : _begin(b), _end(e) {}

    constexpr I begin() const { return _begin; }
    constexpr S end() const { return _end; }

    constexpr bool empty() const { return _begin == _end; }

    // Number of elements from begin() to end().
    // std::distance takes (first, last); passing them the other way round yields a negative
    // count for random access iterators and is undefined for weaker iterator categories.
    constexpr difference_type size() const { return std::distance(_begin, _end); }

    explicit constexpr operator bool() const { return !empty(); }

    // Moves the start of the range by n positions and returns *this.
    // Moving forward stops early once begin() reaches end(). Moving backward requires a
    // bidirectional iterator.
    constexpr subrange& advance(difference_type n) {
        if (n > 0) {
            for (difference_type i = 0; i < n && _begin != _end; i++) { _begin++; }
        } else if constexpr (std::bidirectional_iterator<I>) {
            // NOTE(review): the `_begin != _end` guard is kept from the original code; it means
            // an empty subrange is never moved backward - confirm that this is intended.
            for (difference_type i = 0; i > n && _begin != _end; i--) { _begin--; }
        } else {
            // the decrement is only compiled for bidirectional iterators, so advance() stays
            // usable with forward iterators as long as n is not negative
            assert(n >= 0);  // forward iterators cannot be decremented
        }
        return *this;
    }

    // Returns a copy of this subrange whose start has been advanced by n.
    constexpr subrange next(difference_type n = 1) const {
        auto tmp = *this;
        return tmp.advance(n);
    }

    // Returns a copy of this subrange whose start has been moved back by n.
    template <typename _ = I>
        requires std::bidirectional_iterator<I>
    constexpr subrange prev(difference_type n = 1) const {
        return next(-n);
    }

    // Indexed access relative to begin(); the index is asserted to lie in [0, size()).
    template <typename _ = I>
        requires std::random_access_iterator<I>
    constexpr reference_type operator[](difference_type i) {
        assert(i >= 0 && i < size());
        return _begin[i];
    }

    template <typename _ = I>
        requires std::random_access_iterator<I>
    constexpr const reference_type operator[](difference_type i) const {
        assert(i >= 0 && i < size());
        return _begin[i];
    }

    // Pointer to the first element; only available for contiguous iterators.
    template <typename _ = I>
        requires std::contiguous_iterator<I>
    constexpr value_type* data() {
        return std::to_address(_begin);
    }

    template <typename _ = I>
        requires std::contiguous_iterator<I>
    constexpr value_type const* data() const {
        return std::to_address(_begin);
    }

   private:
    I _begin;
    S _end;
};
} // namespace quicktex

View File

@ -1,49 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include <cstdint>
namespace quicktex {
// next_size<T>::type (shorthand: next_size_t<T>) names the integer type of the same
// signedness that is twice as wide as T. Specializations exist for the 8-, 16- and 32-bit
// fixed-width integers only; the primary template is declared but never defined.
template <class> struct next_size;
template <class T> using next_size_t = typename next_size<T>::type;
// helper base class that exposes T as a nested `type`
template <class T> struct type_tag { using type = T; };
template <> struct next_size<int8_t> : type_tag<int16_t> {};
template <> struct next_size<int16_t> : type_tag<int32_t> {};
template <> struct next_size<int32_t> : type_tag<int64_t> {};
template <> struct next_size<uint8_t> : type_tag<uint16_t> {};
template <> struct next_size<uint16_t> : type_tag<uint32_t> {};
template <> struct next_size<uint32_t> : type_tag<uint64_t> {};
// Smallest fixed-width unsigned integer type with at least bitCount bits (8, 16, 32 or 64);
// resolves to void when bitCount is greater than 64.
// NOTE(review): std::conditional_t is declared in <type_traits>, which this header does not
// include directly.
template <auto bitCount>
using unsigned_bits =
std::conditional_t<bitCount <= 8, std::uint8_t,
std::conditional_t<bitCount <= 16, std::uint16_t,
std::conditional_t<bitCount <= 32, std::uint32_t,
std::conditional_t<bitCount <= 64, std::uint64_t, void>>>>;
// Smallest fixed-width signed integer type with at least bitCount bits (8, 16, 32 or 64);
// resolves to void when bitCount is greater than 64.
template <auto bitCount>
using signed_bits =
std::conditional_t<bitCount <= 8, std::int8_t,
std::conditional_t<bitCount <= 16, std::int16_t,
std::conditional_t<bitCount <= 32, std::int32_t,
std::conditional_t<bitCount <= 64, std::int64_t, void>>>>;
} // namespace quicktex

View File

@ -46,6 +46,7 @@ class CMakeBuild(build_ext):
"-DPython_EXECUTABLE={}".format(sys.executable),
"-DPython_ROOT_DIR={}".format(os.path.dirname(sys.executable)),
"-DQUICKTEX_VERSION_INFO={}".format(version), # include version info in module
"-DQUICKTEX_MODULE_ONLY=TRUE", # only build the module, not the wrapper
"-DCMAKE_BUILD_TYPE={}".format(cfg), # not used on MSVC, but no harm
# clear cached make program binary, see https://github.com/pypa/setuptools/issues/2912
"-U",
@ -53,9 +54,6 @@ class CMakeBuild(build_ext):
]
build_args = []
if self.verbose:
build_args += ["--verbose"]
if self.compiler.compiler_type != "msvc":
# Using Ninja-build since it a) is available as a wheel and b)
# multithreads automatically. MSVC would require all variables be
@ -66,9 +64,6 @@ class CMakeBuild(build_ext):
cmake_args += ["-GNinja"]
else:
# if 'CC' in os.environ and 'clang-cl' in os.environ['CC']:
# cmake_args += ["-T", 'ClangCL'] # https://stackoverflow.com/a/64189112/7645957
# Single config generators are handled "normally"
single_config = any(x in cmake_generator for x in {"NMake", "Ninja"})

View File

@ -1,28 +0,0 @@
# Builds the `Test` executable from every C++ source in this directory tree, links it
# against quicktex and GoogleTest (downloaded at configure time), and registers its test
# cases with CTest.
include(FetchContent)
FetchContent_Declare(
        googletest
        URL https://github.com/google/googletest/archive/e2239ee6043f73722e7aa812a459f54a28552929.zip
)
# For Windows: Prevent overriding the parent project's compiler/linker settings
set(gtest_force_shared_crt ON CACHE BOOL "" FORCE)
FetchContent_MakeAvailable(googletest)

# CONFIGURE_DEPENDS makes the build re-check the globs, so newly added or removed test
# files are noticed without re-running CMake by hand
file(GLOB_RECURSE TEST_HEADER_FILES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.h")
file(GLOB_RECURSE TEST_SOURCE_FILES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.cpp")
file(GLOB_RECURSE TEST_PYTHON_FILES CONFIGURE_DEPENDS "${CMAKE_CURRENT_SOURCE_DIR}/*.py")

# Organize source files together for some IDEs
source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${TEST_SOURCE_FILES} ${TEST_HEADER_FILES} ${TEST_PYTHON_FILES})

add_executable(Test ${TEST_SOURCE_FILES} ${TEST_HEADER_FILES})

if (NOT MSVC)
    # Address and undefined-behaviour sanitizers for Debug builds. The configuration is
    # selected with a generator expression rather than CMAKE_BUILD_TYPE, which is empty
    # under multi-config generators (Xcode, Ninja Multi-Config).
    set(test_sanitizer_flags
            -fsanitize=address,undefined
            -fno-sanitize-recover=address,undefined
            -fno-omit-frame-pointer)
    target_compile_options(Test PRIVATE "$<$<CONFIG:Debug>:${test_sanitizer_flags}>")
    target_link_options(Test PRIVATE "$<$<CONFIG:Debug>:${test_sanitizer_flags}>")
endif ()

# Nothing links against an executable, so its requirements are PRIVATE
target_link_libraries(Test PRIVATE quicktex gtest_main)

include(GoogleTest)
gtest_discover_tests(Test)

View File

@ -1,227 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <Matrix.h>
#include <gtest/gtest.h>
#include <util/math.h>
#include <array>
#include <cstdlib>
namespace quicktex::tests {
// Compares two matrix-like values. For floating point element types every element is
// checked with EXPECT_FLOAT_EQ and the failing index is reported; for all other element
// types the two values are compared as a whole with EXPECT_EQ.
// Both arguments are evaluated exactly once.
#define EXPECT_MATRIX_EQ(value, expected) \
{ \
auto v = value; \
auto e = expected; \
if constexpr (std::is_floating_point_v<typename decltype(v)::value_type>) { \
for (unsigned i = 0; i < v.elements; i++) { \
EXPECT_FLOAT_EQ(v.element(i), e.element(i)) << "At index " << i; \
} \
} else { \
EXPECT_EQ(v, e); \
} \
}
// Returns the n-th Fibonacci number, with fibn(0) == 0 and fibn(1) == 1.
// Computed with a linear loop instead of double recursion, whose call count grows
// exponentially with n. Unsigned overflow wraps the same way in both formulations.
constexpr size_t fibn(size_t n) {
    size_t current = 0;
    size_t following = 1;
    for (size_t i = 0; i < n; i++) {
        const size_t sum = current + following;
        current = following;
        following = sum;
    }
    return current;
}
// Returns n multiplied by itself, converted back to T
template <typename T> constexpr T sqr(T n) {
    const T squared = n * n;
    return squared;
}
// Calls f once with each of the given arguments, in order (comma fold over the pack)
template <typename Op, typename... Args> constexpr void foreach (Op f, Args... args) { (f(args), ...); }
// Typed test fixture, instantiated once per scalar type T.
// Provides generators for matrices with predictable contents and a helper that runs a
// callable once for every matrix/vector shape listed in `sizes`.
template <typename T> class MatrixTest : public testing::Test {
public:
using Scalar = T;
template <size_t M> using Vec = quicktex::Vec<T, M>;
template <size_t M, size_t N> using Matrix = quicktex::Matrix<T, M, N>;
// element i is (i + start) * stride
template <typename M> constexpr M iota(T start = 0, T stride = 1) {
M result(0);
for (unsigned i = 0; i < M::elements; i++) { result.element(i) = (static_cast<T>(i) + start) * stride; }
return result;
}
// element i is (i + start)^2 * stride
template <typename M> constexpr M sqr(T start = 0, T stride = 1) {
M result(0);
for (unsigned i = 0; i < M::elements; i++) {
result.element(i) = (static_cast<T>(i) + start) * (static_cast<T>(i) + start) * stride;
}
return result;
}
// element i is the (i + start)-th Fibonacci number as computed by fibn
template <typename M> constexpr M fib(T start = 0) {
M result(0);
for (unsigned i = 0; i < M::elements; i++) { result.element(i) = fibn(i + start); }
return result;
}
// one zero-filled instance of every shape under test; only the types matter to callers
static constexpr auto sizes = std::make_tuple(Vec<4>(0), Vec<7>(0), Matrix<4, 4>(0), Matrix<5, 6>(0));
// invokes f once with each element of `sizes`
template <typename Op> constexpr void foreach_size(Op f) {
auto foreach = [f]<typename... Args>(Args... args) { (f(args), ...); };
std::apply(foreach, sizes);
}
};
// Scalar types the MatrixTest fixture is instantiated with
using Scalars = ::testing::Types<uint8_t, int8_t, uint16_t, int16_t, uint32_t, int32_t, float, double>;
TYPED_TEST_SUITE(MatrixTest, Scalars);
// Shorthands for calling the fixture's generator templates from inside a TYPED_TEST body
#define IOTA(M, start, stride) this->TestFixture::template iota<M>(start, stride)
#define SQR(M, start, stride) this->TestFixture::template sqr<M>(start, stride)
#define FIB(M, start) this->TestFixture::template fib<M>(start)
// Unary minus flips the sign of every element; skipped for unsigned scalar types
TYPED_TEST(MatrixTest, negate) {
if constexpr (std::unsigned_integral<typename TestFixture::Scalar>) {
GTEST_SKIP();
} else {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(-IOTA(M, 0, 1), IOTA(M, 0, -1));
EXPECT_MATRIX_EQ(-IOTA(M, 0, -1), IOTA(M, 0, 1));
});
}
}
// Element-wise addition of two iota matrices; the case with a negative stride only runs
// for scalar types that are not unsigned integers
TYPED_TEST(MatrixTest, add) {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 1) + IOTA(M, 0, 3), IOTA(M, 0, 4));
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) + IOTA(M, 0, 2), IOTA(M, 0, 4));
if constexpr (!std::unsigned_integral<typename M::value_type>) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 3) + IOTA(M, 0, -1), IOTA(M, 0, 2));
}
});
}
// Element-wise subtraction of two iota matrices; the cases that involve negative values
// only run for scalar types that are not unsigned integers
TYPED_TEST(MatrixTest, subtract) {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 4) - IOTA(M, 0, 1), IOTA(M, 0, 3));
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) - IOTA(M, 0, 2), IOTA(M, 0, 0));
if constexpr (!std::unsigned_integral<typename M::value_type>) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 3) - IOTA(M, 0, -1), IOTA(M, 0, 4));
EXPECT_MATRIX_EQ(IOTA(M, 0, 1) - IOTA(M, 0, 3), IOTA(M, 0, -2));
}
});
}
// Multiplication by a scalar and element-wise multiplication of two matrices.
// Each matrix-by-matrix case is guarded by a compile-time check that the largest expected
// product fits in the element type.
TYPED_TEST(MatrixTest, multiply) {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) * 2, IOTA(M, 0, 4));
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) * 0, M(0));
if constexpr (!std::is_unsigned_v<typename M::value_type>) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) * -2, IOTA(M, 0, -4));
}
if constexpr (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements - 1)) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 1) * IOTA(M, 0, 1), SQR(M, 0, 1));
}
if constexpr (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements - 1) * 3) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 1) * IOTA(M, 0, 3), SQR(M, 0, 3));
EXPECT_MATRIX_EQ(IOTA(M, 0, 0) * IOTA(M, 0, 3), SQR(M, 0, 0));
}
if constexpr (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements - 1) * 4) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) * IOTA(M, 0, 2), SQR(M, 0, 4));
if constexpr (!std::is_unsigned_v<typename M::value_type>) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 4) * IOTA(M, 0, -1), SQR(M, 0, -4));
EXPECT_MATRIX_EQ(IOTA(M, 0, -4) * IOTA(M, 0, -1), SQR(M, 0, 4));
}
}
});
}
// Division by a scalar and element-wise division of two matrices.
// The matrix-by-matrix cases use generators that start at 1 so that no divisor element is
// zero, and are guarded by a compile-time check that the largest dividend fits in the
// element type.
TYPED_TEST(MatrixTest, divide) {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 4) / 2, IOTA(M, 0, 2));
EXPECT_MATRIX_EQ(IOTA(M, 0, 2) / 1, IOTA(M, 0, 2));
if constexpr (!std::is_unsigned_v<typename M::value_type>) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 4) / -2, IOTA(M, 0, -2));
EXPECT_MATRIX_EQ(IOTA(M, 0, -4) / -2, IOTA(M, 0, 2));
}
if constexpr (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements)) {
EXPECT_MATRIX_EQ(SQR(M, 1, 1) / IOTA(M, 1, 1), IOTA(M, 1, 1));
}
if constexpr (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements) * 3) {
EXPECT_MATRIX_EQ(SQR(M, 1, 3) / IOTA(M, 1, 1), IOTA(M, 1, 3));
EXPECT_MATRIX_EQ(SQR(M, 1, 3) / IOTA(M, 1, 3), IOTA(M, 1, 1));
}
if constexpr (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements) * 4) {
EXPECT_MATRIX_EQ(SQR(M, 1, 4) / IOTA(M, 1, 2), IOTA(M, 1, 2));
if constexpr (!std::is_unsigned_v<typename M::value_type>) {
EXPECT_MATRIX_EQ(SQR(M, 1, -4) / IOTA(M, 1, -1), IOTA(M, 1, 4));
EXPECT_MATRIX_EQ(SQR(M, 1, 4) / IOTA(M, 1, -1), IOTA(M, 1, -4));
}
}
});
}
// abs() maps both a negative and a positive iota matrix to the positive one; skipped for
// unsigned scalar types
TYPED_TEST(MatrixTest, abs) {
if constexpr (std::unsigned_integral<typename TestFixture::Scalar>) {
GTEST_SKIP();
} else {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(IOTA(M, 0, -1).abs(), IOTA(M, 0, 1));
EXPECT_MATRIX_EQ(IOTA(M, 0, 1).abs(), IOTA(M, 0, 1));
});
}
}
// clamp() with scalar bounds and with per-element matrix bounds.
// The Fibonacci and square cases are guarded by a plain run-time `if` (not `if constexpr`)
// that checks the largest generated value fits in the element type.
TYPED_TEST(MatrixTest, clamp) {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_MATRIX_EQ(IOTA(M, 0, 1).clamp(0, M::elements - 1), IOTA(M, 0, 1));
EXPECT_MATRIX_EQ(IOTA(M, 0, 1).clamp(M(0), IOTA(M, 0, 1)), IOTA(M, 0, 1));
EXPECT_MATRIX_EQ(IOTA(M, 0, 2).clamp(IOTA(M, 0, 1), IOTA(M, 0, 3)), IOTA(M, 0, 2));
EXPECT_MATRIX_EQ(IOTA(M, 0, 3).clamp(IOTA(M, 0, 1), IOTA(M, 0, 2)), IOTA(M, 0, 2));
EXPECT_MATRIX_EQ(IOTA(M, 0, 1).clamp(M(0), M(0)), M(0));
if (std::numeric_limits<typename M::value_type>::max() >= fibn(M::elements)) {
EXPECT_MATRIX_EQ(FIB(M, 1).clamp(M(0), IOTA(M, 0, 1)), IOTA(M, 0, 1));
}
if (std::numeric_limits<typename M::value_type>::max() >= sqr(M::elements - 1)) {
EXPECT_MATRIX_EQ(SQR(M, 0, 1).clamp(M(0), IOTA(M, 0, 1)), IOTA(M, 0, 1));
}
});
}
// Multiplying a square identity matrix (sized to M's height) by M via mult() leaves M unchanged
TYPED_TEST(MatrixTest, matrix_multiply) {
TestFixture::foreach_size([&]<typename M>(M) {
auto identity = Matrix<typename M::value_type, M::height, M::height>::identity();
EXPECT_MATRIX_EQ(identity.mult(IOTA(M, 0, 1)), IOTA(M, 0, 1));
});
}
// sum() over constant-filled matrices and over 1..elements.
// The 1..elements case is guarded by a run-time check that the expected total,
// elements * (elements + 1) / 2, fits in the element type.
TYPED_TEST(MatrixTest, sum) {
TestFixture::foreach_size([&]<typename M>(M) {
EXPECT_FLOAT_EQ(M(1).sum(), M::elements);
EXPECT_FLOAT_EQ(M(0).sum(), 0);
if (std::numeric_limits<typename M::value_type>::max() >= M::elements * (M::elements + 1) / 2) {
EXPECT_FLOAT_EQ(IOTA(M, 1, 1).sum(), M::elements * (M::elements + 1) / 2);
}
if constexpr (!std::unsigned_integral<typename M::value_type>) {
EXPECT_FLOAT_EQ(M(-1).sum(), -1 * (int)M::elements);
}
});
}
// endregion
} // namespace quicktex::tests

View File

@ -1,81 +0,0 @@
/* Quicktex Texture Compression Library
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
and licenced under the public domain
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include <gtest/gtest.h>
#include <util/math.h>
#include <util/simd.h>
#include <util/types.h>
#include <array>
#include <cstdint>
#include <limits>
#include <numeric>
#include <vector>
#include <xsimd/xsimd.hpp>
namespace quicktex::tests {
template <typename T> constexpr auto make_arrays() {
std::vector<std::array<T, xsimd::batch<T>::size>> arrays;
std::array<T, xsimd::batch<T>::size> buffer;
std::iota(buffer.begin(), buffer.end(), 1);
arrays.push_back(buffer);
buffer.fill(1);
arrays.push_back(buffer);
buffer.fill(0);
arrays.push_back(buffer);
buffer.fill(std::numeric_limits<T>::max());
arrays.push_back(buffer);
if (std::is_signed_v<T>) {
std::iota(buffer.begin(), buffer.end(), -1 * (int)xsimd::batch<T>::size);
arrays.push_back(buffer);
buffer.fill(-1);
arrays.push_back(buffer);
buffer.fill(std::numeric_limits<T>::min());
arrays.push_back(buffer);
}
return arrays;
}
// Defines one test per lane type: for every array from make_arrays<TYPE>(), the result of
// simd::whadd on the loaded batch must equal a std::accumulate over the same array carried
// out in the next wider integer type.
#define TEST_WHADD(TYPE) \
TEST(simd, whadd_##TYPE) { \
for (auto arr : make_arrays<TYPE>()) { \
auto v = xsimd::load_unaligned(&arr[0]); \
auto vsum = simd::whadd(v); \
auto ssum = std::accumulate(arr.begin(), arr.end(), static_cast<next_size_t<TYPE>>(0)); \
EXPECT_EQ(vsum, ssum); \
} \
}
// Instantiate the test for every lane type that has a next_size specialization
TEST_WHADD(int8_t)
TEST_WHADD(uint8_t)
TEST_WHADD(int16_t)
TEST_WHADD(uint16_t)
TEST_WHADD(int32_t)
TEST_WHADD(uint32_t)
} // namespace quicktex::tests

View File

@ -138,22 +138,20 @@ class TestBC1Texture:
class TestBC1Encoder:
"""Test BC1Encoder"""
@pytest.mark.parametrize('level', range(18))
def test_block_4color(self, level, color_mode):
def test_block_4color(self, color_mode):
"""Test encoder output with 4 color greyscale test block"""
encoder = BC1Encoder(level, color_mode)
encoder = BC1Encoder(color_mode=color_mode)
out_tex = encoder.encode(BC1Blocks.greyscale.texture)
out_block = out_tex[0, 0]
assert out_tex.size_blocks == (1, 1)
assert not out_block.is_3color
assert out_block.tobytes() == BC1Blocks.greyscale.block.tobytes()
assert out_block == BC1Blocks.greyscale.block
@pytest.mark.parametrize('level', range(2, 18)) # lowest 2 levels can be improved, but right now choke on this test
def test_block_3color(self, level, color_mode):
def test_block_3color(self, color_mode):
"""Test encoder output with 3 color test block"""
encoder = BC1Encoder(level, color_mode)
encoder = BC1Encoder(color_mode=color_mode)
out_tex = encoder.encode(BC1Blocks.three_color.texture)
out_block = out_tex[0, 0]
@ -162,14 +160,13 @@ class TestBC1Encoder:
if encoder.color_mode != BC1Encoder.ColorMode.FourColor:
# we only care about the selectors if we are in 3 color mode
assert out_block.is_3color
assert out_block.tobytes() == BC1Blocks.three_color.block.tobytes()
assert out_block == BC1Blocks.three_color.block
else:
assert not out_block.is_3color
@pytest.mark.parametrize('level', range(2, 18)) # lowest 2 levels can be improved, but right now choke on this test
def test_block_3color_black(self, level, color_mode):
def test_block_3color_black(self, color_mode):
"""Test encoder output with 3 color test block with black pixels"""
encoder = BC1Encoder(level, color_mode)
encoder = BC1Encoder(color_mode=color_mode)
out_tex = encoder.encode(BC1Blocks.three_color_black.texture)
out_block = out_tex[0, 0]
@ -181,7 +178,7 @@ class TestBC1Encoder:
# we only care about the selectors if we are in 3 color black mode
assert out_block.is_3color
assert has_black
assert out_block.tobytes() == BC1Blocks.three_color_black.block.tobytes()
assert out_block == BC1Blocks.three_color_black.block
elif color_mode == BC1Encoder.ColorMode.ThreeColor:
assert not (has_black and out_block.is_3color)
else:

View File

@ -1,12 +1,9 @@
"""Test if everything is installed correctly"""
import _quicktex
import pytest
import quicktex
class TestInstall:
@pytest.mark.skipif(_quicktex._debug_build, reason="Debug builds dont have valid version strings")
def test_version(self):
"""Test if the extension module version matches what setuptools returns"""
try:
@ -19,4 +16,4 @@ class TestInstall:
version = metadata.version('quicktex')
assert version == quicktex.__version__
assert version == quicktex.__version__, 'incorrect version string from extension module'

View File

@ -37,7 +37,6 @@ function(set_project_warnings project_name)
/w14928 # illegal copy-initialization; more than one user-defined
# conversion has been implicitly applied
/permissive- # standards conformance mode for MSVC compiler.
/wd4701 # uninitialized variable checker is trigger-happy
)
set(CLANG_WARNINGS
@ -53,14 +52,13 @@ function(set_project_warnings project_name)
-Wunused # warn on anything being unused
-Woverloaded-virtual # warn if you overload (not override) a virtual
# function
# -Wpedantic # warn if non-standard C++ is used
-Wpedantic # warn if non-standard C++ is used
#-Wconversion # warn on type conversions that may lose data
#-Wsign-conversion # warn on sign conversions
-Wnull-dereference # warn if a null dereference is detected
-Wdouble-promotion # warn if float is implicit promoted to double
-Wformat=2 # warn on security issues around functions that format output
# (ie printf)
-Wsign-compare
)
if (${PROJECT_NAME}_WARNINGS_AS_ERRORS)
@ -76,7 +74,7 @@ function(set_project_warnings project_name)
-Wduplicated-branches # warn if if / else branches have duplicated code
-Wlogical-op # warn about logical operations being used where bitwise were
# probably wanted
# -Wuseless-cast # warn if you perform a cast to the same type
-Wuseless-cast # warn if you perform a cast to the same type
)
if (MSVC)

View File

@ -1,68 +0,0 @@
# set_simd_flags(<target_name>)
#
# Adds SIMD-related compile options and definitions to <target_name>, as PUBLIC usage
# requirements. The mode is read from the QUICKTEX_SIMD_MODE environment variable at
# configure time:
#   (unset) / AUTO  non-MSVC x86 builds get -march=native with AVX-512 disabled
#   SCALAR          defines XSIMD_NO_SUPPORTED_ARCHITECTURE=1 on every platform
#   SSSE3           x86 only
#   SSE4            x86 only
#   AVX2            x86 only
# On non-x86 targets every mode other than SCALAR adds nothing.
#
# Example:
#   set_simd_flags(quicktex)
function(set_simd_flags target_name)
    if (DEFINED ENV{QUICKTEX_SIMD_MODE})
        set(simd_mode "$ENV{QUICKTEX_SIMD_MODE}")
        message(STATUS "SIMD mode is ${simd_mode}")
    else ()
        message("Defaulting to AUTO SIMD mode. Resulting binary is not fit for distributing to other computers!")
        set(simd_mode "AUTO")
    endif ()

    if ((CMAKE_OSX_ARCHITECTURES MATCHES "x86_64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(x86)|(X86)|(amd64)|(AMD64)"))
        set(X86 TRUE)
        message(STATUS "X86 Detected")
    else ()
        set(X86 FALSE)
    endif ()

    if ((CMAKE_OSX_ARCHITECTURES MATCHES "arm64") OR (CMAKE_SYSTEM_PROCESSOR MATCHES "(arm64)|(ARM64)|(aarch64)"))
        set(ARM TRUE)
        message(STATUS "ARM Detected")
    else ()
        set(ARM FALSE)
    endif ()

    if (simd_mode STREQUAL "SCALAR")
        # force xsimd to use scalar ops. This should really only be used for testing,
        # since SSE2 and NEON are guaranteed on 64-bit platforms.
        # target_compile_definitions emits the right /D or -D spelling for the compiler.
        target_compile_definitions(${target_name} PUBLIC XSIMD_NO_SUPPORTED_ARCHITECTURE=1)
        return()
    endif ()

    if (NOT X86)
        return()
    endif ()

    if (simd_mode STREQUAL "AUTO")
        # MSVC has no -march=native equivalent, so nothing is added there.
        # Setting -march=native on an M1 causes Clang to freak out, hence the ARM check
        # (both X86 and ARM can be detected at once, e.g. for macOS universal builds).
        # AVX512 currently causes problems with buffer overruns, so it is switched off.
        if ((NOT MSVC) AND (NOT ARM))
            target_compile_options(${target_name} PUBLIC -march=native -mno-avx512f)
        endif ()
    elseif (simd_mode STREQUAL "SSSE3")
        if (MSVC)
            target_compile_definitions(${target_name} PUBLIC XSIMD_WITH_SSSE3)
        else ()
            target_compile_options(${target_name} PUBLIC -mssse3)
        endif ()
    elseif (simd_mode STREQUAL "SSE4")
        if (MSVC)
            target_compile_definitions(${target_name} PUBLIC XSIMD_WITH_SSE4_2)
            target_compile_options(${target_name} PUBLIC /d2archSSE42)
        else ()
            target_compile_options(${target_name} PUBLIC -msse4)
        endif ()
    elseif (simd_mode STREQUAL "AVX2")
        if (MSVC)
            target_compile_options(${target_name} PUBLIC /arch:AVX2)
        else ()
            target_compile_options(${target_name} PUBLIC -mavx2)
        endif ()
    else ()
        # a misspelled mode would otherwise silently produce a build with no SIMD flags
        message(WARNING "Unrecognized QUICKTEX_SIMD_MODE '${simd_mode}': no SIMD flags were added to ${target_name}")
    endif ()
endfunction()