mirror of
https://github.com/drewcassidy/quicktex.git
synced 2024-09-13 06:37:34 +00:00
Compare commits
6 Commits
460785ee7d
...
faster-sin
Author | SHA1 | Date | |
---|---|---|---|
8f48330191 | |||
c879061e4e | |||
628ad558d8 | |||
db2d5dbe61 | |||
a61e8c0ca0 | |||
4217d526cf |
3
.gitmodules
vendored
3
.gitmodules
vendored
@ -2,3 +2,6 @@
|
||||
path = extern/pybind11
|
||||
url = https://github.com/pybind/pybind11.git
|
||||
branch = stable
|
||||
[submodule "extern/gif-h"]
|
||||
path = extern/gif-h
|
||||
url = https://github.com/charlietangora/gif-h.git
|
||||
|
@ -16,6 +16,8 @@ file(GLOB TEST_FILES "src/test/*.c" "src/test/*.cpp" "src/test/*.h")
|
||||
# Organize source files together for some IDEs
|
||||
source_group(TREE ${CMAKE_CURRENT_SOURCE_DIR} FILES ${SOURCE_FILES} ${HEADER_FILES} ${PYTHON_FILES})
|
||||
|
||||
include_directories(${CMAKE_SOURCE_DIR}/extern/gif-h)
|
||||
|
||||
# Add python module
|
||||
pybind11_add_module(python_rgbcx
|
||||
${SOURCE_FILES}
|
||||
|
1
extern/gif-h
vendored
Submodule
1
extern/gif-h
vendored
Submodule
Submodule extern/gif-h added at 3d2657b9ad
@ -19,14 +19,12 @@
|
||||
|
||||
#include "BC1Decoder.h"
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../BlockView.h"
|
||||
#include "../Color.h"
|
||||
#include "../Interpolator.h"
|
||||
#include "../ndebug.h"
|
||||
#include "BC1Block.h"
|
||||
|
||||
|
192
src/BC1/BC1Encoder.cpp
Normal file
192
src/BC1/BC1Encoder.cpp
Normal file
@ -0,0 +1,192 @@
|
||||
/* Python-rgbcx Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "BC1Encoder.h"
|
||||
|
||||
#include <gif.h>
|
||||
#include <string>
|
||||
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "../BlockView.h"
|
||||
#include "../Color.h"
|
||||
#include "../bitwiseEnums.h"
|
||||
|
||||
namespace rgbcx {
|
||||
using MatchList = std::array<BC1MatchEntry, 256>;
|
||||
using MatchListPtr = std::shared_ptr<MatchList>;
|
||||
using InterpolatorPtr = std::shared_ptr<Interpolator>;
|
||||
|
||||
// region Free Functions/Templates
|
||||
inline void PrepSingleColorTableEntry(unsigned &error, MatchList &match_table, uint8_t v, unsigned i, uint8_t low, uint8_t high, uint8_t low8, uint8_t high8,
|
||||
bool ideal) {
|
||||
unsigned new_error = iabs(v - (int)i);
|
||||
|
||||
// We only need to factor in 3% error in BC1 ideal mode.
|
||||
if (ideal) new_error += (iabs(high8 - (int)low8) * 3) / 100;
|
||||
|
||||
// Favor equal endpoints, for lower error on actual GPU's which approximate the interpolation.
|
||||
if ((new_error < error) || (new_error == error && low == high)) {
|
||||
assert(new_error <= UINT8_MAX);
|
||||
|
||||
match_table[i].low = (uint8_t)low;
|
||||
match_table[i].high = (uint8_t)high;
|
||||
match_table[i].error = (uint8_t)new_error;
|
||||
|
||||
// error = new_error;
|
||||
}
|
||||
error = new_error;
|
||||
}
|
||||
|
||||
/**
 * Fills the single-color match tables for S-bit endpoints (S is 5 or 6).
 * For each 8-bit target value every (low, high) endpoint pair is offered to
 * PrepSingleColorTableEntry, once using the 2/3 interpolation (match_table) and once using the
 * 1/2 interpolation (match_table_half).
 *
 * As a side effect the running error value for each pair is written as a greyscale frame into
 * "lut<S>.gif" in the working directory, one frame per target value.
 *
 * @param match_table      table filled from the 2/3 interpolation results
 * @param match_table_half table filled from the 1/2 interpolation results
 * @param interpolator     interpolator whose output the tables are built against
 */
template <size_t S> void PrepSingleColorTable(MatchList &match_table, MatchList &match_table_half, Interpolator &interpolator) {
    unsigned side = 1 << S;

    // RGBA frame buffer for the GIF dump, side x side pixels
    std::vector<uint8_t> rgba(side * side * 4, 0);
    auto gif_path = "lut" + std::to_string(S) + ".gif";
    GifWriter writer;
    GifBegin(&writer, gif_path.c_str(), side, side, 10);

    assert((S == 5 && side == 32) || (S == 6 && side == 64));

    const bool is_ideal = interpolator.IsIdeal();
    const bool interpolate_8bit = interpolator.CanInterpolate8Bit();

    for (unsigned target = 0; target < 256; target++) {
        unsigned err_third = 256;
        unsigned err_half = 256;

        // TODO: Can probably avoid testing for values that definitely wont yield good results,
        // e.g. low8 and high8 both much smaller or larger than index
        for (uint8_t lo = 0; lo < side; lo++) {
            uint8_t lo8 = (S == 5) ? scale5To8(lo) : scale6To8(lo);

            for (uint8_t hi = 0; hi < side; hi++) {
                uint8_t hi8 = (S == 5) ? scale5To8(hi) : scale6To8(hi);
                uint8_t third;
                uint8_t half;

                if (!interpolate_8bit) {
                    // interpolator only works on the unscaled endpoint values
                    third = (S == 5) ? interpolator.Interpolate5(hi, lo) : interpolator.Interpolate6(hi, lo);
                    half = (S == 5) ? interpolator.InterpolateHalf5(hi, lo) : interpolator.InterpolateHalf6(hi, lo);
                } else {
                    third = interpolator.Interpolate8(hi8, lo8);
                    half = interpolator.InterpolateHalf8(hi8, lo8);
                }

                PrepSingleColorTableEntry(err_third, match_table, third, target, lo, hi, lo8, hi8, is_ideal);
                PrepSingleColorTableEntry(err_half, match_table_half, half, target, lo, hi, lo8, hi8, is_ideal);

                // grey pixel at (lo, hi) showing the 2/3-table error, fully opaque
                const unsigned base = (lo + (side * hi)) * 4;
                rgba[base] = err_third;
                rgba[base + 1] = err_third;
                rgba[base + 2] = err_third;
                rgba[base + 3] = 255;
            }
        }
        GifWriteFrame(&writer, rgba.data(), side, side, 10);
    }

    GifEnd(&writer);
}
|
||||
// endregion
|
||||
|
||||
// Builds the 5-bit and 6-bit single-color match tables for the given interpolator.
BC1Encoder::BC1Encoder(InterpolatorPtr interpolator) : _interpolator(interpolator) {
    Interpolator &interp = *_interpolator;

    MatchList &table5 = *_single_match5;
    MatchList &table5_half = *_single_match5_half;
    PrepSingleColorTable<5>(table5, table5_half, interp);

    MatchList &table6 = *_single_match6;
    MatchList &table6_half = *_single_match6_half;
    PrepSingleColorTable<6>(table6, table6_half, interp);
}
|
||||
|
||||
// Encodes one 4x4 pixel block into dest.
// Work in progress: every block is currently encoded as a single-color block using the pixel
// at (0, 0); the statements after the early return are not reached yet.
void BC1Encoder::EncodeBlock(Color4x4 pixels, BC1Block *dest) const {
    auto red_channel = pixels.GetChannel(0);
    auto green_channel = pixels.GetChannel(1);
    auto blue_channel = pixels.GetChannel(2);

    const bool single_color = pixels.IsSingleColor();
    if (single_color || true) {  // for now assume (wrongly) everything is a single-color block
        // single-color pixel block, do it the fast way
        EncodeBlockSingleColor(pixels.Get(0, 0), dest);
        return;
    }

    Color lowest, highest, mean;
    pixels.GetMinMaxAvgRGB(lowest, highest, mean);
}
|
||||
|
||||
// Encodes a block in which every pixel is `color`, using the precomputed single-color match tables.
// The block is written with SetLowColor(upper16) / SetHighColor(lower16) and the same selector
// byte in all four selector rows.
void BC1Encoder::EncodeBlockSingleColor(Color color, BC1Block *dest) const {
    uint8_t selector_row = 0xAA;  // 2222
    uint16_t lower16, upper16;
    bool three_color = false;

    // best endpoint pairs for each channel using the 2/3 interpolation tables
    const BC1MatchEntry best_r = (*_single_match5)[color.r];
    const BC1MatchEntry best_g = (*_single_match6)[color.g];
    const BC1MatchEntry best_b = (*_single_match5)[color.b];

    const bool may_use_3color = (_flags & (Flags::Use3ColorBlocks | Flags::Use3ColorBlocksForBlackPixels)) != Flags::None;
    if (may_use_3color) {
        // best endpoint pairs using the 1/2 interpolation tables
        const BC1MatchEntry half_r = (*_single_match5_half)[color.r];
        const BC1MatchEntry half_g = (*_single_match6_half)[color.g];
        const BC1MatchEntry half_b = (*_single_match5_half)[color.b];

        const unsigned err4 = best_r.error + best_g.error + best_b.error;
        const unsigned err3 = half_r.error + half_g.error + half_b.error;

        if (err3 < err4) {
            lower16 = Color::Pack565Unscaled(half_r.low, half_g.low, half_b.low);
            upper16 = Color::Pack565Unscaled(half_r.high, half_g.high, half_b.high);

            // 3-color path keeps upper16 <= lower16
            if (upper16 > lower16) std::swap(lower16, upper16);
            three_color = true;
        }
    }

    if (!three_color) {
        lower16 = Color::Pack565Unscaled(best_r.low, best_g.low, best_b.low);
        upper16 = Color::Pack565Unscaled(best_r.high, best_g.high, best_b.high);

        if (upper16 < lower16) {
            std::swap(lower16, upper16);
            selector_row = 0xFF;  // invert mask to 3333
        } else if (lower16 == upper16) {
            // make sure this isnt accidentally a 3-color block
            // so make upper16 > lower16
            if (lower16 > 0) {
                lower16--;
                selector_row = 0;  // endpoints are equal so mask doesnt matter
            } else {
                assert(lower16 == 0 && upper16 == 0);
                upper16 = 1;
                lower16 = 0;
                selector_row = 0x55;  // 1111 (min value only, max is ignored)
            }
        }
        assert(upper16 > lower16);
    }

    dest->SetLowColor(upper16);
    dest->SetHighColor(lower16);
    for (unsigned row = 0; row < 4; row++) dest->selectors[row] = selector_row;
}
|
||||
|
||||
} // namespace rgbcx
|
145
src/BC1/BC1Encoder.h
Normal file
145
src/BC1/BC1Encoder.h
Normal file
@ -0,0 +1,145 @@
|
||||
/* Python-rgbcx Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
#include <mutex>
|
||||
|
||||
#include "../BlockEncoder.h"
|
||||
#include "../BlockView.h"
|
||||
#include "../Interpolator.h"
|
||||
#include "../bitwiseEnums.h"
|
||||
#include "../ndebug.h"
|
||||
#include "../tables.h"
|
||||
#include "BC1Block.h"
|
||||
|
||||
namespace rgbcx {
|
||||
|
||||
// One entry of a single-color match table: the endpoint pair chosen for an 8-bit value,
// together with the error of that choice.
struct BC1MatchEntry {
    uint8_t high;   // high endpoint of the pair
    uint8_t low;    // low endpoint of the pair
    uint8_t error;  // error recorded for this pair
};
|
||||
|
||||
// Block encoder producing BC1Block output from 4x4 blocks of Color.
// Holds per-instance single-color match tables that are built in the constructor from the
// supplied interpolator.
class BC1Encoder : public BlockEncoder<BC1Block, 4, 4> {
   public:
    using InterpolatorPtr = std::shared_ptr<Interpolator>;

    enum class Flags : uint32_t {
        None = 0,

        // Try to improve quality using the most likely total orderings.
        // The total_orderings_to_try parameter will then control the number of total orderings to try for 4 color blocks, and the
        // total_orderings_to_try3 parameter will control the number of total orderings to try for 3 color blocks (if they are enabled).
        UseLikelyTotalOrderings = 2,

        // Use 2 least squares pass, instead of one (same as stb_dxt's HIGHQUAL option).
        // Recommended if you're enabling UseLikelyTotalOrderings.
        TwoLeastSquaresPasses = 4,

        // Use3ColorBlocksForBlackPixels allows the BC1 encoder to use 3-color blocks for blocks containing black or very dark pixels.
        // Your shader/engine MUST ignore the alpha channel on textures encoded with this flag.
        // Average quality goes up substantially for my 100 texture corpus (~.5 dB), so it's worth using if you can.
        // Note the BC1 encoder does not actually support transparency in 3-color mode.
        // Don't set when encoding to BC3.
        Use3ColorBlocksForBlackPixels = 8,

        // If Use3ColorBlocks is set, the encoder can use 3-color mode for a small but noticeable gain in average quality, but lower perf.
        // If you also specify the UseLikelyTotalOrderings flag, set the total_orderings_to_try3 parameter to the number of total orderings to try.
        // Don't set when encoding to BC3.
        Use3ColorBlocks = 16,

        // Iterative will greatly increase encode time, but is very slightly higher quality.
        // Same as squish's iterative cluster fit option. Not really worth the tiny boost in quality, unless you just don't care about perf. at all.
        Iterative = 32,

        // BoundingBox enables a fast all-integer PCA approximation on 4-color blocks.
        // At level 0 options (no other flags), this is ~15% faster, and higher *average* quality.
        BoundingBox = 64,

        // Use a slightly lower quality, but ~30% faster MSE evaluation function for 4-color blocks.
        UseFasterMSEEval = 128,

        // Examine all colors to compute selectors/MSE (slower than default)
        UseFullMSEEval = 256,

        // Use 2D least squares+inset+optimal rounding (the method used in Humus's GPU texture encoding demo), instead of PCA.
        // Around 18% faster, very slightly lower average quality to better (depends on the content).
        Use2DLS = 512,

        // Use 6 power iterations vs. 4 for PCA.
        Use6PowerIters = 2048,

        // Check all total orderings - *very* slow. The encoder is not designed to be used in this way.
        Exhaustive = 8192,

        // Try 2 different ways of choosing the initial endpoints.
        TryAllInitialEndponts = 16384,

        // Same as BoundingBox, but implemented using integer math (faster, slightly less quality)
        BoundingBoxInt = 32768,

        // Try refining the final endpoints by examining nearby colors.
        EndpointSearchRoundsShift = 22,
        EndpointSearchRoundsMask = 1023U << EndpointSearchRoundsShift,
    };

    BC1Encoder(InterpolatorPtr interpolator);

    void EncodeBlock(Color4x4 pixels, BC1Block *dest) const override;

   private:
    const InterpolatorPtr _interpolator;

    // Encoder settings. They are given defined starting values here because the constructor does
    // not set them and EncodeBlockSingleColor reads _flags.
    // NOTE(review): zero/None are placeholder defaults - confirm the intended values.
    Flags _flags = Flags::None;
    unsigned _search_rounds = 0;
    unsigned _orderings4 = 0;
    unsigned _orderings3 = 0;

    void EncodeBlockSingleColor(Color color, BC1Block *dest) const;

    // match tables used for single-color blocks
    // Each entry includes a high and low pair that best reproduces the 8-bit index as well as possible,
    // with an included error value
    // these depend on the interpolator
    using MatchList = std::array<BC1MatchEntry, 256>;
    using MatchListPtr = std::shared_ptr<MatchList>;

    const MatchListPtr _single_match5 = std::make_shared<MatchList>();
    const MatchListPtr _single_match6 = std::make_shared<MatchList>();
    const MatchListPtr _single_match5_half = std::make_shared<MatchList>();
    const MatchListPtr _single_match6_half = std::make_shared<MatchList>();

    // static lookup tables, generated the first time an encoder is created
    // the mutex prevents race conditions if two encoders are created on different threads
    static std::mutex _luts_mutex;
    static bool _luts_initialized;

    // lookup table for hash values
    static uint16_t g_total_ordering4_hash[4096];
    static uint16_t g_total_ordering3_hash[256];

    static float g_selector_factors4[NUM_UNIQUE_TOTAL_ORDERINGS4][3];
    static float g_selector_factors3[NUM_UNIQUE_TOTAL_ORDERINGS3][3];
};
|
||||
} // namespace rgbcx
|
45
src/BC1/SelectorHistogram.h
Normal file
45
src/BC1/SelectorHistogram.h
Normal file
@ -0,0 +1,45 @@
|
||||
/* Python-rgbcx Texture Compression Library
|
||||
Copyright (C) 2021 Andrew Cassidy <drewcassidy@me.com>
|
||||
Partially derived from rgbcx.h written by Richard Geldreich <richgel99@gmail.com>
|
||||
and licenced under the public domain
|
||||
|
||||
This program is free software: you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation, either version 3 of the License, or
|
||||
(at your option) any later version.
|
||||
|
||||
This program is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
GNU Lesser General Public License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <array>
|
||||
|
||||
namespace rgbcx {
|
||||
//
|
||||
//class SelectorHistogram<size_t Size, size_t Max> {
|
||||
// public:
|
||||
// std::array<uint8_t, Size> histogram;
|
||||
//
|
||||
// bool operator==(const SelectorHistogram<N> &other) const {
|
||||
// for (unsigned i = 0; i < Size; i++) {
|
||||
// if (histogram[i] != other.histogram[i]) return false;
|
||||
// }
|
||||
// return true;
|
||||
// }
|
||||
//
|
||||
// bool AnyMax() cost {
|
||||
// for (unsigned i = 0; i < Size; i++) {
|
||||
// if (histogram[i] == Max) return true;
|
||||
// }
|
||||
// return false;
|
||||
// }
|
||||
//};
|
||||
|
||||
} // namespace rgbcx
|
@ -23,6 +23,7 @@
|
||||
#include "../BC4/BC4Decoder.h"
|
||||
#include "../BlockView.h"
|
||||
#include "../ndebug.h"
|
||||
#include "BC3Block.h"
|
||||
|
||||
namespace rgbcx {
|
||||
|
||||
|
@ -68,6 +68,11 @@ class BC4Block {
|
||||
SetSelectorBits(packed);
|
||||
}
|
||||
|
||||
void PackSelectors(const std::array<uint8_t, 16>& unpacked) {
|
||||
auto packed = Pack<uint8_t, uint64_t, 3, 16>(unpacked);
|
||||
SetSelectorBits(packed);
|
||||
}
|
||||
|
||||
inline uint32_t GetSelector(uint32_t x, uint32_t y, uint64_t selector_bits) const {
|
||||
assert((x < 4U) && (y < 4U));
|
||||
return (selector_bits >> (((y * 4) + x) * SelectorBits)) & (SelectorMask);
|
||||
|
@ -19,12 +19,10 @@
|
||||
|
||||
#include "BC4Decoder.h"
|
||||
|
||||
#include <assert.h> // for assert
|
||||
|
||||
#include <array> // for array
|
||||
#include <array> // for array
|
||||
#include <cassert> // for assert
|
||||
|
||||
#include "../BlockView.h" // for ColorBlock
|
||||
#include "../Color.h" // for Color
|
||||
#include "../ndebug.h" // for ndebug
|
||||
#include "BC4Block.h"
|
||||
|
||||
|
@ -19,7 +19,8 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../BlockDecoder.h"
|
||||
#include "../BlockView.h"
|
||||
|
@ -19,89 +19,53 @@
|
||||
|
||||
#include "BC4Encoder.h"
|
||||
|
||||
#include <algorithm>
|
||||
#include <algorithm> // for minmax_element
|
||||
#include <array> // for array
|
||||
#include <cstdint> // for uint8_t
|
||||
#include <utility> // for pair
|
||||
|
||||
#include "BC4Block.h" // for BC4Block
|
||||
|
||||
namespace rgbcx {
|
||||
void BC4Encoder::EncodeBlock(Byte4x4 pixels, BC4Block *const dest) const noexcept(ndebug) {
|
||||
auto bytes = pixels.Flatten();
|
||||
auto minmax = std::minmax_element(bytes.begin(), bytes.end());
|
||||
auto flattened = pixels.Flatten();
|
||||
auto minmax = std::minmax_element(flattened.begin(), flattened.end());
|
||||
|
||||
uint8_t min_v = *minmax.first;
|
||||
uint8_t max_v = *minmax.second;
|
||||
uint8_t min = *minmax.first;
|
||||
uint8_t max = *minmax.second;
|
||||
|
||||
dest->high_alpha = min_v;
|
||||
dest->low_alpha = max_v;
|
||||
dest->high_alpha = min;
|
||||
dest->low_alpha = max;
|
||||
|
||||
if (max_v == min_v) {
|
||||
if (max == min) {
|
||||
dest->SetSelectorBits(0);
|
||||
return;
|
||||
}
|
||||
|
||||
const uint32_t delta = max_v - min_v;
|
||||
|
||||
// min_v is now 0. Compute thresholds between values by scaling max_v. It's x14 because we're adding two x7 scale factors.
|
||||
const int t0 = delta * 13;
|
||||
const int t1 = delta * 11;
|
||||
const int t2 = delta * 9;
|
||||
const int t3 = delta * 7;
|
||||
const int t4 = delta * 5;
|
||||
const int t5 = delta * 3;
|
||||
const int t6 = delta * 1;
|
||||
std::array<uint8_t, 16> selectors = {};
|
||||
const static std::array<uint8_t, 8> Levels = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U}; // selector value options in linear order
|
||||
|
||||
// BC4 floors in its divisions, which we compensate for with the 4 bias.
|
||||
// This function is optimal for all possible inputs (i.e. it outputs the same results as checking all 8 values and choosing the closest one).
|
||||
const int bias = 4 - min_v * 14;
|
||||
const int bias = 4 - min * 14;
|
||||
const int delta = max - min;
|
||||
|
||||
static const uint32_t s_tran0[8] = {1U, 7U, 6U, 5U, 4U, 3U, 2U, 0U};
|
||||
static const uint32_t s_tran1[8] = {1U << 3U, 7U << 3U, 6U << 3U, 5U << 3U, 4U << 3U, 3U << 3U, 2U << 3U, 0U << 3U};
|
||||
static const uint32_t s_tran2[8] = {1U << 6U, 7U << 6U, 6U << 6U, 5U << 6U, 4U << 6U, 3U << 6U, 2U << 6U, 0U << 6U};
|
||||
static const uint32_t s_tran3[8] = {1U << 9U, 7U << 9U, 6U << 9U, 5U << 9U, 4U << 9U, 3U << 9U, 2U << 9U, 0U << 9U};
|
||||
// min is now 0. Compute thresholds between values by scaling max. It's x14 because we're adding two x7 scale factors.
|
||||
// bias is applied here
|
||||
std::array<int, 7> thresholds = {};
|
||||
for (unsigned i = 0; i < 7; i++) thresholds[i] = delta * (1 + (2 * (int)i)) - bias;
|
||||
|
||||
uint64_t a0, a1, a2, a3;
|
||||
{
|
||||
const int v0 = bytes[0] * 14 + bias;
|
||||
const int v1 = bytes[1] * 14 + bias;
|
||||
const int v2 = bytes[2] * 14 + bias;
|
||||
const int v3 = bytes[3] * 14 + bias;
|
||||
a0 = s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)];
|
||||
a1 = s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)];
|
||||
a2 = s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)];
|
||||
a3 = s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)];
|
||||
// iterate over all values and calculate selectors
|
||||
for (unsigned i = 0; i < 16; i++) {
|
||||
int value = flattened[i] * 14; // multiply by demonimator
|
||||
|
||||
// level = number of thresholds this value is greater than
|
||||
unsigned level = 0;
|
||||
for (unsigned c = 0; c < 7; c++) level += value >= thresholds[c];
|
||||
|
||||
selectors[i] = Levels[level];
|
||||
}
|
||||
|
||||
{
|
||||
const int v0 = bytes[4] * 14 + bias;
|
||||
const int v1 = bytes[5] * 14 + bias;
|
||||
const int v2 = bytes[6] * 14 + bias;
|
||||
const int v3 = bytes[7] * 14 + bias;
|
||||
a0 |= (uint64_t)(s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)] << 12U);
|
||||
a1 |= (uint64_t)(s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)] << 12U);
|
||||
a2 |= (uint64_t)(s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)] << 12U);
|
||||
a3 |= (uint64_t)(s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)] << 12U);
|
||||
}
|
||||
|
||||
{
|
||||
const int v0 = bytes[8] * 14 + bias;
|
||||
const int v1 = bytes[9] * 14 + bias;
|
||||
const int v2 = bytes[10] * 14 + bias;
|
||||
const int v3 = bytes[11] * 14 + bias;
|
||||
a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 24U);
|
||||
a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 24U);
|
||||
a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 24U);
|
||||
a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 24U);
|
||||
}
|
||||
|
||||
{
|
||||
const int v0 = bytes[12] * 14 + bias;
|
||||
const int v1 = bytes[13] * 14 + bias;
|
||||
const int v2 = bytes[14] * 14 + bias;
|
||||
const int v3 = bytes[15] * 14 + bias;
|
||||
a0 |= (((uint64_t)s_tran0[(v0 >= t0) + (v0 >= t1) + (v0 >= t2) + (v0 >= t3) + (v0 >= t4) + (v0 >= t5) + (v0 >= t6)]) << 36U);
|
||||
a1 |= (((uint64_t)s_tran1[(v1 >= t0) + (v1 >= t1) + (v1 >= t2) + (v1 >= t3) + (v1 >= t4) + (v1 >= t5) + (v1 >= t6)]) << 36U);
|
||||
a2 |= (((uint64_t)s_tran2[(v2 >= t0) + (v2 >= t1) + (v2 >= t2) + (v2 >= t3) + (v2 >= t4) + (v2 >= t5) + (v2 >= t6)]) << 36U);
|
||||
a3 |= (((uint64_t)s_tran3[(v3 >= t0) + (v3 >= t1) + (v3 >= t2) + (v3 >= t3) + (v3 >= t4) + (v3 >= t5) + (v3 >= t6)]) << 36U);
|
||||
}
|
||||
|
||||
dest->SetSelectorBits(a0 | a1 | a2 | a3);
|
||||
dest->PackSelectors(selectors);
|
||||
}
|
||||
} // namespace rgbcx
|
@ -19,8 +19,14 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cassert>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../BlockEncoder.h"
|
||||
#include "../BlockView.h"
|
||||
#include "../ndebug.h"
|
||||
#include "BC4Block.h"
|
||||
|
||||
namespace rgbcx {
|
||||
|
||||
class BC4Encoder : public BlockEncoder<BC4Block, 4, 4> {
|
||||
|
@ -19,9 +19,9 @@
|
||||
|
||||
#include "BC5Decoder.h"
|
||||
|
||||
#include "../BC4/BC4Decoder.h"
|
||||
#include "../BlockView.h"
|
||||
#include "../ndebug.h"
|
||||
#include "BC5Block.h"
|
||||
|
||||
namespace rgbcx {
|
||||
|
||||
|
@ -19,8 +19,9 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <cassert>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
|
||||
#include "../BC4/BC4Decoder.h"
|
||||
|
@ -81,10 +81,13 @@ template <typename S, size_t M, size_t N> class BlockView {
|
||||
start[(row_stride * (int)y) + (pixel_stride * (int)x)] = value;
|
||||
}
|
||||
|
||||
// Linear-index accessors: index i maps to column i % N and row i / N.
constexpr S &Get(unsigned i) noexcept(ndebug) {
    const unsigned column = i % N;
    const unsigned row = i / N;
    return Get(column, row);
}
constexpr S Get(unsigned i) const noexcept(ndebug) {
    const unsigned column = i % N;
    const unsigned row = i / N;
    return Get(column, row);
}
|
||||
|
||||
constexpr std::array<S, M * N> Flatten() noexcept {
|
||||
std::array<S, M * N> result;
|
||||
for (int x = 0; x < N; x++) {
|
||||
for (int y = 0; y < M; y++) { result[x + (N * y)] = start[(row_stride * y) + (pixel_stride * x)]; }
|
||||
for (unsigned x = 0; x < N; x++) {
|
||||
for (unsigned y = 0; y < M; y++) { result[x + (N * y)] = Get(x, y); }
|
||||
}
|
||||
return result;
|
||||
}
|
||||
@ -108,6 +111,34 @@ template <size_t M, size_t N> class ColorBlockView : public BlockView<Color, M,
|
||||
}
|
||||
|
||||
void SetRGB(unsigned x, unsigned y, Color value) noexcept(ndebug) { Base::Get(x, y).SetRGB(value); }
|
||||
|
||||
bool IsSingleColor() {
|
||||
auto first = Base::Get(0, 0);
|
||||
for (unsigned j = 1; j < M * N; j++) {
|
||||
if (Base::Get(j) != first) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
// Computes the per-channel minimum, maximum and average of the first three channels over all
// M * N pixels of the block.
// min and max are overwritten with the pixel at (0, 0) and then narrowed/widened per channel;
// only channels 0..2 of avg are written.
void GetMinMaxAvgRGB(Color &min, Color &max, Color &avg) {
    min = Base::Get(0, 0);
    max = Base::Get(0, 0);

    // the sums must start at zero, and every pixel (including pixel 0) contributes to the average
    std::array<unsigned, 3> sums = {0, 0, 0};

    for (unsigned i = 0; i < M * N; i++) {
        auto val = Base::Get(i);
        for (unsigned c = 0; c < 3; c++) {
            // min and max are tracked independently; a value that is not a new minimum is only
            // a new maximum if it actually exceeds the current maximum
            if (val[c] < min[c]) {
                min[c] = val[c];
            } else if (val[c] > max[c]) {
                max[c] = val[c];
            }
            sums[c] += val[c];
        }
    }

    for (unsigned c = 0; c < 3; c++) { avg[c] = static_cast<uint8_t>(sums[c] / (M * N)); }
}
|
||||
};
|
||||
|
||||
using Color4x4 = ColorBlockView<4, 4>;
|
||||
|
@ -52,6 +52,16 @@ class Interpolator {
|
||||
*/
|
||||
virtual uint8_t Interpolate6(uint8_t v0, uint8_t v1) const;
|
||||
|
||||
/**
|
||||
* Performs a 2/3 interpolation of a pair of 8-bit values to produce an 8-bit value
|
||||
* Output is approximately (2v0 + v1)/3.
|
||||
* Output is not guaranteed to be accurate for the given interpolator if CanInterpolate8Bit() is false
|
||||
* @param v0 The first 8-bit value
|
||||
* @param v1 The second 8-bit value
|
||||
* @return The interpolated value
|
||||
*/
|
||||
virtual uint8_t Interpolate8(uint8_t v0, uint8_t v1) const;
|
||||
|
||||
/**
|
||||
* Performs a 1/2 interpolation of a pair of 5-bit values to produce an 8-bit value
|
||||
* Output is approximately (v0 + v1)/2, with v0 and v1 first extended to 8 bits.
|
||||
@ -70,6 +80,16 @@ class Interpolator {
|
||||
*/
|
||||
virtual uint8_t InterpolateHalf6(uint8_t v0, uint8_t v1) const;
|
||||
|
||||
/**
|
||||
* Performs a 1/2 interpolation of a pair of 8-bit values to produce an 8-bit value
|
||||
* Output is approximately (v0 + v1)/2.
|
||||
* Output is not guaranteed to be accurate for the given interpolator if CanInterpolate8Bit() is false
|
||||
* @param v0 The first 8-bit value
|
||||
* @param v1 The second 8-bit value
|
||||
* @return The interpolated value
|
||||
*/
|
||||
virtual uint8_t InterpolateHalf8(uint8_t v0, uint8_t v1) const;
|
||||
|
||||
/**
|
||||
* Generates the 4 colors for a BC1 block from the given 5:6:5-packed colors
|
||||
* @param low first 5:6:5 color for the block
|
||||
@ -84,6 +104,8 @@ class Interpolator {
|
||||
*/
|
||||
virtual Type GetType() const noexcept { return Type::Ideal; }
|
||||
|
||||
virtual bool CanInterpolate8Bit() const noexcept { return true; }
|
||||
|
||||
/**
|
||||
* Checks if the interpolator uses an ideal algorithm
|
||||
* @return true if the interpolator is ideal, false otherwise.
|
||||
@ -94,21 +116,6 @@ class Interpolator {
|
||||
}
|
||||
|
||||
private:
|
||||
virtual uint8_t Interpolate8(uint8_t v0, uint8_t v1) const;
|
||||
virtual uint8_t InterpolateHalf8(uint8_t v0, uint8_t v1) const;
|
||||
|
||||
// constexpr static auto Expand5 = ExpandArray<Size5, scale5To8>();
|
||||
// constexpr static auto Expand6 = ExpandArray<size6, scale6To8>();
|
||||
//
|
||||
// // match tables used for single-color blocks
|
||||
// using MatchList = std::array<MatchEntry, match_count>;
|
||||
// using MatchListPtr = std::shared_ptr<MatchList>;
|
||||
//
|
||||
// const MatchListPtr _single_match5 = {std::make_shared<MatchList>()};
|
||||
// const MatchListPtr _single_match6 = {std::make_shared<MatchList>()};
|
||||
// const MatchListPtr _single_match5_half = {std::make_shared<MatchList>()};
|
||||
// const MatchListPtr _single_match6_half = {std::make_shared<MatchList>()};
|
||||
|
||||
Color InterpolateColor24(const Color &c0, const Color &c1) const {
|
||||
return Color(Interpolate8(c0.r, c1.r), Interpolate8(c0.g, c1.g), Interpolate8(c0.b, c1.b));
|
||||
}
|
||||
@ -116,33 +123,29 @@ class Interpolator {
|
||||
Color InterpolateHalfColor24(const Color &c0, const Color &c1) const {
|
||||
return Color(InterpolateHalf8(c0.r, c1.r), InterpolateHalf8(c0.g, c1.g), InterpolateHalf8(c0.b, c1.b));
|
||||
}
|
||||
|
||||
// virtual constexpr bool useExpandedInMatch() noexcept { return true; }
|
||||
//
|
||||
// void PrepSingleColorTables(const MatchListPtr &matchTable, const MatchListPtr &matchTableHalf, int len);
|
||||
//
|
||||
// int PrepSingleColorTableEntry(const MatchListPtr &matchTable, int v, int i, int low, int high, int low_e, int high_e, int lowest_error, bool half,
|
||||
// bool ideal);
|
||||
};
|
||||
|
||||
class InterpolatorRound : public Interpolator {
|
||||
public:
|
||||
uint8_t Interpolate5(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t Interpolate6(uint8_t v0, uint8_t v1) const override;
|
||||
Type GetType() const noexcept override { return Type::IdealRound; }
|
||||
|
||||
private:
|
||||
uint8_t Interpolate8(uint8_t v0, uint8_t v1) const override;
|
||||
|
||||
Type GetType() const noexcept override { return Type::IdealRound; }
|
||||
};
|
||||
|
||||
class InterpolatorNvidia : public Interpolator {
|
||||
public:
|
||||
uint8_t Interpolate5(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t Interpolate6(uint8_t v0, uint8_t v1) const override;
|
||||
|
||||
uint8_t InterpolateHalf5(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t InterpolateHalf6(uint8_t v0, uint8_t v1) const override;
|
||||
|
||||
std::array<Color, 4> InterpolateBC1(uint16_t low, uint16_t high) const override;
|
||||
|
||||
Type GetType() const noexcept override { return Type::Nvidia; }
|
||||
bool CanInterpolate8Bit() const noexcept override { return false; }
|
||||
|
||||
private:
|
||||
Color InterpolateColor565(const Color &c0, const Color &c1) const {
|
||||
@ -158,12 +161,12 @@ class InterpolatorAMD : public Interpolator {
|
||||
public:
|
||||
uint8_t Interpolate5(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t Interpolate6(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t Interpolate8(uint8_t v0, uint8_t v1) const override;
|
||||
|
||||
uint8_t InterpolateHalf5(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t InterpolateHalf6(uint8_t v0, uint8_t v1) const override;
|
||||
Type GetType() const noexcept override { return Type::AMD; }
|
||||
|
||||
private:
|
||||
uint8_t Interpolate8(uint8_t v0, uint8_t v1) const override;
|
||||
uint8_t InterpolateHalf8(uint8_t v0, uint8_t v1) const override;
|
||||
|
||||
Type GetType() const noexcept override { return Type::AMD; }
|
||||
};
|
||||
} // namespace rgbcx
|
@ -18,6 +18,7 @@
|
||||
#include <vector>
|
||||
|
||||
#include "../BC4/BC4Encoder.h"
|
||||
#include "../BC1/BC1Encoder.h"
|
||||
#include "../rgbcx.h"
|
||||
#include "../rgbcxDecoders.h"
|
||||
#include "../util.h"
|
||||
@ -659,12 +660,23 @@ int main(int argc, char *argv[]) {
|
||||
uint32_t bc7_mode_hist[8];
|
||||
memset(bc7_mode_hist, 0, sizeof(bc7_mode_hist));
|
||||
|
||||
#ifdef NDEBUG
|
||||
const int test_count = 1000;
|
||||
#else
|
||||
const int test_count = 1;
|
||||
#endif
|
||||
|
||||
if (dxgi_format == DXGI_FORMAT_BC4_UNORM) {
|
||||
auto bc4_encoder = BC4Encoder(bc45_channel0);
|
||||
Color *src = &source_image.get_pixels()[0];
|
||||
|
||||
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
|
||||
for (int i = 0; i < test_count; i++)
|
||||
bc4_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
|
||||
} else if (dxgi_format == DXGI_FORMAT_BC1_UNORM) {
|
||||
auto bc1_encoder = BC1Encoder(Interpolator::MakeInterpolator());
|
||||
Color *src = &source_image.get_pixels()[0];
|
||||
|
||||
bc1_encoder.EncodeImage(reinterpret_cast<uint8_t *>(&packed_image8[0]), src, source_image.width(), source_image.height());
|
||||
} else {
|
||||
for (uint32_t by = 0; by < blocks_y; by++) {
|
||||
for (uint32_t bx = 0; bx < blocks_x; bx++) {
|
||||
@ -732,7 +744,7 @@ int main(int argc, char *argv[]) {
|
||||
|
||||
clock_t end_t = clock();
|
||||
|
||||
printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC);
|
||||
printf("\nTotal time: %f secs\n", (double)(end_t - start_t) / CLOCKS_PER_SEC / test_count);
|
||||
|
||||
if (dxgi_format == DXGI_FORMAT_BC7_UNORM) {
|
||||
printf("BC7 mode histogram:\n");
|
||||
|
Reference in New Issue
Block a user