Precompute fast cluster fit factors, and store as static const.
nvtt is completely reentrant now. Fixes issue 37. cleanup interface of cuda compressors.
This commit is contained in:
113
src/nvtt/squish/extra/squishgen2.cpp
Normal file
113
src/nvtt/squish/extra/squishgen2.cpp
Normal file
@ -0,0 +1,113 @@
|
||||
/* -----------------------------------------------------------------------------
|
||||
|
||||
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
|
||||
Copyright (c) 2008 Ignacio Castano castano@gmail.com
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining
|
||||
a copy of this software and associated documentation files (the
|
||||
"Software"), to deal in the Software without restriction, including
|
||||
without limitation the rights to use, copy, modify, merge, publish,
|
||||
distribute, sublicense, and/or sell copies of the Software, and to
|
||||
permit persons to whom the Software is furnished to do so, subject to
|
||||
the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included
|
||||
in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
||||
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
-------------------------------------------------------------------------- */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
struct Precomp {
|
||||
float alpha2_sum;
|
||||
float beta2_sum;
|
||||
float alphabeta_sum;
|
||||
float factor;
|
||||
};
|
||||
|
||||
|
||||
int main()
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
printf("struct Precomp {\n");
|
||||
printf("\tfloat alpha2_sum;\n");
|
||||
printf("\tfloat beta2_sum;\n");
|
||||
printf("\tfloat alphabeta_sum;\n");
|
||||
printf("\tfloat factor;\n");
|
||||
printf("};\n\n");
|
||||
|
||||
printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n");
|
||||
|
||||
// Three element clusters:
|
||||
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
|
||||
{
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
int c2 = 16 - c0 - c1;
|
||||
|
||||
Precomp p;
|
||||
p.alpha2_sum = c0 + c1 * 0.25f;
|
||||
p.beta2_sum = c2 + c1 * 0.25f;
|
||||
p.alphabeta_sum = c1 * 0.25f;
|
||||
p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
|
||||
|
||||
if (isfinite(p.factor))
|
||||
{
|
||||
printf("\t{ %f, %f, %f, %f }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("\t{ %f, %f, %f, INFINITY }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
printf("}; // %d three cluster elements\n\n", i);
|
||||
|
||||
printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n");
|
||||
|
||||
// Four element clusters:
|
||||
i = 0;
|
||||
for( int c0 = 0; c0 <= 16; c0++)
|
||||
{
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
||||
{
|
||||
int c3 = 16 - c0 - c1 - c2;
|
||||
|
||||
Precomp p;
|
||||
p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
|
||||
p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
|
||||
p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
|
||||
p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
|
||||
|
||||
if (isfinite(p.factor))
|
||||
{
|
||||
printf("\t{ %f, %f, %f, %f }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3);
|
||||
}
|
||||
else
|
||||
{
|
||||
printf("\t{ %f, %f, %f, INFINITY }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
printf("}; // %d four cluster elements\n\n", i);
|
||||
|
||||
return 0;
|
||||
}
|
@ -29,6 +29,8 @@
|
||||
#include "colourblock.h"
|
||||
#include <cfloat>
|
||||
|
||||
#include "fastclusterlookup.inl"
|
||||
|
||||
namespace squish {
|
||||
|
||||
FastClusterFit::FastClusterFit()
|
||||
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
|
||||
}
|
||||
|
||||
|
||||
struct Precomp {
|
||||
float alpha2_sum;
|
||||
float beta2_sum;
|
||||
float alphabeta_sum;
|
||||
float factor;
|
||||
};
|
||||
|
||||
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
|
||||
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
|
||||
|
||||
void FastClusterFit::DoPrecomputation()
|
||||
{
|
||||
int i = 0;
|
||||
|
||||
// Three element clusters:
|
||||
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
|
||||
{
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
int c2 = 16 - c0 - c1;
|
||||
|
||||
/*if (c2 == 16) {
|
||||
// a = b = x2 / 16
|
||||
s_threeElement[i].alpha2_sum = 0;
|
||||
s_threeElement[i].beta2_sum = 16;
|
||||
s_threeElement[i].alphabeta_sum = -16;
|
||||
s_threeElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else if (c0 == 16) {
|
||||
// a = b = x0 / 16
|
||||
s_threeElement[i].alpha2_sum = 16;
|
||||
s_threeElement[i].beta2_sum = 0;
|
||||
s_threeElement[i].alphabeta_sum = -16;
|
||||
s_threeElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else*/ {
|
||||
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
|
||||
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
|
||||
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
|
||||
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
//printf("%d three cluster elements\n", i);
|
||||
|
||||
// Four element clusters:
|
||||
i = 0;
|
||||
for( int c0 = 0; c0 <= 16; c0++)
|
||||
{
|
||||
for( int c1 = 0; c1 <= 16-c0; c1++)
|
||||
{
|
||||
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
|
||||
{
|
||||
int c3 = 16 - c0 - c1 - c2;
|
||||
|
||||
/*if (c3 == 16) {
|
||||
// a = b = x3 / 16
|
||||
s_fourElement[i].alpha2_sum = 16.0f;
|
||||
s_fourElement[i].beta2_sum = 0.0f;
|
||||
s_fourElement[i].alphabeta_sum = -16.0f;
|
||||
s_fourElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else if (c0 == 16) {
|
||||
// a = b = x0 / 16
|
||||
s_fourElement[i].alpha2_sum = 0.0f;
|
||||
s_fourElement[i].beta2_sum = 16.0f;
|
||||
s_fourElement[i].alphabeta_sum = -16.0f;
|
||||
s_fourElement[i].factor = 1.0f / 256.0f;
|
||||
}
|
||||
else*/ {
|
||||
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
|
||||
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
|
||||
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
|
||||
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
|
||||
}
|
||||
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
//printf("%d four cluster elements\n", i);
|
||||
}
|
||||
|
||||
void FastClusterFit::SetMetric(float r, float g, float b)
|
||||
{
|
||||
#if SQUISH_USE_SIMD
|
||||
|
@ -44,8 +44,6 @@ public:
|
||||
void SetMetric(float r, float g, float b);
|
||||
float GetBestError() const;
|
||||
|
||||
static void DoPrecomputation();
|
||||
|
||||
// Make them public
|
||||
virtual void Compress3( void* block );
|
||||
virtual void Compress4( void* block );
|
||||
|
1135
src/nvtt/squish/fastclusterlookup.inl
Normal file
1135
src/nvtt/squish/fastclusterlookup.inl
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user