Precompute fast cluster fit factors, and store as static const.

nvtt is completely reentrant now. Fixes issue 37.
cleanup interface of cuda compressors.
This commit is contained in:
castano
2008-04-18 08:49:32 +00:00
parent 91eb30667f
commit 1df69495fc
8 changed files with 1296 additions and 316 deletions

View File

@ -0,0 +1,113 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2008 Ignacio Castano castano@gmail.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
#include <stdio.h>
#include <float.h>
#include <math.h>
struct Precomp {
float alpha2_sum;
float beta2_sum;
float alphabeta_sum;
float factor;
};
int main()
{
int i = 0;
printf("struct Precomp {\n");
printf("\tfloat alpha2_sum;\n");
printf("\tfloat beta2_sum;\n");
printf("\tfloat alphabeta_sum;\n");
printf("\tfloat factor;\n");
printf("};\n\n");
printf("static const SQUISH_ALIGN_16 Precomp s_threeElement[153] = {\n");
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
Precomp p;
p.alpha2_sum = c0 + c1 * 0.25f;
p.beta2_sum = c2 + c1 * 0.25f;
p.alphabeta_sum = c1 * 0.25f;
p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
if (isfinite(p.factor))
{
printf("\t{ %f, %f, %f, %f }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2);
}
else
{
printf("\t{ %f, %f, %f, INFINITY }, // %d (%d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2);
}
i++;
}
}
printf("}; // %d three cluster elements\n\n", i);
printf("static const SQUISH_ALIGN_16 Precomp s_fourElement[969] = {\n");
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
Precomp p;
p.alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
p.beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
p.alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
p.factor = 1.0f / (p.alpha2_sum * p.beta2_sum - p.alphabeta_sum * p.alphabeta_sum);
if (isfinite(p.factor))
{
printf("\t{ %f, %f, %f, %f }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, p.factor, i, c0, c1, c2, c3);
}
else
{
printf("\t{ %f, %f, %f, INFINITY }, // %d (%d %d %d %d)\n", p.alpha2_sum, p.beta2_sum, p.alphabeta_sum, i, c0, c1, c2, c3);
}
i++;
}
}
}
printf("}; // %d four cluster elements\n\n", i);
return 0;
}

View File

@ -29,6 +29,8 @@
#include "colourblock.h"
#include <cfloat>
#include "fastclusterlookup.inl"
namespace squish {
FastClusterFit::FastClusterFit()
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
}
struct Precomp {
float alpha2_sum;
float beta2_sum;
float alphabeta_sum;
float factor;
};
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
void FastClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD

View File

@ -44,8 +44,6 @@ public:
void SetMetric(float r, float g, float b);
float GetBestError() const;
static void DoPrecomputation();
// Make them public
virtual void Compress3( void* block );
virtual void Compress4( void* block );

File diff suppressed because it is too large Load Diff