Merge changes from The Witness.
This commit is contained in:
parent
1248743859
commit
f1c943cd57
@ -92,8 +92,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
||||
{
|
||||
int p = order[i];
|
||||
#if NVTT_USE_SIMD
|
||||
Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
|
||||
m_weighted[i] = SimdVector(tmp);
|
||||
NV_ALIGN_16 Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
|
||||
m_weighted[i] = SimdVector(tmp.component);
|
||||
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||
m_xsum += m_weighted[i];
|
||||
#else
|
||||
@ -110,8 +110,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
||||
void ClusterFit::setMetric(Vector4::Arg w)
|
||||
{
|
||||
#if NVTT_USE_SIMD
|
||||
Vector4 tmp(w.xyz(), 1);
|
||||
m_metric = SimdVector(tmp);
|
||||
NV_ALIGN_16 Vector4 tmp(w.xyz(), 1);
|
||||
m_metric = SimdVector(tmp.component);
|
||||
#else
|
||||
m_metric = w.xyz();
|
||||
#endif
|
||||
@ -134,13 +134,13 @@ float ClusterFit::bestError() const
|
||||
|
||||
bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
|
||||
{
|
||||
int const count = m_count;
|
||||
SimdVector const one = SimdVector(1.0f);
|
||||
SimdVector const zero = SimdVector(0.0f);
|
||||
SimdVector const half(0.5f, 0.5f, 0.5f, 0.25f);
|
||||
SimdVector const two = SimdVector(2.0);
|
||||
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
||||
const int count = m_count;
|
||||
const SimdVector one = SimdVector(1.0f);
|
||||
const SimdVector zero = SimdVector(0.0f);
|
||||
const SimdVector half(0.5f, 0.5f, 0.5f, 0.25f);
|
||||
const SimdVector two = SimdVector(2.0);
|
||||
const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
||||
|
||||
// declare variables
|
||||
SimdVector beststart = SimdVector( 0.0f );
|
||||
@ -158,23 +158,23 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
|
||||
|
||||
for( int c1 = 0; c1 <= count-c0; c1++)
|
||||
{
|
||||
SimdVector const x2 = m_xsum - x1 - x0;
|
||||
const SimdVector x2 = m_xsum - x1 - x0;
|
||||
|
||||
//Vector3 const alphax_sum = x0 + x1 * 0.5f;
|
||||
//float const alpha2_sum = w0 + w1 * 0.25f;
|
||||
SimdVector const alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
|
||||
SimdVector const alpha2_sum = alphax_sum.splatW();
|
||||
//Vector3 alphax_sum = x0 + x1 * 0.5f;
|
||||
//float alpha2_sum = w0 + w1 * 0.25f;
|
||||
const SimdVector alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
|
||||
const SimdVector alpha2_sum = alphax_sum.splatW();
|
||||
|
||||
//Vector3 const betax_sum = x2 + x1 * 0.5f;
|
||||
//float const beta2_sum = w2 + w1 * 0.25f;
|
||||
SimdVector const betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
|
||||
SimdVector const beta2_sum = betax_sum.splatW();
|
||||
//const Vector3 betax_sum = x2 + x1 * 0.5f;
|
||||
//const float beta2_sum = w2 + w1 * 0.25f;
|
||||
const SimdVector betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
|
||||
const SimdVector beta2_sum = betax_sum.splatW();
|
||||
|
||||
//float const alphabeta_sum = w1 * 0.25f;
|
||||
SimdVector const alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
|
||||
//const float alphabeta_sum = w1 * 0.25f;
|
||||
const SimdVector alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
|
||||
|
||||
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
||||
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
|
||||
// const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
||||
const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
|
||||
|
||||
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
|
||||
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
|
||||
@ -229,16 +229,16 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
|
||||
|
||||
bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
|
||||
{
|
||||
int const count = m_count;
|
||||
SimdVector const one = SimdVector(1.0f);
|
||||
SimdVector const zero = SimdVector(0.0f);
|
||||
SimdVector const half = SimdVector(0.5f);
|
||||
SimdVector const two = SimdVector(2.0);
|
||||
SimdVector const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
|
||||
SimdVector const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
|
||||
SimdVector const twonineths = SimdVector( 2.0f/9.0f );
|
||||
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
||||
const int count = m_count;
|
||||
const SimdVector one = SimdVector(1.0f);
|
||||
const SimdVector zero = SimdVector(0.0f);
|
||||
const SimdVector half = SimdVector(0.5f);
|
||||
const SimdVector two = SimdVector(2.0);
|
||||
const SimdVector onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
|
||||
const SimdVector twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
|
||||
const SimdVector twonineths = SimdVector( 2.0f/9.0f );
|
||||
const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
|
||||
const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
|
||||
|
||||
// declare variables
|
||||
SimdVector beststart = SimdVector( 0.0f );
|
||||
@ -259,23 +259,23 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
|
||||
|
||||
for( int c2 = 0; c2 <= count-c0-c1; c2++)
|
||||
{
|
||||
SimdVector const x3 = m_xsum - x2 - x1 - x0;
|
||||
const SimdVector x3 = m_xsum - x2 - x1 - x0;
|
||||
|
||||
//Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
|
||||
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
|
||||
SimdVector const alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
|
||||
SimdVector const alpha2_sum = alphax_sum.splatW();
|
||||
//const Vector3 alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
|
||||
//const float alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
|
||||
const SimdVector alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
|
||||
const SimdVector alpha2_sum = alphax_sum.splatW();
|
||||
|
||||
//Vector3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
|
||||
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
|
||||
SimdVector const betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
|
||||
SimdVector const beta2_sum = betax_sum.splatW();
|
||||
//const Vector3 betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
|
||||
//const float beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
|
||||
const SimdVector betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
|
||||
const SimdVector beta2_sum = betax_sum.splatW();
|
||||
|
||||
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
|
||||
SimdVector const alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
|
||||
//const float alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
|
||||
const SimdVector alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
|
||||
|
||||
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
||||
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
|
||||
//const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
||||
const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
|
||||
|
||||
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
|
||||
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
|
||||
|
@ -113,7 +113,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
|
||||
*/
|
||||
|
||||
|
||||
struct CompressorContext
|
||||
struct FixedBlockCompressorContext
|
||||
{
|
||||
nvtt::AlphaMode alphaMode;
|
||||
uint w, h;
|
||||
@ -125,10 +125,10 @@ struct CompressorContext
|
||||
FixedBlockCompressor * compressor;
|
||||
};
|
||||
|
||||
// Each task compresses one row.
|
||||
void CompressorTask(void * data, int i)
|
||||
// Each task compresses one block.
|
||||
void FixedBlockCompressorTask(void * data, int i)
|
||||
{
|
||||
CompressorContext * d = (CompressorContext *) data;
|
||||
FixedBlockCompressorContext * d = (FixedBlockCompressorContext *) data;
|
||||
|
||||
uint x = i % d->bw;
|
||||
uint y = i / d->bw;
|
||||
@ -147,7 +147,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
|
||||
{
|
||||
nvDebugCheck(d == 1);
|
||||
|
||||
CompressorContext context;
|
||||
FixedBlockCompressorContext context;
|
||||
context.alphaMode = alphaMode;
|
||||
context.w = w;
|
||||
context.h = h;
|
||||
@ -169,7 +169,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
|
||||
const uint size = context.bs * count;
|
||||
context.mem = new uint8[size];
|
||||
|
||||
dispatcher->dispatch(CompressorTask, &context, count);
|
||||
dispatcher->dispatch(FixedBlockCompressorTask, &context, count);
|
||||
|
||||
outputOptions.writeData(context.mem, size);
|
||||
|
||||
@ -177,35 +177,67 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
|
||||
}
|
||||
|
||||
|
||||
// Shared state handed to ColorSetCompressorTask through the dispatcher's
// void* argument. ColorSetCompressor::compress fills it in before dispatching
// one task per block.
struct ColorSetCompressorContext
|
||||
{
|
||||
// Alpha mode forwarded unchanged to compressBlock.
nvtt::AlphaMode alphaMode;
|
||||
// Image dimensions as passed to compress(); forwarded to ColorSet::setColors.
uint w, h;
|
||||
// Source float data forwarded to ColorSet::setColors (not owned by the context).
const float * data;
|
||||
// Compression options forwarded (dereferenced) to compressBlock.
const nvtt::CompressionOptions::Private * compressionOptions;
|
||||
|
||||
// bw, bh: number of blocks per row / per column, computed as (w + 3) / 4 and (h + 3) / 4.
// bs: value of blockSize(); used as the per-block stride into 'mem'.
uint bw, bh, bs;
|
||||
// Output buffer of bs * bw * bh bytes; allocated and released by compress().
uint8 * mem;
|
||||
// Compressor whose compressBlock() is invoked by each task.
ColorSetCompressor * compressor;
|
||||
};
|
||||
|
||||
|
||||
// Each task compresses one block.
// 'data' must point to a ColorSetCompressorContext; 'i' is the linear block
// index, which is split into a column (x) and a row (y) using the number of
// blocks per row (bw). The compressed block is written into the context's
// output buffer at offset i * bs.
|
||||
void ColorSetCompressorTask(void * data, int i)
|
||||
{
|
||||
ColorSetCompressorContext * d = (ColorSetCompressorContext *) data;
|
||||
|
||||
// Block column and row derived from the linear task index.
uint x = i % d->bw;
|
||||
uint y = i / d->bw;
|
||||
|
||||
//for (uint x = 0; x < d->bw; x++)
|
||||
{
|
||||
ColorSet set;
|
||||
// NOTE(review): x and y are block indices here, whereas the serial loop this
// task replaces passed pixel coordinates (stepping by 4) to setColors.
// Confirm which unit setColors expects.
set.setColors(d->data, d->w, d->h, x, y);
|
||||
|
||||
// Destination of this block inside the shared output buffer.
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
|
||||
d->compressor->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
|
||||
{
|
||||
nvDebugCheck(d == 1);
|
||||
|
||||
const uint bs = blockSize();
|
||||
const uint bw = (w + 3) / 4;
|
||||
const uint bh = (h + 3) / 4;
|
||||
ColorSetCompressorContext context;
|
||||
context.alphaMode = alphaMode;
|
||||
context.w = w;
|
||||
context.h = h;
|
||||
context.data = data;
|
||||
context.compressionOptions = &compressionOptions;
|
||||
|
||||
//bool singleThreaded = true;
|
||||
//if (singleThreaded)
|
||||
{
|
||||
uint8 * mem = malloc<uint8>(bs * bw);
|
||||
context.bs = blockSize();
|
||||
context.bw = (w + 3) / 4;
|
||||
context.bh = (h + 3) / 4;
|
||||
|
||||
ColorSet set;
|
||||
context.compressor = this;
|
||||
|
||||
for (uint y = 0; y < h; y += 4) {
|
||||
uint8 * ptr = mem;
|
||||
for (uint x = 0; x < w; x += 4, ptr += bs) {
|
||||
set.setColors(data, w, h, x, y);
|
||||
compressBlock(set, alphaMode, compressionOptions, ptr);
|
||||
}
|
||||
SequentialTaskDispatcher sequential;
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
outputOptions.outputHandler->writeData(mem, bs * bw);
|
||||
}
|
||||
}
|
||||
// Use a single thread to compress small textures.
|
||||
if (context.bh < 4) dispatcher = &sequential;
|
||||
|
||||
free(mem);
|
||||
}
|
||||
const uint count = context.bw * context.bh;
|
||||
const uint size = context.bs * count;
|
||||
context.mem = new uint8[size];
|
||||
|
||||
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
|
||||
|
||||
outputOptions.writeData(context.mem, size);
|
||||
|
||||
delete [] context.mem;
|
||||
}
|
||||
|
@ -432,6 +432,30 @@ bool Surface::save(const char * fileName) const
|
||||
return false;
|
||||
}
|
||||
|
||||
#if 0 //NV_OS_WIN32
|
||||
|
||||
#include <windows.h>
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
// Exception filter used by the CATCH macro's __except clause.
// Returns EXCEPTION_EXECUTE_HANDLER for access violations so the handler body
// runs, and EXCEPTION_CONTINUE_SEARCH for every other exception code.
// 'ep' is accepted but not used. Note that the enclosing '#if 0' currently
// compiles this function out.
static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) {
|
||||
if (code == EXCEPTION_ACCESS_VIOLATION) {
|
||||
return EXCEPTION_EXECUTE_HANDLER;
|
||||
}
|
||||
else {
|
||||
return EXCEPTION_CONTINUE_SEARCH;
|
||||
};
|
||||
}
|
||||
|
||||
#define TRY __try
|
||||
|
||||
#define CATCH __except (filter(GetExceptionCode(), GetExceptionInformation()))
|
||||
#else
|
||||
#define TRY
|
||||
#define CATCH
|
||||
#endif
|
||||
|
||||
|
||||
bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data)
|
||||
{
|
||||
detach();
|
||||
@ -453,7 +477,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
||||
{
|
||||
const Color32 * src = (const Color32 *)data;
|
||||
|
||||
try {
|
||||
TRY {
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
rdst[i] = float(src[i].r) / 255.0f;
|
||||
@ -462,7 +486,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
||||
adst[i] = float(src[i].a) / 255.0f;
|
||||
}
|
||||
}
|
||||
catch(...) {
|
||||
CATCH {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -470,7 +494,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
||||
{
|
||||
const uint16 * src = (const uint16 *)data;
|
||||
|
||||
try {
|
||||
TRY {
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
((uint32 *)rdst)[i] = half_to_float(src[4*i+0]);
|
||||
@ -479,7 +503,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
||||
((uint32 *)adst)[i] = half_to_float(src[4*i+3]);
|
||||
}
|
||||
}
|
||||
catch(...) {
|
||||
CATCH {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@ -487,7 +511,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
||||
{
|
||||
const float * src = (const float *)data;
|
||||
|
||||
try {
|
||||
TRY {
|
||||
for (int i = 0; i < count; i++)
|
||||
{
|
||||
rdst[i] = src[4 * i + 0];
|
||||
@ -496,7 +520,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
||||
adst[i] = src[4 * i + 3];
|
||||
}
|
||||
}
|
||||
catch(...) {
|
||||
CATCH {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
@ -270,6 +270,11 @@ int main(int argc, char *argv[])
|
||||
i++;
|
||||
}
|
||||
}
|
||||
else if (strcmp("-pause", argv[i]) == 0)
|
||||
{
|
||||
printf("Press ENTER\n"); fflush(stdout);
|
||||
getchar();
|
||||
}
|
||||
|
||||
// Output options
|
||||
else if (strcmp("-silent", argv[i]) == 0)
|
||||
@ -529,6 +534,11 @@ int main(int argc, char *argv[])
|
||||
compressionOptions.setColorWeights(1, 1, 0);
|
||||
}
|
||||
|
||||
|
||||
//compressionOptions.setColorWeights(0.2126, 0.7152, 0.0722);
|
||||
//compressionOptions.setColorWeights(0.299, 0.587, 0.114);
|
||||
//compressionOptions.setColorWeights(3, 4, 2);
|
||||
|
||||
if (externalCompressor != NULL)
|
||||
{
|
||||
compressionOptions.setExternalCompressor(externalCompressor);
|
||||
|
Loading…
Reference in New Issue
Block a user