Merge changes from The Witness.

This commit is contained in:
castano 2012-02-14 16:37:15 +00:00
parent 1248743859
commit f1c943cd57
5 changed files with 149 additions and 83 deletions

View File

@ -92,8 +92,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
{ {
int p = order[i]; int p = order[i];
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
Vector4 tmp(values[p] * set->weights[p], set->weights[p]); NV_ALIGN_16 Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
m_weighted[i] = SimdVector(tmp); m_weighted[i] = SimdVector(tmp.component);
m_xxsum += m_weighted[i] * m_weighted[i]; m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i]; m_xsum += m_weighted[i];
#else #else
@ -110,8 +110,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
void ClusterFit::setMetric(Vector4::Arg w) void ClusterFit::setMetric(Vector4::Arg w)
{ {
#if NVTT_USE_SIMD #if NVTT_USE_SIMD
Vector4 tmp(w.xyz(), 1); NV_ALIGN_16 Vector4 tmp(w.xyz(), 1);
m_metric = SimdVector(tmp); m_metric = SimdVector(tmp.component);
#else #else
m_metric = w.xyz(); m_metric = w.xyz();
#endif #endif
@ -134,13 +134,13 @@ float ClusterFit::bestError() const
bool ClusterFit::compress3( Vector3 * start, Vector3 * end ) bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
{ {
int const count = m_count; const int count = m_count;
SimdVector const one = SimdVector(1.0f); const SimdVector one = SimdVector(1.0f);
SimdVector const zero = SimdVector(0.0f); const SimdVector zero = SimdVector(0.0f);
SimdVector const half(0.5f, 0.5f, 0.5f, 0.25f); const SimdVector half(0.5f, 0.5f, 0.5f, 0.25f);
SimdVector const two = SimdVector(2.0); const SimdVector two = SimdVector(2.0);
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f ); const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables // declare variables
SimdVector beststart = SimdVector( 0.0f ); SimdVector beststart = SimdVector( 0.0f );
@ -158,23 +158,23 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
for( int c1 = 0; c1 <= count-c0; c1++) for( int c1 = 0; c1 <= count-c0; c1++)
{ {
SimdVector const x2 = m_xsum - x1 - x0; const SimdVector x2 = m_xsum - x1 - x0;
//Vector3 const alphax_sum = x0 + x1 * 0.5f; //Vector3 alphax_sum = x0 + x1 * 0.5f;
//float const alpha2_sum = w0 + w1 * 0.25f; //float alpha2_sum = w0 + w1 * 0.25f;
SimdVector const alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum const SimdVector alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
SimdVector const alpha2_sum = alphax_sum.splatW(); const SimdVector alpha2_sum = alphax_sum.splatW();
//Vector3 const betax_sum = x2 + x1 * 0.5f; //const Vector3 betax_sum = x2 + x1 * 0.5f;
//float const beta2_sum = w2 + w1 * 0.25f; //const float beta2_sum = w2 + w1 * 0.25f;
SimdVector const betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum const SimdVector betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW(); const SimdVector beta2_sum = betax_sum.splatW();
//float const alphabeta_sum = w1 * 0.25f; //const float alphabeta_sum = w1 * 0.25f;
SimdVector const alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum const SimdVector alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); // const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
@ -229,16 +229,16 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
{ {
int const count = m_count; const int count = m_count;
SimdVector const one = SimdVector(1.0f); const SimdVector one = SimdVector(1.0f);
SimdVector const zero = SimdVector(0.0f); const SimdVector zero = SimdVector(0.0f);
SimdVector const half = SimdVector(0.5f); const SimdVector half = SimdVector(0.5f);
SimdVector const two = SimdVector(2.0); const SimdVector two = SimdVector(2.0);
SimdVector const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f ); const SimdVector onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
SimdVector const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f ); const SimdVector twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
SimdVector const twonineths = SimdVector( 2.0f/9.0f ); const SimdVector twonineths = SimdVector( 2.0f/9.0f );
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f ); const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f ); const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables // declare variables
SimdVector beststart = SimdVector( 0.0f ); SimdVector beststart = SimdVector( 0.0f );
@ -259,23 +259,23 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
for( int c2 = 0; c2 <= count-c0-c1; c2++) for( int c2 = 0; c2 <= count-c0-c1; c2++)
{ {
SimdVector const x3 = m_xsum - x2 - x1 - x0; const SimdVector x3 = m_xsum - x2 - x1 - x0;
//Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f); //const Vector3 alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f); //const float alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
SimdVector const alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum const SimdVector alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
SimdVector const alpha2_sum = alphax_sum.splatW(); const SimdVector alpha2_sum = alphax_sum.splatW();
//Vector3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f); //const Vector3 betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f); //const float beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
SimdVector const betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum const SimdVector betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW(); const SimdVector beta2_sum = betax_sum.splatW();
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f); //const float alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
SimdVector const alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum const SimdVector alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum); //const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) ); const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor; SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor; SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

View File

@ -113,7 +113,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
*/ */
struct CompressorContext struct FixedBlockCompressorContext
{ {
nvtt::AlphaMode alphaMode; nvtt::AlphaMode alphaMode;
uint w, h; uint w, h;
@ -125,10 +125,10 @@ struct CompressorContext
FixedBlockCompressor * compressor; FixedBlockCompressor * compressor;
}; };
// Each task compresses one row. // Each task compresses one block.
void CompressorTask(void * data, int i) void FixedBlockCompressorTask(void * data, int i)
{ {
CompressorContext * d = (CompressorContext *) data; FixedBlockCompressorContext * d = (FixedBlockCompressorContext *) data;
uint x = i % d->bw; uint x = i % d->bw;
uint y = i / d->bw; uint y = i / d->bw;
@ -147,7 +147,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
{ {
nvDebugCheck(d == 1); nvDebugCheck(d == 1);
CompressorContext context; FixedBlockCompressorContext context;
context.alphaMode = alphaMode; context.alphaMode = alphaMode;
context.w = w; context.w = w;
context.h = h; context.h = h;
@ -169,7 +169,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
const uint size = context.bs * count; const uint size = context.bs * count;
context.mem = new uint8[size]; context.mem = new uint8[size];
dispatcher->dispatch(CompressorTask, &context, count); dispatcher->dispatch(FixedBlockCompressorTask, &context, count);
outputOptions.writeData(context.mem, size); outputOptions.writeData(context.mem, size);
@ -177,35 +177,67 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
} }
// State shared by all ColorSetCompressorTask invocations of one
// ColorSetCompressor::compress() call. A pointer to this struct is handed to
// the task dispatcher as the opaque `data` argument.
struct ColorSetCompressorContext
{
    // Inputs forwarded unchanged from compress().
    nvtt::AlphaMode alphaMode;
    uint w;                 // Image width as passed to compress().
    uint h;                 // Image height as passed to compress().
    const float * data;     // Source image data as passed to compress().
    const nvtt::CompressionOptions::Private * compressionOptions;

    // Block layout computed by compress().
    uint bw;                // Number of block columns: (w + 3) / 4.
    uint bh;                // Number of block rows:    (h + 3) / 4.
    uint bs;                // Size of one compressed block, from blockSize().

    // Output buffer of bs * bw * bh bytes; allocated and released by compress().
    uint8 * mem;

    // Compressor whose compressBlock() is invoked for every block.
    ColorSetCompressor * compressor;
};
// Each task compresses one block.
void ColorSetCompressorTask(void * data, int i)
{
ColorSetCompressorContext * d = (ColorSetCompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
//for (uint x = 0; x < d->bw; x++)
{
ColorSet set;
set.setColors(d->data, d->w, d->h, x, y);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
d->compressor->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
}
}
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(d == 1); nvDebugCheck(d == 1);
const uint bs = blockSize(); ColorSetCompressorContext context;
const uint bw = (w + 3) / 4; context.alphaMode = alphaMode;
const uint bh = (h + 3) / 4; context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
//bool singleThreaded = true; context.bs = blockSize();
//if (singleThreaded) context.bw = (w + 3) / 4;
{ context.bh = (h + 3) / 4;
uint8 * mem = malloc<uint8>(bs * bw);
ColorSet set; context.compressor = this;
for (uint y = 0; y < h; y += 4) { SequentialTaskDispatcher sequential;
uint8 * ptr = mem;
for (uint x = 0; x < w; x += 4, ptr += bs) {
set.setColors(data, w, h, x, y);
compressBlock(set, alphaMode, compressionOptions, ptr);
}
if (outputOptions.outputHandler != NULL) { // Use a single thread to compress small textures.
outputOptions.outputHandler->writeData(mem, bs * bw); if (context.bh < 4) dispatcher = &sequential;
}
}
free(mem); const uint count = context.bw * context.bh;
} const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
} }

View File

@ -721,8 +721,8 @@ void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, co
if (color0 < color1) if (color0 < color1)
{ {
swap(maxColor, minColor); swap(maxColor, minColor);
swap(color0, color1); swap(color0, color1);
} }
block->col0 = Color16(color0); block->col0 = Color16(color0);
@ -741,8 +741,8 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co
if (color0 > color1) if (color0 > color1)
{ {
swap(maxColor, minColor); swap(maxColor, minColor);
swap(color0, color1); swap(color0, color1);
} }
block->col0 = Color16(color0); block->col0 = Color16(color0);

View File

@ -432,6 +432,30 @@ bool Surface::save(const char * fileName) const
return false; return false;
} }
// TRY / CATCH guard the pixel conversion loops in Surface::setImage.
// Usage pattern:  TRY { ...loop... } CATCH { return false; }
//
// The Win32 structured-exception variant is currently compiled out (#if 0).
#if 0 //NV_OS_WIN32
#include <windows.h>
#undef min
#undef max

// SEH filter: handle access violations in the CATCH block and let every
// other exception code continue the handler search.
static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) {
    (void)ep; // Exception details are not inspected.
    if (code == EXCEPTION_ACCESS_VIOLATION) {
        return EXCEPTION_EXECUTE_HANDLER;
    }
    return EXCEPTION_CONTINUE_SEARCH;
}

#define TRY __try
#define CATCH __except (filter(GetExceptionCode(), GetExceptionInformation()))
#else
// Fallback without exception support. The macros must still form a single
// statement: if both expanded to nothing, the CATCH block would become a
// plain block that always runs right after the TRY block, and its
// `return false` would execute unconditionally.
#define TRY if (true)
#define CATCH else
#endif
bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data) bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data)
{ {
detach(); detach();
@ -453,7 +477,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
{ {
const Color32 * src = (const Color32 *)data; const Color32 * src = (const Color32 *)data;
try { TRY {
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
rdst[i] = float(src[i].r) / 255.0f; rdst[i] = float(src[i].r) / 255.0f;
@ -462,7 +486,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
adst[i] = float(src[i].a) / 255.0f; adst[i] = float(src[i].a) / 255.0f;
} }
} }
catch(...) { CATCH {
return false; return false;
} }
} }
@ -470,7 +494,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
{ {
const uint16 * src = (const uint16 *)data; const uint16 * src = (const uint16 *)data;
try { TRY {
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
((uint32 *)rdst)[i] = half_to_float(src[4*i+0]); ((uint32 *)rdst)[i] = half_to_float(src[4*i+0]);
@ -479,7 +503,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
((uint32 *)adst)[i] = half_to_float(src[4*i+3]); ((uint32 *)adst)[i] = half_to_float(src[4*i+3]);
} }
} }
catch(...) { CATCH {
return false; return false;
} }
} }
@ -487,7 +511,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
{ {
const float * src = (const float *)data; const float * src = (const float *)data;
try { TRY {
for (int i = 0; i < count; i++) for (int i = 0; i < count; i++)
{ {
rdst[i] = src[4 * i + 0]; rdst[i] = src[4 * i + 0];
@ -496,7 +520,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
adst[i] = src[4 * i + 3]; adst[i] = src[4 * i + 3];
} }
} }
catch(...) { CATCH {
return false; return false;
} }
} }

View File

@ -270,6 +270,11 @@ int main(int argc, char *argv[])
i++; i++;
} }
} }
else if (strcmp("-pause", argv[i]) == 0)
{
printf("Press ENTER\n"); fflush(stdout);
getchar();
}
// Output options // Output options
else if (strcmp("-silent", argv[i]) == 0) else if (strcmp("-silent", argv[i]) == 0)
@ -529,6 +534,11 @@ int main(int argc, char *argv[])
compressionOptions.setColorWeights(1, 1, 0); compressionOptions.setColorWeights(1, 1, 0);
} }
//compressionOptions.setColorWeights(0.2126, 0.7152, 0.0722);
//compressionOptions.setColorWeights(0.299, 0.587, 0.114);
//compressionOptions.setColorWeights(3, 4, 2);
if (externalCompressor != NULL) if (externalCompressor != NULL)
{ {
compressionOptions.setExternalCompressor(externalCompressor); compressionOptions.setExternalCompressor(externalCompressor);