Merge changes from The Witness.

import/raw
castano 12 years ago
parent 3b385040d8
commit 24a5c8a21a

@ -92,8 +92,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
{
int p = order[i];
#if NVTT_USE_SIMD
Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
m_weighted[i] = SimdVector(tmp);
NV_ALIGN_16 Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
m_weighted[i] = SimdVector(tmp.component);
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
#else
@ -110,8 +110,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
void ClusterFit::setMetric(Vector4::Arg w)
{
#if NVTT_USE_SIMD
Vector4 tmp(w.xyz(), 1);
m_metric = SimdVector(tmp);
NV_ALIGN_16 Vector4 tmp(w.xyz(), 1);
m_metric = SimdVector(tmp.component);
#else
m_metric = w.xyz();
#endif
@ -134,13 +134,13 @@ float ClusterFit::bestError() const
bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
{
int const count = m_count;
SimdVector const one = SimdVector(1.0f);
SimdVector const zero = SimdVector(0.0f);
SimdVector const half(0.5f, 0.5f, 0.5f, 0.25f);
SimdVector const two = SimdVector(2.0);
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
const int count = m_count;
const SimdVector one = SimdVector(1.0f);
const SimdVector zero = SimdVector(0.0f);
const SimdVector half(0.5f, 0.5f, 0.5f, 0.25f);
const SimdVector two = SimdVector(2.0);
const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
SimdVector beststart = SimdVector( 0.0f );
@ -158,23 +158,23 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
for( int c1 = 0; c1 <= count-c0; c1++)
{
SimdVector const x2 = m_xsum - x1 - x0;
const SimdVector x2 = m_xsum - x1 - x0;
//Vector3 const alphax_sum = x0 + x1 * 0.5f;
//float const alpha2_sum = w0 + w1 * 0.25f;
SimdVector const alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
SimdVector const alpha2_sum = alphax_sum.splatW();
//Vector3 alphax_sum = x0 + x1 * 0.5f;
//float alpha2_sum = w0 + w1 * 0.25f;
const SimdVector alphax_sum = multiplyAdd(x1, half, x0); // alphax_sum, alpha2_sum
const SimdVector alpha2_sum = alphax_sum.splatW();
//Vector3 const betax_sum = x2 + x1 * 0.5f;
//float const beta2_sum = w2 + w1 * 0.25f;
SimdVector const betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW();
//const Vector3 betax_sum = x2 + x1 * 0.5f;
//const float beta2_sum = w2 + w1 * 0.25f;
const SimdVector betax_sum = multiplyAdd(x1, half, x2); // betax_sum, beta2_sum
const SimdVector beta2_sum = betax_sum.splatW();
//float const alphabeta_sum = w1 * 0.25f;
SimdVector const alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
//const float alphabeta_sum = w1 * 0.25f;
const SimdVector alphabeta_sum = (x1 * half).splatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
// const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
@ -229,16 +229,16 @@ bool ClusterFit::compress3( Vector3 * start, Vector3 * end )
bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
{
int const count = m_count;
SimdVector const one = SimdVector(1.0f);
SimdVector const zero = SimdVector(0.0f);
SimdVector const half = SimdVector(0.5f);
SimdVector const two = SimdVector(2.0);
SimdVector const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
SimdVector const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
SimdVector const twonineths = SimdVector( 2.0f/9.0f );
SimdVector const grid( 31.0f, 63.0f, 31.0f, 0.0f );
SimdVector const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
const int count = m_count;
const SimdVector one = SimdVector(1.0f);
const SimdVector zero = SimdVector(0.0f);
const SimdVector half = SimdVector(0.5f);
const SimdVector two = SimdVector(2.0);
const SimdVector onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
const SimdVector twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
const SimdVector twonineths = SimdVector( 2.0f/9.0f );
const SimdVector grid( 31.0f, 63.0f, 31.0f, 0.0f );
const SimdVector gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
SimdVector beststart = SimdVector( 0.0f );
@ -259,23 +259,23 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
for( int c2 = 0; c2 <= count-c0-c1; c2++)
{
SimdVector const x3 = m_xsum - x2 - x1 - x0;
const SimdVector x3 = m_xsum - x2 - x1 - x0;
//Vector3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
SimdVector const alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
SimdVector const alpha2_sum = alphax_sum.splatW();
//const Vector3 alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//const float alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
const SimdVector alphax_sum = multiplyAdd(x2, onethird, multiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
const SimdVector alpha2_sum = alphax_sum.splatW();
//Vector3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
SimdVector const betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
SimdVector const beta2_sum = betax_sum.splatW();
//const Vector3 betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//const float beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
const SimdVector betax_sum = multiplyAdd(x2, twothirds, multiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
const SimdVector beta2_sum = betax_sum.splatW();
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
SimdVector const alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
//const float alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
const SimdVector alphabeta_sum = twonineths*( x1 + x2 ).splatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
SimdVector const factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
//const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
const SimdVector factor = reciprocal( negativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
SimdVector a = negativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
SimdVector b = negativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;

@ -113,7 +113,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c
*/
struct CompressorContext
struct FixedBlockCompressorContext
{
nvtt::AlphaMode alphaMode;
uint w, h;
@ -125,10 +125,10 @@ struct CompressorContext
FixedBlockCompressor * compressor;
};
// Each task compresses one row.
void CompressorTask(void * data, int i)
// Each task compresses one block.
void FixedBlockCompressorTask(void * data, int i)
{
CompressorContext * d = (CompressorContext *) data;
FixedBlockCompressorContext * d = (FixedBlockCompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
@ -147,7 +147,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
{
nvDebugCheck(d == 1);
CompressorContext context;
FixedBlockCompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
@ -169,7 +169,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(CompressorTask, &context, count);
dispatcher->dispatch(FixedBlockCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
@ -177,35 +177,67 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
}
// Shared state handed to ColorSetCompressorTask through its void * argument.
// ColorSetCompressor::compress fills it in once before dispatching the tasks.
struct ColorSetCompressorContext
{
// Alpha mode forwarded unchanged to compressBlock.
nvtt::AlphaMode alphaMode;
// Image dimensions as received by compress; block counts are derived from them as (w + 3) / 4 and (h + 3) / 4.
uint w, h;
// Source float image, passed to ColorSet::setColors together with w and h.
const float * data;
// Compression options; dereferenced and forwarded to compressBlock.
const nvtt::CompressionOptions::Private * compressionOptions;
// bw, bh: number of blocks per row and per column. bs: size of one compressed block as returned by blockSize().
uint bw, bh, bs;
// Output buffer of bs * bw * bh bytes; the task for block (x, y) writes at offset (y * bw + x) * bs.
uint8 * mem;
// Compressor whose compressBlock is invoked for each block.
ColorSetCompressor * compressor;
};
// Each task compresses one block.
void ColorSetCompressorTask(void * data, int i)
{
ColorSetCompressorContext * d = (ColorSetCompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
//for (uint x = 0; x < d->bw; x++)
{
ColorSet set;
set.setColors(d->data, d->w, d->h, x, y);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
d->compressor->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
}
}
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
ColorSetCompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
//bool singleThreaded = true;
//if (singleThreaded)
{
uint8 * mem = malloc<uint8>(bs * bw);
context.bs = blockSize();
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
ColorSet set;
context.compressor = this;
for (uint y = 0; y < h; y += 4) {
uint8 * ptr = mem;
for (uint x = 0; x < w; x += 4, ptr += bs) {
set.setColors(data, w, h, x, y);
compressBlock(set, alphaMode, compressionOptions, ptr);
}
SequentialTaskDispatcher sequential;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs * bw);
}
}
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
free(mem);
}
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
}

@ -721,8 +721,8 @@ void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, co
if (color0 < color1)
{
swap(maxColor, minColor);
swap(color0, color1);
swap(maxColor, minColor);
swap(color0, color1);
}
block->col0 = Color16(color0);
@ -741,8 +741,8 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co
if (color0 > color1)
{
swap(maxColor, minColor);
swap(color0, color1);
swap(maxColor, minColor);
swap(color0, color1);
}
block->col0 = Color16(color0);

@ -432,6 +432,30 @@ bool Surface::save(const char * fileName) const
return false;
}
#if 0 //NV_OS_WIN32
#include <windows.h>
#undef min
#undef max
// SEH filter used by the CATCH macro: access violations are handled by the
// __except block, every other exception code keeps propagating.
// ep is accepted to match the call site but is not inspected.
static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) {
    return (code == EXCEPTION_ACCESS_VIOLATION) ? EXCEPTION_EXECUTE_HANDLER
                                                : EXCEPTION_CONTINUE_SEARCH;
}
#define TRY __try
#define CATCH __except (filter(GetExceptionCode(), GetExceptionInformation()))
#else
#define TRY
#define CATCH
#endif
bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data)
{
detach();
@ -453,7 +477,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
{
const Color32 * src = (const Color32 *)data;
try {
TRY {
for (int i = 0; i < count; i++)
{
rdst[i] = float(src[i].r) / 255.0f;
@ -462,7 +486,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
adst[i] = float(src[i].a) / 255.0f;
}
}
catch(...) {
CATCH {
return false;
}
}
@ -470,7 +494,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
{
const uint16 * src = (const uint16 *)data;
try {
TRY {
for (int i = 0; i < count; i++)
{
((uint32 *)rdst)[i] = half_to_float(src[4*i+0]);
@ -479,7 +503,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
((uint32 *)adst)[i] = half_to_float(src[4*i+3]);
}
}
catch(...) {
CATCH {
return false;
}
}
@ -487,7 +511,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
{
const float * src = (const float *)data;
try {
TRY {
for (int i = 0; i < count; i++)
{
rdst[i] = src[4 * i + 0];
@ -496,7 +520,7 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
adst[i] = src[4 * i + 3];
}
}
catch(...) {
CATCH {
return false;
}
}

@ -270,6 +270,11 @@ int main(int argc, char *argv[])
i++;
}
}
else if (strcmp("-pause", argv[i]) == 0)
{
printf("Press ENTER\n"); fflush(stdout);
getchar();
}
// Output options
else if (strcmp("-silent", argv[i]) == 0)
@ -529,6 +534,11 @@ int main(int argc, char *argv[])
compressionOptions.setColorWeights(1, 1, 0);
}
//compressionOptions.setColorWeights(0.2126, 0.7152, 0.0722);
//compressionOptions.setColorWeights(0.299, 0.587, 0.114);
//compressionOptions.setColorWeights(3, 4, 2);
if (externalCompressor != NULL)
{
compressionOptions.setExternalCompressor(externalCompressor);

Loading…
Cancel
Save