Fix errors and enable new cluster fit compressor.
This commit is contained in:
parent
035997bc2e
commit
23bfc1b514
@ -17,5 +17,6 @@
|
|||||||
#define HAVE_JPEG
|
#define HAVE_JPEG
|
||||||
#define HAVE_TIFF
|
#define HAVE_TIFF
|
||||||
#endif*/
|
#endif*/
|
||||||
|
#define HAVE_STBIMAGE
|
||||||
|
|
||||||
#endif // NV_CONFIG
|
#endif // NV_CONFIG
|
||||||
|
@ -45,7 +45,7 @@
|
|||||||
<Tool
|
<Tool
|
||||||
Name="VCCLCompilerTool"
|
Name="VCCLCompilerTool"
|
||||||
Optimization="0"
|
Optimization="0"
|
||||||
AdditionalIncludeDirectories="$(GnuWinDir)/include; $(FreeImageDir)"
|
AdditionalIncludeDirectories="..\..\..\extern\stb"
|
||||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||||
MinimalRebuild="true"
|
MinimalRebuild="true"
|
||||||
BasicRuntimeChecks="3"
|
BasicRuntimeChecks="3"
|
||||||
@ -109,7 +109,7 @@
|
|||||||
<Tool
|
<Tool
|
||||||
Name="VCCLCompilerTool"
|
Name="VCCLCompilerTool"
|
||||||
Optimization="0"
|
Optimization="0"
|
||||||
AdditionalIncludeDirectories=""
|
AdditionalIncludeDirectories="..\..\..\extern\stb"
|
||||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||||
MinimalRebuild="true"
|
MinimalRebuild="true"
|
||||||
BasicRuntimeChecks="3"
|
BasicRuntimeChecks="3"
|
||||||
@ -177,7 +177,7 @@
|
|||||||
EnableIntrinsicFunctions="true"
|
EnableIntrinsicFunctions="true"
|
||||||
FavorSizeOrSpeed="0"
|
FavorSizeOrSpeed="0"
|
||||||
OmitFramePointers="true"
|
OmitFramePointers="true"
|
||||||
AdditionalIncludeDirectories="$(GnuWinDir)/include; $(FreeImageDir)"
|
AdditionalIncludeDirectories="..\..\..\extern\stb"
|
||||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||||
StringPooling="true"
|
StringPooling="true"
|
||||||
RuntimeLibrary="2"
|
RuntimeLibrary="2"
|
||||||
@ -246,7 +246,7 @@
|
|||||||
EnableIntrinsicFunctions="true"
|
EnableIntrinsicFunctions="true"
|
||||||
OmitFramePointers="true"
|
OmitFramePointers="true"
|
||||||
WholeProgramOptimization="true"
|
WholeProgramOptimization="true"
|
||||||
AdditionalIncludeDirectories=""
|
AdditionalIncludeDirectories="..\..\..\extern\stb"
|
||||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||||
StringPooling="true"
|
StringPooling="true"
|
||||||
RuntimeLibrary="2"
|
RuntimeLibrary="2"
|
||||||
|
@ -312,6 +312,10 @@
|
|||||||
RelativePath="..\..\..\src\nvtt\squish\colourset.h"
|
RelativePath="..\..\..\src\nvtt\squish\colourset.h"
|
||||||
>
|
>
|
||||||
</File>
|
</File>
|
||||||
|
<File
|
||||||
|
RelativePath="..\..\..\src\nvtt\squish\config.h"
|
||||||
|
>
|
||||||
|
</File>
|
||||||
<File
|
<File
|
||||||
RelativePath="..\..\..\src\nvtt\squish\maths.cpp"
|
RelativePath="..\..\..\src\nvtt\squish\maths.cpp"
|
||||||
>
|
>
|
||||||
|
@ -2,18 +2,29 @@
|
|||||||
|
|
||||||
#include "Vector.h" // Vector3, Vector4
|
#include "Vector.h" // Vector3, Vector4
|
||||||
|
|
||||||
|
|
||||||
// Set some reasonable defaults.
|
// Set some reasonable defaults.
|
||||||
#ifndef NV_USE_ALTIVEC
|
#ifndef NV_USE_ALTIVEC
|
||||||
# define NV_USE_ALTIVEC NV_CPU_PPC
|
# define NV_USE_ALTIVEC NV_CPU_PPC
|
||||||
|
//# define NV_USE_ALTIVEC defined(__VEC__)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifndef NV_USE_SSE
|
#ifndef NV_USE_SSE
|
||||||
# if NV_CPU_X86 || NV_CPU_X86_64
|
# if NV_CPU_X86 || NV_CPU_X86_64
|
||||||
# define NV_USE_SSE 2
|
# define NV_USE_SSE 2
|
||||||
# endif
|
# endif
|
||||||
|
//# if defined(__SSE2__)
|
||||||
|
//# define NV_USE_SSE 2
|
||||||
|
//# elif defined(__SSE__)
|
||||||
|
//# define NV_USE_SSE 1
|
||||||
|
//# else
|
||||||
|
//# define NV_USE_SSE 0
|
||||||
|
//# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
// Internally set NV_USE_SIMD when either altivec or sse is available.
|
||||||
|
#if NV_USE_ALTIVEC && NV_USE_SSE
|
||||||
|
# error "Cannot enable both altivec and sse!"
|
||||||
|
#endif
|
||||||
|
|
||||||
#if NV_USE_ALTIVEC
|
#if NV_USE_ALTIVEC
|
||||||
# include "SimdVector_VE.h"
|
# include "SimdVector_VE.h"
|
||||||
|
@ -46,10 +46,10 @@ namespace nv {
|
|||||||
explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {}
|
explicit SimdVector(float f) : vec(_mm_set1_ps(f)) {}
|
||||||
explicit SimdVector(__m128 v) : vec(v) {}
|
explicit SimdVector(__m128 v) : vec(v) {}
|
||||||
|
|
||||||
explicit SimdVector(Vector4::Arg v)
|
/*explicit SimdVector(const Vector4 & v)
|
||||||
{
|
{
|
||||||
vec = _mm_load_ps( v.component );
|
vec = _mm_load_ps( v.components );
|
||||||
}
|
}*/
|
||||||
|
|
||||||
explicit SimdVector(const float * v)
|
explicit SimdVector(const float * v)
|
||||||
{
|
{
|
||||||
|
@ -83,6 +83,7 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
|||||||
m_xxsum = SimdVector( 0.0f );
|
m_xxsum = SimdVector( 0.0f );
|
||||||
m_xsum = SimdVector( 0.0f );
|
m_xsum = SimdVector( 0.0f );
|
||||||
#else
|
#else
|
||||||
|
m_xxsum = Vector3(0.0f);
|
||||||
m_xsum = Vector3(0.0f);
|
m_xsum = Vector3(0.0f);
|
||||||
m_wsum = 0.0f;
|
m_wsum = 0.0f;
|
||||||
#endif
|
#endif
|
||||||
@ -91,11 +92,12 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
|||||||
{
|
{
|
||||||
int p = order[i];
|
int p = order[i];
|
||||||
#if NVTT_USE_SIMD
|
#if NVTT_USE_SIMD
|
||||||
m_weighted[i] = SimdVector(Vector4(set->weights[p] * values[p], set->weights[p]));
|
Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
|
||||||
|
m_weighted[i] = SimdVector(tmp.component);
|
||||||
m_xxsum += m_weighted[i] * m_weighted[i];
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||||
m_xsum += m_weighted[i];
|
m_xsum += m_weighted[i];
|
||||||
#else
|
#else
|
||||||
m_weighted[i] = values[p];
|
m_weighted[i] = values[p] * set->weights[p];
|
||||||
m_xxsum += m_weighted[i] * m_weighted[i];
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||||
m_xsum += m_weighted[i];
|
m_xsum += m_weighted[i];
|
||||||
m_weights[i] = set->weights[p];
|
m_weights[i] = set->weights[p];
|
||||||
@ -108,7 +110,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
|||||||
void ClusterFit::setMetric(Vector4::Arg w)
|
void ClusterFit::setMetric(Vector4::Arg w)
|
||||||
{
|
{
|
||||||
#if NVTT_USE_SIMD
|
#if NVTT_USE_SIMD
|
||||||
m_metric = SimdVector(Vector4(w.xyz(), 1));
|
Vector4 tmp(w.xyz(), 1);
|
||||||
|
m_metric = SimdVector(tmp.component);
|
||||||
#else
|
#else
|
||||||
m_metric = w.xyz();
|
m_metric = w.xyz();
|
||||||
#endif
|
#endif
|
||||||
@ -333,9 +336,6 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
|
|||||||
bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
||||||
{
|
{
|
||||||
const uint count = m_count;
|
const uint count = m_count;
|
||||||
const Vector3 one( 1.0f );
|
|
||||||
const Vector3 zero( 0.0f );
|
|
||||||
const Vector3 half( 0.5f );
|
|
||||||
const Vector3 grid( 31.0f, 63.0f, 31.0f );
|
const Vector3 grid( 31.0f, 63.0f, 31.0f );
|
||||||
const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
const Vector3 gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
||||||
|
|
||||||
@ -372,10 +372,10 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
|||||||
Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
|
Vector3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
|
||||||
|
|
||||||
// clamp to the grid
|
// clamp to the grid
|
||||||
a = min(one, max(zero, a));
|
a = clamp(a, 0, 1);
|
||||||
b = min(one, max(zero, b));
|
b = clamp(b, 0, 1);
|
||||||
a = floor(grid * a + half) * gridrcp;
|
a = floor(grid * a + 0.5f) * gridrcp;
|
||||||
b = floor(grid * b + half) * gridrcp;
|
b = floor(grid * b + 0.5f) * gridrcp;
|
||||||
|
|
||||||
// compute the error
|
// compute the error
|
||||||
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
||||||
@ -420,9 +420,6 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
|||||||
bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
|
bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
|
||||||
{
|
{
|
||||||
const uint count = m_count;
|
const uint count = m_count;
|
||||||
Vector3 const one( 1.0f );
|
|
||||||
Vector3 const zero( 0.0f );
|
|
||||||
Vector3 const half( 0.5f );
|
|
||||||
Vector3 const grid( 31.0f, 63.0f, 31.0f );
|
Vector3 const grid( 31.0f, 63.0f, 31.0f );
|
||||||
Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
Vector3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
|
||||||
|
|
||||||
@ -462,10 +459,10 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
|
|||||||
Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
|
Vector3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
|
||||||
|
|
||||||
// clamp to the grid
|
// clamp to the grid
|
||||||
a = min( one, max( zero, a ) );
|
a = clamp(a, 0, 1);
|
||||||
b = min( one, max( zero, b ) );
|
b = clamp(b, 0, 1);
|
||||||
a = floor( grid*a + half )*gridrcp;
|
a = floor(a * grid + 0.5f) * gridrcp;
|
||||||
b = floor( grid*b + half )*gridrcp;
|
b = floor(b * grid + 0.5f) * gridrcp;
|
||||||
|
|
||||||
// compute the error
|
// compute the error
|
||||||
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
||||||
|
@ -27,11 +27,13 @@
|
|||||||
#ifndef NVTT_CLUSTERFIT_H
|
#ifndef NVTT_CLUSTERFIT_H
|
||||||
#define NVTT_CLUSTERFIT_H
|
#define NVTT_CLUSTERFIT_H
|
||||||
|
|
||||||
#define NVTT_USE_SIMD 0
|
|
||||||
|
|
||||||
#include "nvmath/SimdVector.h"
|
#include "nvmath/SimdVector.h"
|
||||||
#include "nvmath/Vector.h"
|
#include "nvmath/Vector.h"
|
||||||
|
|
||||||
|
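// Intended to use the SIMD version if altivec or SSE are available; the automatic selection below is commented out and NVTT_USE_SIMD is currently forced to 0.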
|
||||||
|
//#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
|
||||||
|
#define NVTT_USE_SIMD 0
|
||||||
|
|
||||||
namespace nv {
|
namespace nv {
|
||||||
|
|
||||||
struct ColorSet;
|
struct ColorSet;
|
||||||
|
@ -109,7 +109,7 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
|
|||||||
QuickCompress::compressDXT5(rgba, block);
|
QuickCompress::compressDXT5(rgba, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
#if 1
|
||||||
void NormalCompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void NormalCompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
set.setUniformWeights();
|
set.setUniformWeights();
|
||||||
|
@ -64,7 +64,7 @@ namespace nv
|
|||||||
|
|
||||||
|
|
||||||
// Normal CPU compressors.
|
// Normal CPU compressors.
|
||||||
#if 0
|
#if 1
|
||||||
struct NormalCompressorDXT1 : public ColorSetCompressor
|
struct NormalCompressorDXT1 : public ColorSetCompressor
|
||||||
{
|
{
|
||||||
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||||
|
@ -115,6 +115,7 @@ inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict min
|
|||||||
*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
|
*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Takes a normalized color in [0, 255] range and returns the rounded 16-bit color.
|
||||||
inline static uint16 roundAndExpand(Vector3 * restrict v)
|
inline static uint16 roundAndExpand(Vector3 * restrict v)
|
||||||
{
|
{
|
||||||
uint r = uint(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
|
uint r = uint(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
|
||||||
@ -168,6 +169,7 @@ inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColo
|
|||||||
return indices;
|
return indices;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxColor and minColor are expected to be in the same range as the color set.
|
||||||
inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
|
inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
|
||||||
{
|
{
|
||||||
Vector3 palette[4];
|
Vector3 palette[4];
|
||||||
@ -224,6 +226,7 @@ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg
|
|||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// maxColor and minColor are expected to be in the same range as the color set.
|
||||||
inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
|
inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
|
||||||
{
|
{
|
||||||
Vector3 palette[4];
|
Vector3 palette[4];
|
||||||
@ -702,8 +705,8 @@ void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock,
|
|||||||
|
|
||||||
void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
|
void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
|
||||||
{
|
{
|
||||||
Vector3 maxColor = start * 255;
|
Vector3 minColor = start * 255;
|
||||||
Vector3 minColor = end * 255;
|
Vector3 maxColor = end * 255;
|
||||||
uint16 color0 = roundAndExpand(&maxColor);
|
uint16 color0 = roundAndExpand(&maxColor);
|
||||||
uint16 color1 = roundAndExpand(&minColor);
|
uint16 color1 = roundAndExpand(&minColor);
|
||||||
|
|
||||||
@ -715,17 +718,17 @@ void QuickCompress::outputBlock4(const ColorSet & set, const Vector3 & start, co
|
|||||||
|
|
||||||
block->col0 = Color16(color0);
|
block->col0 = Color16(color0);
|
||||||
block->col1 = Color16(color1);
|
block->col1 = Color16(color1);
|
||||||
block->indices = computeIndices4(set, maxColor, minColor);
|
block->indices = computeIndices4(set, maxColor / 255, minColor / 255);
|
||||||
|
|
||||||
//optimizeEndPoints4(set, block);
|
//optimizeEndPoints4(set, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
|
void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
|
||||||
{
|
{
|
||||||
Vector3 maxColor = start * 255;
|
Vector3 minColor = start * 255;
|
||||||
Vector3 minColor = end * 255;
|
Vector3 maxColor = end * 255;
|
||||||
uint16 color0 = roundAndExpand(&maxColor);
|
uint16 color0 = roundAndExpand(&minColor);
|
||||||
uint16 color1 = roundAndExpand(&minColor);
|
uint16 color1 = roundAndExpand(&maxColor);
|
||||||
|
|
||||||
if (color0 > color1)
|
if (color0 > color1)
|
||||||
{
|
{
|
||||||
@ -735,7 +738,7 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co
|
|||||||
|
|
||||||
block->col0 = Color16(color0);
|
block->col0 = Color16(color0);
|
||||||
block->col1 = Color16(color1);
|
block->col1 = Color16(color1);
|
||||||
block->indices = computeIndices3(set, maxColor, minColor);
|
block->indices = computeIndices3(set, maxColor / 255, minColor / 255);
|
||||||
|
|
||||||
//optimizeEndPoints3(set, block);
|
//optimizeEndPoints3(set, block);
|
||||||
}
|
}
|
Loading…
Reference in New Issue
Block a user