1 Commits
2.0.7 ... 2.0.6

Author SHA1 Message Date
de8f0153c0 Tag 2.0.6 for release. 2009-03-19 19:06:30 +00:00
20 changed files with 879 additions and 987 deletions

View File

@ -1,10 +1,3 @@
NVIDIA Texture Tools version 2.0.7
* Output correct exit codes. Fixes issue 92.
* Fix thread-safety errors. Fixes issue 90.
* Add SIMD power method. Fixes issue 94.
* Interact better with applications that already use CUDA.
* Faster CPU compression.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.

View File

@ -1 +1 @@
2.0.7
2.0.6

View File

@ -105,8 +105,7 @@ ENDIF(OPENEXR_FOUND)
FIND_PACKAGE(Qt4)
# Threads
FIND_PACKAGE(Threads REQUIRED)
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
FIND_PACKAGE(Threads)
# configuration file
INCLUDE(CheckIncludeFiles)

View File

@ -38,7 +38,7 @@
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <sys/ucontext.h>
# include <ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace

View File

@ -115,7 +115,6 @@ namespace nv
{
NVCORE_API void dumpInfo();
// These functions are not thread safe.
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();

View File

@ -545,6 +545,8 @@ const char * Path::extension(const char * str)
}
// static
String String::s_null(String::null);
/// Clone this string
String String::clone() const
@ -555,13 +557,13 @@ String String::clone() const
void String::setString(const char * str)
{
if (str == NULL) {
data = NULL;
if( str == NULL ) {
data = s_null.data;
}
else {
allocString( str );
addRef();
}
addRef();
}
void String::setString(const char * str, int length)
@ -574,11 +576,11 @@ void String::setString(const char * str, int length)
void String::setString(const StringBuilder & str)
{
if (str.str() == NULL) {
data = NULL;
if( str.str() == NULL ) {
data = s_null.data;
}
else {
allocString(str);
addRef();
}
addRef();
}

View File

@ -151,14 +151,15 @@ namespace nv
/// Constructs a null string. @sa isNull()
String()
{
data = NULL;
data = s_null.data;
addRef();
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
if (data != NULL) addRef();
addRef();
}
/// Constructs a shared string from a standard string.
@ -182,6 +183,7 @@ namespace nv
/// Dtor.
~String()
{
nvDebugCheck(data != NULL);
release();
}
@ -218,49 +220,43 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return true;
}
if ((data == NULL) != (str.data == NULL)) {
return false;
}
return strcmp(data, str.data) == 0;
}
/// Equal operator.
bool operator==( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) == 0;
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return false;
}
if ((data == NULL) != (str.data == NULL)) {
return true;
}
return strcmp(data, str.data) != 0;
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) != 0;
}
/// Returns true if this string is the null string.
bool isNull() const { return data == NULL; }
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
@ -269,45 +265,44 @@ namespace nv
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { return data; }
operator const char * () const { nvDebugCheck(data != NULL); return data; }
/// Get string pointer.
const char * str() const { return data; }
const char * str() const { nvDebugCheck(data != NULL); return data; }
private:
// Add reference count.
void addRef()
{
if (data != NULL)
{
setRefCount(getRefCount() + 1);
enum null_t { null };
// Private constructor for null string.
String(null_t) {
setString("");
}
// Add reference count.
void addRef() {
nvDebugCheck(data != NULL);
setRefCount(getRefCount() + 1);
}
// Decrease reference count.
void release()
{
if (data != NULL)
{
void release() {
nvDebugCheck(data != NULL);
const uint16 count = getRefCount();
setRefCount(count - 1);
if (count - 1 == 0) {
if( count - 1 == 0 ) {
mem::free(data - 2);
data = NULL;
}
}
}
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
uint16 getRefCount() const {
return *reinterpret_cast<const uint16 *>(data - 2);
}
void setRefCount(uint16 count) {
nvDebugCheck(data != NULL);
nvCheck(count < 0xFFFF);
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
@ -346,6 +341,8 @@ namespace nv
private:
NVCORE_API static String s_null;
const char * data;
};

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (7); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (6); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -78,7 +78,7 @@ void Image::unwrap()
void Image::free()
{
nv::mem::free(m_data);
::free(m_data);
m_data = NULL;
}

View File

@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
Matrix r;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; j++)
for (int j = 0; j < 4; i++)
{
r(i, j) = m(j, i);
}

View File

@ -205,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT1 block;
squish::WeightedClusterFit fit;
//squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
//squish::FastClusterFit fit;
squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -221,7 +221,7 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}

View File

@ -207,16 +207,28 @@ Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = false;
m.cudaDevice = -1;
m.cudaEnabled = m.cudaSupported;
enableCudaAcceleration(m.cudaSupported);
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
cuda::exit();
}
@ -225,33 +237,21 @@ void Compressor::enableCudaAcceleration(bool enable)
{
if (m.cudaSupported)
{
if (m.cudaEnabled && !enable)
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
if (m.cudaDevice != -1)
{
// Exit device.
cuda::exitDevice();
}
}
else if (!m.cudaEnabled && enable)
{
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
if (m.cudaEnabled)
{
// Create compressor if initialization succeeds.
m.cuda = new CudaCompressor();
// But cleanup if failed.
if (!m.cuda->isValid())
{
enableCudaAcceleration(false);
}
}
}
}
}

View File

@ -63,12 +63,10 @@ namespace nvtt
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;

View File

@ -94,7 +94,7 @@ void InputOptions::reset()
m.textureType = TextureType_2D;
m.inputFormat = InputFormat_BGRA_8UB;
m.alphaMode = AlphaMode_None;
m.alphaMode = AlphaMode_Transparency;
m.inputGamma = 2.2f;
m.outputGamma = 2.2f;

View File

@ -41,11 +41,11 @@ using namespace cuda;
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
@ -53,20 +53,20 @@ typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
return !bIsWow64;
}
#endif
@ -158,8 +158,6 @@ bool nv::cuda::isHardwarePresent()
// @@ Make sure that warp size == 32
// @@ Make sure available GPU is faster than the CPU.
return count > 0;
#else
return false;
@ -182,59 +180,30 @@ int nv::cuda::deviceCount()
return 0;
}
// Make sure device meets requirements:
// - Not an emulation device.
// - Not an integrated device?
// - Faster than CPU.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, i);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major == -1 || device_properties.minor == -1) {
// Emulation device.
return false;
}
#if CUDART_VERSION >= 2030 // 2.3
/*if (device_properties.integrated)
{
// Integrated devices.
return false;
}*/
#endif
return true;
#else
return false;
#endif
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = -1;
int max_gflops_device = 0;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
for (int i = 0; i < device_count; i++)
{
if (isValidDevice(i))
int current_device = 0;
while (current_device < device_count)
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, i);
cudaGetDeviceProperties(&device_properties, current_device);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (gflops > max_gflops)
if (device_properties.major != -1 && device_properties.minor != -1)
{
if( gflops > max_gflops )
{
max_gflops = gflops;
max_gflops_device = i;
max_gflops_device = current_device;
}
}
current_device++;
}
#endif
return max_gflops_device;
@ -242,53 +211,23 @@ int nv::cuda::getFastestDevice()
/// Activate the given devices.
bool nv::cuda::initDevice(int * device_ptr)
bool nv::cuda::setDevice(int i)
{
nvDebugCheck(device_ptr != NULL);
nvCheck(i < deviceCount());
#if defined HAVE_CUDA
cudaError_t result = cudaSetDevice(i);
#if CUDART_VERSION >= 2030 // 2.3
// Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active.
cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif
int device = getFastestDevice();
if (device == -1)
{
// No device is fast enough.
*device_ptr = -1;
return false;
}
// Select CUDA device.
cudaError_t result = cudaSetDevice(device);
if (result == cudaErrorSetOnActiveProcess)
{
int device;
result = cudaGetDevice(&device);
*device_ptr = -1; // No device to cleanup.
return isValidDevice(device); // Return true if device is valid.
}
else if (result != cudaSuccess)
{
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
*device_ptr = -1;
return false;
}
*device_ptr = device;
return true;
return result == cudaSuccess;
#else
return false;
#endif
}
void nv::cuda::exitDevice()
void nv::cuda::exit()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();

View File

@ -32,10 +32,8 @@ namespace nv
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
bool setDevice(int i);
void exit();
};
} // nv namespace

View File

@ -73,7 +73,7 @@ namespace nvtt
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
// DX10 formats.
Format_BC1 = Format_DXT1,
@ -194,7 +194,7 @@ namespace nvtt
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted.
// Set the way the input alpha channel is interpreted. @@ Not implemented!
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.

View File

@ -24,7 +24,6 @@
-------------------------------------------------------------------------- */
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
@ -61,39 +60,12 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
}
#define POWER_ITERATION_COUNT 8
#if SQUISH_USE_SIMD
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
Vec4 v = VEC4_CONST( 1.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
const int NUM = 8;
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
return v.GetVec3();
}
#else
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec3 v(1, 1, 1);
for (int i = 0; i < POWER_ITERATION_COUNT; i++)
{
for(int i = 0; i < NUM; i++) {
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@ -110,6 +82,5 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
return v;
}
#endif
} // namespace squish

View File

@ -1,28 +1,28 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
-------------------------------------------------------------------------- */
#include "weightedclusterfit.h"
#include "colourset.h"
@ -32,12 +32,12 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
namespace squish {
WeightedClusterFit::WeightedClusterFit()
{
}
WeightedClusterFit::WeightedClusterFit()
{
}
void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
{
void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
{
ColourFit::SetColourSet( colours, flags );
// initialise the best error
@ -102,21 +102,21 @@ namespace squish {
m_wsum += m_weights[i];
#endif
}
}
}
void WeightedClusterFit::SetMetric(float r, float g, float b)
{
void WeightedClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
m_metric = Vec4(r, g, b, 0);
#else
m_metric = Vec3(r, g, b);
#endif
m_metricSqr = m_metric * m_metric;
}
}
float WeightedClusterFit::GetBestError() const
{
float WeightedClusterFit::GetBestError() const
{
#if SQUISH_USE_SIMD
Vec4 x = m_xxsum * m_metricSqr;
Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ();
@ -125,19 +125,16 @@ namespace squish {
return m_besterror + Dot(m_xxsum, m_metricSqr);
#endif
}
}
#if SQUISH_USE_SIMD
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
void WeightedClusterFit::Compress3( void* block )
{
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -149,11 +146,11 @@ namespace squish {
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 const x2 = m_xsum - x1 - x0;
@ -176,21 +173,24 @@ namespace squish {
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -221,17 +221,17 @@ namespace squish {
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
// save the block
@ -240,20 +240,16 @@ namespace squish {
// save the error
m_besterror = besterror;
}
}
}
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
void WeightedClusterFit::Compress4( void* block )
{
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half = VEC4_CONST(0.5f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -264,30 +260,30 @@ namespace squish {
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec4 x2 = zero;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
Vec4 const x3 = m_xsum - x2 - x1 - x0;
//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
Vec4 const alpha2_sum = alphax_sum.SplatW();
//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
Vec4 const beta2_sum = betax_sum.SplatW();
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
@ -295,21 +291,24 @@ namespace squish {
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// apply the metric to the error term
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -347,37 +346,28 @@ namespace squish {
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
// save the error
m_besterror = besterror;
}
}
}
#else
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
void WeightedClusterFit::Compress3( void* block )
{
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -389,12 +379,12 @@ namespace squish {
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
float w2 = m_wsum - w0 - w1;
@ -410,9 +400,16 @@ namespace squish {
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -453,35 +450,26 @@ namespace squish {
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock3( beststart, bestend, bestindices, block );
WriteColourBlock3( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;
}
}
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
}
void WeightedClusterFit::Compress4( void* block )
{
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -492,17 +480,17 @@ namespace squish {
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= count; c0++)
for( int c0 = 0; c0 <= 16; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= count-c0; c1++)
for( int c1 = 0; c1 <= 16-c0; c1++)
{
Vec3 x2(0.0f);
float w2 = 0.0f;
for( int c2 = 0; c2 <= count-c0-c1; c2++)
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
@ -517,9 +505,16 @@ namespace squish {
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp to the grid
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -568,25 +563,23 @@ namespace squish {
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < count; i++) {
for(; i < 16; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < count; ++i )
for( int i = 0; i < 16; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart, bestend, bestindices, block );
WriteColourBlock4( beststart, bestend, ordered, block );
// save the error
m_besterror = besterror;
}
}
}
#endif

View File

@ -87,10 +87,7 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -257,12 +254,7 @@ int main(int argc, char *argv[])
}
}
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
{
@ -289,7 +281,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
return EXIT_FAILURE;
return 1;
}
// @@ Make sure input file exists.
@ -304,13 +296,13 @@ int main(int argc, char *argv[])
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return EXIT_FAILURE;
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return EXIT_FAILURE;
return 1;
}
uint faceCount;
@ -347,7 +339,7 @@ int main(int argc, char *argv[])
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return EXIT_FAILURE;
return 1;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
@ -410,7 +402,7 @@ int main(int argc, char *argv[])
if (outputHandler.stream->isError())
{
fprintf(stderr, "Error opening '%s' for writting\n", output.str());
return EXIT_FAILURE;
return 1;
}
nvtt::Compressor compressor;
@ -438,16 +430,27 @@ int main(int argc, char *argv[])
// fflush(stdout);
// getchar();
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return EXIT_SUCCESS;
return 0;
}