1 Commits
2.0.8 ... 2.0.6

Author SHA1 Message Date
de8f0153c0 Tag 2.0.6 for release. 2009-03-19 19:06:30 +00:00
28 changed files with 905 additions and 1078 deletions

View File

@ -1,16 +1,3 @@
NVIDIA Texture Tools version 2.0.8
* Fix float to fixed image conversion. Patch provided by Alex Pfaffe. Fixes issue 121.
* ColorBlock::isSingleColor compares only RGB channels. Fixes issue 115.
* Fix cmake build in msvc. Fixes issue 111.
* Better estimate principal component. Fixes issue 120.
NVIDIA Texture Tools version 2.0.7
* Output correct exit codes. Fixes issue 92.
* Fix thread-safety errors. Fixes issue 90.
* Add SIMD power method. Fixes issue 94.
* Interact better with applications that already use CUDA.
* Faster CPU compression.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.

View File

@ -1 +1 @@
2.0.8
2.0.6

View File

@ -53,7 +53,11 @@ ENDIF(ZLIB_FOUND)
IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
SET(OPENEXR_FOUND TRUE)
SET(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATH} CACHE STRING "The include paths needed to use OpenEXR")
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR")
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR")
IF(OPENEXR_ILMTHREAD_LIBRARY)
SET(OPENEXR_LIBRARIES ${OPENEXR_LIBRARIES} ${OPENEXR_ILMTHREAD_LIBRARY})
ENDIF(OPENEXR_ILMTHREAD_LIBRARY)
ENDIF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
IF(OPENEXR_FOUND)

View File

@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 0, 8, 0"
VALUE "FileVersion", "2, 0, 6, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007-2010"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 0, 8, 0"
VALUE "ProductVersion", "2, 0, 6, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -179,8 +179,6 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -346,8 +344,6 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -507,8 +503,6 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -670,8 +664,6 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
</Configurations>

View File

@ -105,8 +105,7 @@ ENDIF(OPENEXR_FOUND)
FIND_PACKAGE(Qt4)
# Threads
FIND_PACKAGE(Threads REQUIRED)
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
FIND_PACKAGE(Threads)
# configuration file
INCLUDE(CheckIncludeFiles)

View File

@ -33,7 +33,6 @@ IF(UNIX)
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_DEFINITIONS(-DNVCORE_SHARED=1)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)
ADD_LIBRARY(nvcore ${CORE_SRCS})

View File

@ -38,7 +38,7 @@
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <sys/ucontext.h>
# include <ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace

View File

@ -115,7 +115,6 @@ namespace nv
{
NVCORE_API void dumpInfo();
// These functions are not thread safe.
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();

View File

@ -545,6 +545,8 @@ const char * Path::extension(const char * str)
}
// static
String String::s_null(String::null);
/// Clone this string
String String::clone() const
@ -555,13 +557,13 @@ String String::clone() const
void String::setString(const char * str)
{
if (str == NULL) {
data = NULL;
if( str == NULL ) {
data = s_null.data;
}
else {
allocString( str );
addRef();
}
addRef();
}
void String::setString(const char * str, int length)
@ -574,11 +576,11 @@ void String::setString(const char * str, int length)
void String::setString(const StringBuilder & str)
{
if (str.str() == NULL) {
data = NULL;
if( str.str() == NULL ) {
data = s_null.data;
}
else {
allocString(str);
addRef();
}
addRef();
}

View File

@ -17,7 +17,7 @@ namespace nv
/// String hash based on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i = 0;
uint i;
while(data[i] != 0) {
h = (33 * h) ^ uint(data[i]);
i++;
@ -151,14 +151,15 @@ namespace nv
/// Constructs a null string. @sa isNull()
String()
{
data = NULL;
data = s_null.data;
addRef();
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
if (data != NULL) addRef();
addRef();
}
/// Constructs a shared string from a standard string.
@ -182,6 +183,7 @@ namespace nv
/// Dtor.
~String()
{
nvDebugCheck(data != NULL);
release();
}
@ -218,49 +220,43 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return true;
}
if ((data == NULL) != (str.data == NULL)) {
return false;
}
return strcmp(data, str.data) == 0;
}
/// Equal operator.
bool operator==( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) == 0;
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return false;
}
if ((data == NULL) != (str.data == NULL)) {
return true;
}
return strcmp(data, str.data) != 0;
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) != 0;
}
/// Returns true if this string is the null string.
bool isNull() const { return data == NULL; }
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
@ -269,45 +265,44 @@ namespace nv
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { return data; }
operator const char * () const { nvDebugCheck(data != NULL); return data; }
/// Get string pointer.
const char * str() const { return data; }
const char * str() const { nvDebugCheck(data != NULL); return data; }
private:
enum null_t { null };
// Private constructor for null string.
String(null_t) {
setString("");
}
// Add reference count.
void addRef()
{
if (data != NULL)
{
setRefCount(getRefCount() + 1);
}
void addRef() {
nvDebugCheck(data != NULL);
setRefCount(getRefCount() + 1);
}
// Decrease reference count.
void release()
{
if (data != NULL)
{
const uint16 count = getRefCount();
setRefCount(count - 1);
if (count - 1 == 0) {
mem::free(data - 2);
data = NULL;
}
void release() {
nvDebugCheck(data != NULL);
const uint16 count = getRefCount();
setRefCount(count - 1);
if( count - 1 == 0 ) {
mem::free(data - 2);
data = NULL;
}
}
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
uint16 getRefCount() const {
return *reinterpret_cast<const uint16 *>(data - 2);
}
void setRefCount(uint16 count) {
nvDebugCheck(data != NULL);
nvCheck(count < 0xFFFF);
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
@ -346,6 +341,8 @@ namespace nv
private:
NVCORE_API static String s_null;
const char * data;
};

View File

@ -53,7 +53,6 @@ ENDIF(OPENEXR_FOUND)
ADD_DEFINITIONS(-DNVIMAGE_EXPORTS)
IF(NVIMAGE_SHARED)
ADD_DEFINITIONS(-DNVIMAGE_SHARED=1)
ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS})
ELSE(NVIMAGE_SHARED)
ADD_LIBRARY(nvimage ${IMAGE_SRCS})

View File

@ -113,18 +113,15 @@ void ColorBlock::splatY()
/// Returns true if the block has a single color.
bool ColorBlock::isSingleColor() const
{
Color32 mask(0xFF, 0xFF, 0xFF, 0x00);
uint u = m_color[0].u & mask.u;
for (int i = 1; i < 16; i++)
{
if (u != (m_color[i].u & mask.u))
{
return false;
}
}
return true;
for(int i = 1; i < 16; i++)
{
if (m_color[0] != m_color[i])
{
return false;
}
}
return true;
}
/// Count number of unique colors in this color block.

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (8); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (6); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -78,7 +78,7 @@ void Image::unwrap()
void Image::free()
{
nv::mem::free(m_data);
::free(m_data);
m_data = NULL;
}

View File

@ -19,7 +19,6 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVMATH_EXPORTS)
IF(NVMATH_SHARED)
ADD_DEFINITIONS(-DNVMATH_SHARED=1)
ADD_LIBRARY(nvmath SHARED ${MATH_SRCS})
ELSE(NVMATH_SHARED)
ADD_LIBRARY(nvmath ${MATH_SRCS})

View File

@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
Matrix r;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; j++)
for (int j = 0; j < 4; i++)
{
r(i, j) = m(j, i);
}

View File

@ -205,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT1 block;
squish::WeightedClusterFit fit;
//squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
//squish::FastClusterFit fit;
squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -221,7 +221,7 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}

View File

@ -53,7 +53,7 @@ using namespace nvtt;
namespace
{
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a) {
@ -121,13 +121,15 @@ namespace nvtt
m_fixedImage = NULL;
m_floatImage = image;
}
// Convert linear float image to fixed image ready for compression.
void toFixedImage(const InputOptions::Private & inputOptions)
{
if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it!
if (this->asFixedImage() == NULL)
{
nvDebugCheck(m_floatImage != NULL);
if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
{
m_fixedImage = m_floatImage->createImage();
@ -151,7 +153,7 @@ namespace nvtt
if (inputOptions.isNormalMap)
{
// Expand normals to [-1, 1] range.
// floatImage->expandNormals(0);
// floatImage->expandNormals(0);
}
else if (inputOptions.inputGamma != 1.0f)
{
@ -173,12 +175,11 @@ namespace nvtt
const Image * asFixedImage() const
{
// - apfaffe - switched logic to return the 'processed image' rather than the input!
if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL)
if (m_inputImage != NULL)
{
return m_fixedImage.ptr();
return m_inputImage;
}
return m_inputImage;
return m_fixedImage.ptr();
}
Image * asMutableFixedImage()
@ -192,7 +193,7 @@ namespace nvtt
return m_fixedImage.ptr();
}
private:
const Image * m_inputImage;
AutoPtr<Image> m_fixedImage;
@ -206,16 +207,28 @@ Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = false;
m.cudaDevice = -1;
m.cudaEnabled = m.cudaSupported;
enableCudaAcceleration(m.cudaSupported);
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
cuda::exit();
}
@ -224,33 +237,21 @@ void Compressor::enableCudaAcceleration(bool enable)
{
if (m.cudaSupported)
{
if (m.cudaEnabled && !enable)
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
if (m.cudaDevice != -1)
{
// Exit device.
cuda::exitDevice();
}
}
else if (!m.cudaEnabled && enable)
{
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
if (m.cudaEnabled)
{
// Create compressor if initialization succeeds.
m.cuda = new CudaCompressor();
// But cleanup if failed.
if (!m.cuda->isValid())
{
enableCudaAcceleration(false);
}
}
}
}
}
@ -291,9 +292,9 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
return false;
}
inputOptions.computeTargetExtents();
// Output DDS header.
if (!outputHeader(inputOptions, compressionOptions, outputOptions))
{
@ -309,7 +310,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
}
outputOptions.closeFile();
return true;
}
@ -324,15 +325,15 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
}
DDSHeader header;
header.setWidth(inputOptions.targetWidth);
header.setHeight(inputOptions.targetHeight);
int mipmapCount = inputOptions.realMipmapCount();
nvDebugCheck(mipmapCount > 0);
header.setMipmapCount(mipmapCount);
if (inputOptions.textureType == TextureType_2D) {
header.setTexture2D();
}
@ -340,10 +341,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setTextureCube();
}
/*else if (inputOptions.textureType == TextureType_3D) {
header.setTexture3D();
header.setDepth(inputOptions.targetDepth);
header.setTexture3D();
header.setDepth(inputOptions.targetDepth);
}*/
if (compressionOptions.format == Format_RGBA)
{
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
@ -352,7 +353,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
else
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
@ -375,10 +376,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
header.swapBytes();
uint headerSize = 128;
if (header.hasDX10Header())
{
@ -391,7 +392,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{
outputOptions.errorHandler->error(Error_FileWrite);
}
return writeSucceed;
}
@ -427,7 +428,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
return false;
}
}
quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
@ -437,7 +438,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
h = max(1U, h / 2);
d = max(1U, d / 2);
}
return true;
}
@ -488,7 +489,7 @@ int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOpti
{
int idx = f * inputOptions.mipmapCount + m;
const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];
if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d))
{
if (inputImage.data != NULL)
@ -543,7 +544,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
mipmap.toFloatImage(inputOptions);
const FloatImage * floatImage = mipmap.asFloatImage();
if (inputOptions.mipmapFilter == MipmapFilter_Box)
{
// Use fast downsample.
@ -561,7 +562,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
}
// Normalize mipmap.
if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps)
{
@ -589,7 +590,7 @@ void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions:
if (inputOptions.convertToNormalMap)
{
mipmap.toFixedImage(inputOptions);
Vector4 heightScale = inputOptions.heightFactors;
mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
}
@ -714,29 +715,29 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
if (compressionOptions.externalCompressor == "ati")
{
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
atiCompressDXT1(image, outputOptions);
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
@ -827,27 +828,27 @@ int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions
const uint bitCount = compressionOptions.bitcount;
inputOptions.computeTargetExtents();
uint mipmapCount = inputOptions.realMipmapCount();
int size = 0;
for (uint f = 0; f < inputOptions.faceCount; f++)
{
uint w = inputOptions.targetWidth;
uint h = inputOptions.targetHeight;
uint d = inputOptions.targetDepth;
for (uint m = 0; m < mipmapCount; m++)
{
size += computeImageSize(w, h, d, bitCount, format);
// Compute extents of next mipmap:
w = max(1U, w / 2);
h = max(1U, h / 2);
d = max(1U, d / 2);
}
}
return size;
}

View File

@ -63,12 +63,10 @@ namespace nvtt
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;

View File

@ -94,7 +94,7 @@ void InputOptions::reset()
m.textureType = TextureType_2D;
m.inputFormat = InputFormat_BGRA_8UB;
m.alphaMode = AlphaMode_None;
m.alphaMode = AlphaMode_Transparency;
m.inputGamma = 2.2f;
m.outputGamma = 2.2f;

View File

@ -127,21 +127,8 @@ inline __device__ __host__ float3 normalize(float3 v)
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]);
float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]);
float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]);
float r0 = dot(row0, row0);
float r1 = dot(row1, row1);
float r2 = dot(row2, row2);
float3 v;
if (r0 > r1 && r0 > r2) v = row0;
else if (r1 > r2) v = row1;
else v = row2;
//float3 v = make_float3(1.0f, 1.0f, 1.0f);
float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];

View File

@ -1,300 +1,239 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
/// Check whether the installed CUDA driver satisfies the requested version.
///
/// @param version  Required version in CUDART_VERSION encoding (2000, 2010, 2020, ...).
/// @return false when the driver library cannot be loaded, when a probed entry
///         point is missing, or when the reported driver version is lower than
///         @a version. Returns true otherwise, including builds without HAVE_CUDA.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        nvDebug("*** CUDA driver not found.\n");
        return false;
    }

    if (version >= 2000)
    {
        // A driver without cuStreamCreate is treated as older than 2.0.
        void * address = nvcuda.bindSymbol("cuStreamCreate");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.0.\n");
            return false;
        }
    }

    if (version >= 2010)
    {
        // A driver without cuModuleLoadDataEx is treated as older than 2.1.
        void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.1.\n");
            return false;
        }
    }

    if (version >= 2020)
    {
        // From 2.2 on the driver is asked for its version directly.
        typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
        PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
        if (driverGetVersion == NULL) {
            nvDebug("*** CUDA driver version < 2.2.\n");
            return false;
        }

        int driverVersion = 0;
        CUresult err = driverGetVersion(&driverVersion);
        if (err != CUDA_SUCCESS) {
            // 'err' is a driver API CUresult, not a runtime cudaError_t, so it must
            // not be fed to cudaGetErrorString(); report the raw code instead.
            nvDebug("*** Error querying driver version: %d.\n", int(err));
            return false;
        }

        return driverVersion >= version;
    }
#endif // HAVE_CUDA

    return true;
}
/// Determine if CUDA is available.
///
/// @return true when the driver matches the CUDA runtime this file was built
///         against and at least one device is reported. When exactly one
///         device is reported it must also not be an emulation device.
///         Always false in builds without HAVE_CUDA.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
    // Make sure that CUDA driver matches CUDA runtime.
    if (!isCudaDriverAvailable(CUDART_VERSION))
    {
        nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
        return false;
    }

    const int count = deviceCount();

    if (count == 1)
    {
        // Make sure it's not an emulation device.
        cudaDeviceProp deviceProp;
        if (cudaGetDeviceProperties(&deviceProp, 0) != cudaSuccess)
        {
            // The contents of deviceProp cannot be trusted after a failed query.
            return false;
        }

        // deviceProp.name != Device Emulation (CPU)
        if (deviceProp.major == -1 || deviceProp.minor == -1)
        {
            return false;
        }
    }

    // @@ Make sure that warp size == 32
    // @@ Make sure available GPU is faster than the CPU.

    return count > 0;
#else
    return false;
#endif
}
/// Get number of CUDA enabled devices.
/// Yields 0 when the runtime query does not succeed or when the build has no
/// CUDA support (HAVE_CUDA undefined).
int nv::cuda::deviceCount()
{
    int numDevices = 0;

#if defined HAVE_CUDA
    int queried = 0;
    if (cudaGetDeviceCount(&queried) == cudaSuccess)
    {
        numDevices = queried;
    }
#endif

    return numDevices;
}
// Make sure device meets requirements:
// - Not an emulation device.
// - Not an integrated device?
// - Faster than CPU.
//
// Only the emulation check is active at the moment; the integrated-device test
// is commented out and no speed comparison is performed here.
// Returns false when the device properties cannot be queried, and always
// false in builds without HAVE_CUDA.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
    cudaDeviceProp device_properties;
    if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
    {
        // Do not inspect the struct when the query itself failed.
        return false;
    }

    if (device_properties.major == -1 || device_properties.minor == -1) {
        // Emulation device.
        return false;
    }

#if CUDART_VERSION >= 2030 // 2.3
    /*if (device_properties.integrated)
    {
        // Integrated devices.
        return false;
    }*/
#endif

    return true;
#else
    return false;
#endif
}
/// Pick the valid device with the highest multiProcessorCount * clockRate score.
///
/// @return Index of the best device, or -1 when no device passes
///         isValidDevice() (or when built without HAVE_CUDA).
int nv::cuda::getFastestDevice()
{
    int max_gflops_device = -1;

#if defined HAVE_CUDA
    int max_gflops = 0;

    const int device_count = deviceCount();
    for (int i = 0; i < device_count; i++)
    {
        if (!isValidDevice(i))
        {
            continue;
        }

        cudaDeviceProp device_properties;
        if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
        {
            // Skip devices whose properties cannot be read instead of scoring garbage.
            continue;
        }

        const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
        if (gflops > max_gflops)
        {
            max_gflops = gflops;
            max_gflops_device = i;
        }
    }
#endif

    return max_gflops_device;
}
/// Select the fastest CUDA device for use by this library.
///
/// @param device_ptr  Receives the index of the device that this call selected,
///                    or -1 when there is nothing for the caller to clean up
///                    (failure, or CUDA was already active in the process).
/// @return true when a usable device is active, false otherwise. Always false
///         in builds without HAVE_CUDA.
bool nv::cuda::initDevice(int * device_ptr)
{
    nvDebugCheck(device_ptr != NULL);
    if (device_ptr == NULL)
    {
        return false;
    }

    // Give the out-parameter a defined value on every path, including non-CUDA builds.
    *device_ptr = -1;

#if defined HAVE_CUDA

#if CUDART_VERSION >= 2030 // 2.3
    // Set device flags to yield in order to play nice with other threads.
    // The result is not inspected; an already active context is detected
    // through the cudaSetDevice() call below.
    (void)cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif

    const int device = getFastestDevice();
    if (device == -1)
    {
        // No device is fast enough.
        return false;
    }

    // Select CUDA device.
    cudaError_t result = cudaSetDevice(device);

    if (result == cudaErrorSetOnActiveProcess)
    {
        // CUDA was already initialized elsewhere: reuse whatever device is
        // active, but report -1 because there is no device for us to clean up.
        int activeDevice = -1;
        result = cudaGetDevice(&activeDevice);
        if (result != cudaSuccess)
        {
            nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
            return false;
        }

        return isValidDevice(activeDevice); // Return true if device is valid.
    }
    else if (result != cudaSuccess)
    {
        nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
        return false;
    }

    *device_ptr = device;
    return true;
#else
    return false;
#endif
}
/// Call cudaThreadExit() and log the error string if it does not succeed.
/// Does nothing in builds without HAVE_CUDA.
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
    const cudaError_t status = cudaThreadExit();
    if (status == cudaSuccess)
    {
        return;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
/// Check whether the installed CUDA driver satisfies the requested version.
///
/// @param version  Required version in CUDART_VERSION encoding (2000, 2010, 2020, ...).
/// @return false when the driver library cannot be loaded, when a probed entry
///         point is missing, or when the reported driver version is lower than
///         @a version. Returns true otherwise, including builds without HAVE_CUDA.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        nvDebug("*** CUDA driver not found.\n");
        return false;
    }

    if (version >= 2000)
    {
        // A driver without cuStreamCreate is treated as older than 2.0.
        void * address = nvcuda.bindSymbol("cuStreamCreate");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.0.\n");
            return false;
        }
    }

    if (version >= 2010)
    {
        // A driver without cuModuleLoadDataEx is treated as older than 2.1.
        void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.1.\n");
            return false;
        }
    }

    if (version >= 2020)
    {
        // From 2.2 on the driver is asked for its version directly.
        typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
        PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
        if (driverGetVersion == NULL) {
            nvDebug("*** CUDA driver version < 2.2.\n");
            return false;
        }

        int driverVersion = 0;
        CUresult err = driverGetVersion(&driverVersion);
        if (err != CUDA_SUCCESS) {
            // 'err' is a driver API CUresult, not a runtime cudaError_t, so it must
            // not be fed to cudaGetErrorString(); report the raw code instead.
            nvDebug("*** Error querying driver version: %d.\n", int(err));
            return false;
        }

        return driverVersion >= version;
    }
#endif // HAVE_CUDA

    return true;
}
/// Determine if CUDA is available.
///
/// @return true when the driver matches the CUDA runtime this file was built
///         against and at least one device is reported. When exactly one
///         device is reported it must also not be an emulation device.
///         Always false in builds without HAVE_CUDA.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
    // Make sure that CUDA driver matches CUDA runtime.
    if (!isCudaDriverAvailable(CUDART_VERSION))
    {
        nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
        return false;
    }

    const int count = deviceCount();

    if (count == 1)
    {
        // Make sure it's not an emulation device.
        cudaDeviceProp deviceProp;
        if (cudaGetDeviceProperties(&deviceProp, 0) != cudaSuccess)
        {
            // The contents of deviceProp cannot be trusted after a failed query.
            return false;
        }

        // deviceProp.name != Device Emulation (CPU)
        if (deviceProp.major == -1 || deviceProp.minor == -1)
        {
            return false;
        }
    }

    // @@ Make sure that warp size == 32

    return count > 0;
#else
    return false;
#endif
}
/// Get number of CUDA enabled devices.
/// Yields 0 when the runtime query does not succeed or when the build has no
/// CUDA support (HAVE_CUDA undefined).
int nv::cuda::deviceCount()
{
    int numDevices = 0;

#if defined HAVE_CUDA
    int queried = 0;
    if (cudaGetDeviceCount(&queried) == cudaSuccess)
    {
        numDevices = queried;
    }
#endif

    return numDevices;
}
/// Pick the non-emulation device with the highest
/// multiProcessorCount * clockRate score.
///
/// @return Index of the best device. Falls back to 0 when no device scores
///         above zero or when built without HAVE_CUDA; callers pass the result
///         straight to setDevice(), so the fallback is kept.
int nv::cuda::getFastestDevice()
{
    int max_gflops_device = 0;

#if defined HAVE_CUDA
    int max_gflops = 0;

    const int device_count = deviceCount();
    for (int current_device = 0; current_device < device_count; current_device++)
    {
        cudaDeviceProp device_properties;
        if (cudaGetDeviceProperties(&device_properties, current_device) != cudaSuccess)
        {
            // Skip devices whose properties cannot be read instead of scoring garbage.
            continue;
        }

        // major/minor of -1 marks an emulation device; never select it.
        if (device_properties.major == -1 || device_properties.minor == -1)
        {
            continue;
        }

        const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
        if (gflops > max_gflops)
        {
            max_gflops = gflops;
            max_gflops_device = current_device;
        }
    }
#endif

    return max_gflops_device;
}
/// Make device @a i the active CUDA device.
/// Logs the runtime's error string on failure and reports success as a bool.
/// Always false in builds without HAVE_CUDA.
bool nv::cuda::setDevice(int i)
{
    nvCheck(i < deviceCount());

#if defined HAVE_CUDA
    const cudaError_t status = cudaSetDevice(i);
    const bool succeeded = (status == cudaSuccess);

    if (!succeeded)
    {
        nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
    }

    return succeeded;
#else
    return false;
#endif
}
/// Calls cudaThreadExit() and logs the CUDA error string if it fails.
/// Does nothing when built without HAVE_CUDA.
void nv::cuda::exit()
{
#if defined HAVE_CUDA
    const cudaError_t status = cudaThreadExit();
    if (status == cudaSuccess)
    {
        return;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}

View File

@ -32,10 +32,8 @@ namespace nv
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
bool setDevice(int i);
void exit();
};
} // nv namespace

View File

@ -73,7 +73,7 @@ namespace nvtt
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
// DX10 formats.
Format_BC1 = Format_DXT1,
@ -194,7 +194,7 @@ namespace nvtt
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted.
// Set the way the input alpha channel is interpreted. @@ Not implemented!
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.

View File

@ -24,7 +24,6 @@
-------------------------------------------------------------------------- */
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
@ -61,61 +60,12 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
}
// Returns the row of the symmetric matrix that has the largest squared
// length (Dot of the row with itself). The six stored coefficients are
// expanded into three rows as [0 1 2], [1 3 4], [2 4 5]. Ties fall through
// to the later row, since all comparisons are strict.
static Vec3 EstimatePrincipleComponent( Sym3x3 const& matrix )
{
    Vec3 const first(matrix[0], matrix[1], matrix[2]);
    Vec3 const second(matrix[1], matrix[3], matrix[4]);
    Vec3 const third(matrix[2], matrix[4], matrix[5]);

    float const firstLenSq = Dot(first, first);
    float const secondLenSq = Dot(second, second);
    float const thirdLenSq = Dot(third, third);

    // The first row wins only if it is strictly longer than both others.
    bool const firstIsLongest = (firstLenSq > secondLenSq) && (firstLenSq > thirdLenSq);
    if (firstIsLongest)
    {
        return first;
    }

    if (secondLenSq > thirdLenSq)
    {
        return second;
    }
    else
    {
        return third;
    }
}
#define POWER_ITERATION_COUNT 8
#if SQUISH_USE_SIMD
// SIMD variant: estimates the principal component of the symmetric 3x3
// matrix by power iteration. Starting from EstimatePrincipleComponent(),
// the vector is multiplied by the matrix POWER_ITERATION_COUNT times and
// rescaled by its largest xyz component after every multiplication.
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
// Expand the six stored coefficients into three rows; w is padded with zero.
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
// NOTE(review): NUM is not referenced by the loop below, which uses
// POWER_ITERATION_COUNT - looks like a leftover; confirm before removing.
const int NUM = 8;
//Vec4 v = VEC4_CONST( 1.0f );
//Vec4 v = row0; // row1, row2
// Seed the iteration with the estimated row instead of a constant vector.
Vec3 v3 = EstimatePrincipleComponent( matrix );
Vec4 v( v3.X(), v3.Y(), v3.Z(), 0.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
// Only the xyz part of the final vector is returned.
return v.GetVec3();
}
#else
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec3 v = EstimatePrincipleComponent( matrix );
for (int i = 0; i < POWER_ITERATION_COUNT; i++)
{
Vec3 v(1, 1, 1);
for(int i = 0; i < NUM; i++) {
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@ -132,6 +82,5 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
return v;
}
#endif
} // namespace squish

File diff suppressed because it is too large Load Diff

View File

@ -87,10 +87,7 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -134,7 +131,6 @@ int main(int argc, char *argv[])
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool alpha = false;
bool normal = false;
bool color2normal = false;
bool wrapRepeat = false;
@ -158,10 +154,6 @@ int main(int argc, char *argv[])
if (strcmp("-color", argv[i]) == 0)
{
}
else if (strcmp("-alpha", argv[i]) == 0)
{
alpha = true;
}
else if (strcmp("-normal", argv[i]) == 0)
{
normal = true;
@ -262,12 +254,7 @@ int main(int argc, char *argv[])
}
}
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
{
@ -275,7 +262,6 @@ int main(int argc, char *argv[])
printf("Input options:\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -alpha \tThe input image has an alpha channel used for transparency.\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
@ -295,7 +281,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
return EXIT_FAILURE;
return 1;
}
// @@ Make sure input file exists.
@ -310,13 +296,13 @@ int main(int argc, char *argv[])
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return EXIT_FAILURE;
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return EXIT_FAILURE;
return 1;
}
uint faceCount;
@ -353,7 +339,7 @@ int main(int argc, char *argv[])
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return EXIT_FAILURE;
return 1;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
@ -369,15 +355,6 @@ int main(int argc, char *argv[])
inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
}
if (alpha)
{
inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
}
else
{
inputOptions.setAlphaMode(nvtt::AlphaMode_None);
}
if (normal)
{
setNormalMap(inputOptions);
@ -425,7 +402,7 @@ int main(int argc, char *argv[])
if (outputHandler.stream->isError())
{
fprintf(stderr, "Error opening '%s' for writting\n", output.str());
return EXIT_FAILURE;
return 1;
}
nvtt::Compressor compressor;
@ -439,7 +416,7 @@ int main(int argc, char *argv[])
else
{
printf("DISABLED\n\n");
}
}
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
@ -453,16 +430,27 @@ int main(int argc, char *argv[])
// fflush(stdout);
// getchar();
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return EXIT_SUCCESS;
return 0;
}