90 Commits
2.0.6 ... 2.0.7

Author SHA1 Message Date
f817d49872 Replace broken compressor with the latest version from trunk. 2009-11-06 01:44:28 +00:00
09ad232142 Update changelog. 2009-11-04 20:34:26 +00:00
69363fcc92 Create 2.0.7 release. 2009-11-04 08:58:19 +00:00
2f0fe5149f Patch posh to support freebsd. 2009-11-03 23:58:10 +00:00
f744e700ae Add doc folder. 2009-11-03 23:57:36 +00:00
48f5dd4603 Update cmake scripts. 2009-11-03 23:56:25 +00:00
78bb864c14 cmake build system update. 2009-11-03 23:55:38 +00:00
7bc1eb6a29 Add ignore lists. 2009-11-03 23:53:44 +00:00
f63abb3ef6 Add properties. 2009-11-03 23:51:41 +00:00
e8500dead4 Add ignore list. 2009-11-03 23:50:14 +00:00
2e1d1e70ae Add ignore properties. 2009-11-03 23:44:05 +00:00
bc3299b78b Add ignore properties. 2009-11-03 23:43:35 +00:00
d01bdaf370 Add ignore properties. 2009-11-03 23:42:52 +00:00
d90b3d927b Update ignore file. 2009-11-03 23:40:10 +00:00
3e5c47d9fb Add ignore properties. 2009-11-03 23:39:03 +00:00
6a667fff50 Do not use external dependencies. 2009-11-03 23:28:18 +00:00
dfe081d32a Update messages. 2009-11-03 23:27:24 +00:00
acc02abaf1 Fix messages. 2009-11-03 23:17:11 +00:00
8871fefe89 Update project uuid. 2009-11-03 23:08:11 +00:00
2543f4c9ed rename stress->testsuite 2009-11-03 18:48:25 +00:00
ac46c40b3e Rename stress->testsuite 2009-11-03 18:48:03 +00:00
bcf0df2b49 use default pixel format, add comment to indicate where to change default 2009-10-30 01:05:31 +00:00
8c7f54056c Add more todo items.
Delete images more efficiently?
2009-10-21 19:20:30 +00:00
34cd266d8c Add todo item to perform color transforms before compression. 2009-10-21 19:19:09 +00:00
9a9366cf4c Who knows what msvc decided to change. 2009-10-21 18:29:46 +00:00
8820c43175 Large refactoring of compressor codes:
- Define compressor interface.
- Implement compressor interface for different compressors.
- Add parallel compressor using OpenMP. Experimental.
- Add generic GPU compressor, so far only DXT1 enabled.
2009-10-21 07:48:27 +00:00
18a3abf794 Enable alpha dithering when using DXT3.
Update timing message.
2009-10-21 07:43:24 +00:00
384f74ba39 Use minimal set by default. 2009-10-21 07:42:33 +00:00
7d75840398 Add todo messages.
Use DXT3 nvidia decoder if requested.
2009-10-21 07:42:08 +00:00
8ea52efbf4 Add DXT3 nvidia decoder. 2009-10-21 07:40:49 +00:00
d86a89742e Update info message. 2009-10-21 07:40:23 +00:00
fd11f5e7ef Implement generic swizzle, remove specialized ones. 2009-10-21 07:39:59 +00:00
dcfdabaee3 Fix timer. 2009-10-21 07:39:08 +00:00
2d97ee9c03 Update project files. Many minor changes. Enable OpenMP. 2009-10-21 07:38:11 +00:00
14f49b6003 Define new macro to point to extern dir. 2009-10-21 07:37:21 +00:00
ea7dabc6b1 Add comment. 2009-10-19 05:42:27 +00:00
0878c0e967 Add expand and pack normal methods.
Set normal map flag.
2009-10-18 20:04:39 +00:00
a088ae5789 Implement normal map generation for floating point images. 2009-10-18 20:03:21 +00:00
a52d3b7cdc Tweak implementation of scaleBias. 2009-10-18 20:02:43 +00:00
307c418acc Update FindCUDA script to latest. 2009-10-18 08:11:51 +00:00
d0218cb18b Merge changes from 2.0 2009-10-18 08:10:28 +00:00
c1f9c4df42 Create default output handle on setFileName to avoid modifying const argument.
Fix ref counting errors in TexImage.
Format TODO messages.
2009-10-18 08:09:20 +00:00
78d65e8368 When compiling with gcc, define NV_FILE_LINE using gcc convention. 2009-10-18 08:04:25 +00:00
18474cdb33 Some more progress towards 2.1:
- Add raw input methods in context.
- Implement some of the TexImage input methods in context.
- Add output header context method for TexImage.
2009-10-12 07:56:02 +00:00
b7fbd1fc9b Fix error in Snow Leopard. 2009-10-12 00:44:03 +00:00
9de3298d6b Fix cmake build. 2009-10-11 23:51:22 +00:00
568f34d838 Fix color weighted compression for single color blocks. Fixes issue 96. 2009-09-21 18:46:48 +00:00
e38e584db2 Rename texture to teximage. 2009-09-14 22:43:53 +00:00
8655259379 Fix comment. 2009-08-26 01:27:50 +00:00
7d65633f63 Add files to repro bugs/request. 2009-07-28 08:07:48 +00:00
cb62c3c461 Add support for R16 in DDS headers.
Cleanup DDS header output code.
Temporary testing code added to nvcompress.
2009-07-28 08:05:23 +00:00
573cc1b371 Add support for UINT16 images when using freetype. 2009-07-28 08:03:36 +00:00
9c6f6e143e Use tabs not spaces. 2009-07-28 08:03:08 +00:00
126816ef72 Experimental quality improvements and speed optimizations. 2009-07-06 09:08:09 +00:00
2ca6e4a1bd Add support function for stb compressor. 2009-07-06 09:06:40 +00:00
b839b873e1 Remove commented out code. 2009-07-06 09:06:17 +00:00
ab473f4ec5 Add DXT5 tests.
Use timer class.
2009-07-06 09:05:54 +00:00
f1ebbd4da6 Add more third-party compressors. 2009-07-06 09:04:29 +00:00
ac79935c88 Init default values. 2009-07-06 09:03:12 +00:00
2aca4673ab Some progress implementing new api. 2009-07-06 09:02:20 +00:00
43893d5d0f Add NV5x DXT5 decompressor. 2009-07-06 09:00:30 +00:00
009eaf2aa6 Fix msvc warnings. 2009-07-06 08:59:48 +00:00
fd2492670e Add a QPF timer. 2009-07-06 08:57:36 +00:00
7d88f4fa32 Merge changes from 2.0 branch. 2009-07-06 08:56:55 +00:00
60022acaa7 Add dxt5 tests to testsuite. 2009-07-04 19:34:42 +00:00
a5faf51738 Add simd power solver. 2009-07-04 19:33:55 +00:00
8365df0adf Add example textures with alpha. 2009-07-04 19:32:09 +00:00
2d38f4fb2c Update cmake scripts to use new FindCUDA package. 2009-06-26 06:34:19 +00:00
3adf00b4b9 Use official FindCUDA cmake script. 2009-06-26 06:33:34 +00:00
63897f3fe6 Mark threads as required. 2009-06-13 14:27:53 +00:00
71f29a27f3 Fix error in zero padding. 2009-06-13 14:18:31 +00:00
720be412fa Return correct error codes. Fix issue 92. 2009-06-13 14:17:46 +00:00
8d361eee22 Use memory allocator correctly. 2009-06-13 14:17:10 +00:00
8d54d22cb2 Update vc9 project files. 2009-04-21 09:31:59 +00:00
782a127071 Add alpha flag to DXT1a files. 2009-04-16 20:06:08 +00:00
154e117e13 Update nvmath project file. 2009-04-16 08:43:16 +00:00
603d8ad1a2 Update vc9 project. 2009-04-16 08:38:39 +00:00
bed4d78f6b Remove static member that was not thread safe! 2009-04-12 03:13:13 +00:00
d7f8fba7a7 Add comment about thread safety. 2009-04-12 03:10:40 +00:00
e0a7e103c1 Add include to all configurations. 2009-04-11 16:10:31 +00:00
319ed6bac0 Required include added 2009-04-11 16:07:12 +00:00
9e959d0191 Update wrapper, disable error handler temporarily. 2009-04-03 21:39:44 +00:00
53265596a3 Update nvtt wrapper. 2009-04-03 21:37:50 +00:00
ae24cb163d Remove msvc warnings. 2009-04-01 07:17:25 +00:00
fb75d6065d Update projects. 2009-04-01 07:13:47 +00:00
ae744f88e6 Add constructor that takes a stream. 2009-04-01 07:13:13 +00:00
5ac76b68c9 Add option to select decompression algorithm to test suite. 2009-03-24 17:35:40 +00:00
f2090df7a5 Add support for FreeBSD. Patch by AMDmi3. 2009-03-21 07:44:26 +00:00
0a8de141a6 Fix errors on win32. Define function pointers properly. 2009-03-21 07:43:15 +00:00
9aaee3ae16 Add proper todo message. 2009-03-21 07:42:36 +00:00
20 changed files with 993 additions and 885 deletions

View File

@ -1,3 +1,10 @@
NVIDIA Texture Tools version 2.0.7
* Output correct exit codes. Fixes issue 92.
* Fix thread-safety errors. Fixes issue 90.
* Add SIMD power method. Fixes issue 94.
* Interact better with applications that already use CUDA.
* Faster CPU compression.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.

View File

@ -1 +1 @@
2.0.6
2.0.7

View File

@ -105,7 +105,8 @@ ENDIF(OPENEXR_FOUND)
FIND_PACKAGE(Qt4)
# Threads
FIND_PACKAGE(Threads)
FIND_PACKAGE(Threads REQUIRED)
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
# configuration file
INCLUDE(CheckIncludeFiles)

View File

@ -38,7 +38,7 @@
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <ucontext.h>
# include <sys/ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace

View File

@ -115,6 +115,7 @@ namespace nv
{
NVCORE_API void dumpInfo();
// These functions are not thread safe.
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();

View File

@ -545,8 +545,6 @@ const char * Path::extension(const char * str)
}
// static
String String::s_null(String::null);
/// Clone this string
String String::clone() const
@ -557,13 +555,13 @@ String String::clone() const
void String::setString(const char * str)
{
if( str == NULL ) {
data = s_null.data;
if (str == NULL) {
data = NULL;
}
else {
allocString( str );
addRef();
}
addRef();
}
void String::setString(const char * str, int length)
@ -576,11 +574,11 @@ void String::setString(const char * str, int length)
void String::setString(const StringBuilder & str)
{
if( str.str() == NULL ) {
data = s_null.data;
if (str.str() == NULL) {
data = NULL;
}
else {
allocString(str);
addRef();
}
addRef();
}

View File

@ -151,15 +151,14 @@ namespace nv
/// Constructs a null string. @sa isNull()
String()
{
data = s_null.data;
addRef();
data = NULL;
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
addRef();
if (data != NULL) addRef();
}
/// Constructs a shared string from a standard string.
@ -183,7 +182,6 @@ namespace nv
/// Dtor.
~String()
{
nvDebugCheck(data != NULL);
release();
}
@ -220,43 +218,49 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return true;
}
if ((data == NULL) != (str.data == NULL)) {
return false;
}
return strcmp(data, str.data) == 0;
}
/// Equal operator.
bool operator==( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) == 0;
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return false;
}
if ((data == NULL) != (str.data == NULL)) {
return true;
}
return strcmp(data, str.data) != 0;
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) != 0;
}
/// Returns true if this string is the null string.
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
bool isNull() const { return data == NULL; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
@ -265,44 +269,45 @@ namespace nv
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { nvDebugCheck(data != NULL); return data; }
operator const char * () const { return data; }
/// Get string pointer.
const char * str() const { nvDebugCheck(data != NULL); return data; }
const char * str() const { return data; }
private:
enum null_t { null };
// Private constructor for null string.
String(null_t) {
setString("");
}
// Add reference count.
void addRef() {
nvDebugCheck(data != NULL);
setRefCount(getRefCount() + 1);
}
// Decrease reference count.
void release() {
nvDebugCheck(data != NULL);
const uint16 count = getRefCount();
setRefCount(count - 1);
if( count - 1 == 0 ) {
mem::free(data - 2);
data = NULL;
void addRef()
{
if (data != NULL)
{
setRefCount(getRefCount() + 1);
}
}
uint16 getRefCount() const {
// Decrease reference count.
void release()
{
if (data != NULL)
{
const uint16 count = getRefCount();
setRefCount(count - 1);
if (count - 1 == 0) {
mem::free(data - 2);
data = NULL;
}
}
}
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
return *reinterpret_cast<const uint16 *>(data - 2);
}
void setRefCount(uint16 count) {
nvDebugCheck(data != NULL);
nvCheck(count < 0xFFFF);
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
@ -341,8 +346,6 @@ namespace nv
private:
NVCORE_API static String s_null;
const char * data;
};

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (6); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (7); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -78,7 +78,7 @@ void Image::unwrap()
void Image::free()
{
::free(m_data);
nv::mem::free(m_data);
m_data = NULL;
}

View File

@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
Matrix r;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; i++)
for (int j = 0; j < 4; j++)
{
r(i, j) = m(j, i);
}

View File

@ -205,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT1 block;
//squish::WeightedClusterFit fit;
squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
squish::FastClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -221,7 +221,7 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}

View File

@ -53,7 +53,7 @@ using namespace nvtt;
namespace
{
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a) {
@ -121,7 +121,7 @@ namespace nvtt
m_fixedImage = NULL;
m_floatImage = image;
}
// Convert linear float image to fixed image ready for compression.
void toFixedImage(const InputOptions::Private & inputOptions)
@ -153,7 +153,7 @@ namespace nvtt
if (inputOptions.isNormalMap)
{
// Expand normals to [-1, 1] range.
// floatImage->expandNormals(0);
// floatImage->expandNormals(0);
}
else if (inputOptions.inputGamma != 1.0f)
{
@ -193,7 +193,7 @@ namespace nvtt
return m_fixedImage.ptr();
}
private:
const Image * m_inputImage;
AutoPtr<Image> m_fixedImage;
@ -207,28 +207,16 @@ Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = m.cudaSupported;
m.cudaEnabled = false;
m.cudaDevice = -1;
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
enableCudaAcceleration(m.cudaSupported);
}
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
cuda::exit();
}
@ -237,21 +225,33 @@ void Compressor::enableCudaAcceleration(bool enable)
{
if (m.cudaSupported)
{
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
if (m.cudaEnabled && !enable)
{
m.cudaEnabled = false;
m.cuda = NULL;
if (m.cudaDevice != -1)
{
// Exit device.
cuda::exitDevice();
}
}
else if (!m.cudaEnabled && enable)
{
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
if (m.cudaEnabled)
{
// Create compressor if initialization succeeds.
m.cuda = new CudaCompressor();
// But cleanup if failed.
if (!m.cuda->isValid())
{
enableCudaAcceleration(false);
}
}
}
}
}
@ -292,9 +292,9 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
return false;
}
inputOptions.computeTargetExtents();
// Output DDS header.
if (!outputHeader(inputOptions, compressionOptions, outputOptions))
{
@ -310,7 +310,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
}
outputOptions.closeFile();
return true;
}
@ -325,15 +325,15 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
}
DDSHeader header;
header.setWidth(inputOptions.targetWidth);
header.setHeight(inputOptions.targetHeight);
int mipmapCount = inputOptions.realMipmapCount();
nvDebugCheck(mipmapCount > 0);
header.setMipmapCount(mipmapCount);
if (inputOptions.textureType == TextureType_2D) {
header.setTexture2D();
}
@ -341,10 +341,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setTextureCube();
}
/*else if (inputOptions.textureType == TextureType_3D) {
header.setTexture3D();
header.setDepth(inputOptions.targetDepth);
header.setTexture3D();
header.setDepth(inputOptions.targetDepth);
}*/
if (compressionOptions.format == Format_RGBA)
{
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
@ -353,7 +353,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
else
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
@ -376,10 +376,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
header.swapBytes();
uint headerSize = 128;
if (header.hasDX10Header())
{
@ -392,7 +392,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{
outputOptions.errorHandler->error(Error_FileWrite);
}
return writeSucceed;
}
@ -428,7 +428,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
return false;
}
}
quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
@ -438,7 +438,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
h = max(1U, h / 2);
d = max(1U, d / 2);
}
return true;
}
@ -489,7 +489,7 @@ int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOpti
{
int idx = f * inputOptions.mipmapCount + m;
const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];
if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d))
{
if (inputImage.data != NULL)
@ -544,7 +544,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
mipmap.toFloatImage(inputOptions);
const FloatImage * floatImage = mipmap.asFloatImage();
if (inputOptions.mipmapFilter == MipmapFilter_Box)
{
// Use fast downsample.
@ -562,7 +562,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
}
// Normalize mipmap.
if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps)
{
@ -590,7 +590,7 @@ void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions:
if (inputOptions.convertToNormalMap)
{
mipmap.toFixedImage(inputOptions);
Vector4 heightScale = inputOptions.heightFactors;
mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
}
@ -715,29 +715,29 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
if (compressionOptions.externalCompressor == "ati")
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
atiCompressDXT1(image, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
@ -828,27 +828,27 @@ int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions
const uint bitCount = compressionOptions.bitcount;
inputOptions.computeTargetExtents();
uint mipmapCount = inputOptions.realMipmapCount();
int size = 0;
for (uint f = 0; f < inputOptions.faceCount; f++)
{
uint w = inputOptions.targetWidth;
uint h = inputOptions.targetHeight;
uint d = inputOptions.targetDepth;
for (uint m = 0; m < mipmapCount; m++)
{
size += computeImageSize(w, h, d, bitCount, format);
// Compute extents of next mipmap:
w = max(1U, w / 2);
h = max(1U, h / 2);
d = max(1U, d / 2);
}
}
return size;
}

View File

@ -63,10 +63,12 @@ namespace nvtt
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;

View File

@ -94,7 +94,7 @@ void InputOptions::reset()
m.textureType = TextureType_2D;
m.inputFormat = InputFormat_BGRA_8UB;
m.alphaMode = AlphaMode_Transparency;
m.alphaMode = AlphaMode_None;
m.inputGamma = 2.2f;
m.outputGamma = 2.2f;

View File

@ -1,239 +1,300 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
if (!nvcuda.isValid())
{
nvDebug("*** CUDA driver not found.\n");
return false;
}
if (version >= 2000)
{
void * address = nvcuda.bindSymbol("cuStreamCreate");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.0.\n");
return false;
}
}
if (version >= 2010)
{
void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.1.\n");
return false;
}
}
if (version >= 2020)
{
typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
if (driverGetVersion == NULL) {
nvDebug("*** CUDA driver version < 2.2.\n");
return false;
}
int driverVersion;
CUresult err = driverGetVersion(&driverVersion);
if (err != CUDA_SUCCESS) {
nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
return false;
}
return driverVersion >= version;
}
#endif // HAVE_CUDA
return true;
}
/// Determine if CUDA is available.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
// Make sure that CUDA driver matches CUDA runtime.
if (!isCudaDriverAvailable(CUDART_VERSION))
{
nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
return false;
}
int count = deviceCount();
if (count == 1)
{
// Make sure it's not an emulation device.
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, 0);
// deviceProp.name != Device Emulation (CPU)
if (deviceProp.major == -1 || deviceProp.minor == -1)
{
return false;
}
}
// @@ Make sure that warp size == 32
return count > 0;
#else
return false;
#endif
}
/// Get number of CUDA enabled devices.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
int gpuCount = 0;
cudaError_t result = cudaGetDeviceCount(&gpuCount);
if (result == cudaSuccess)
{
return gpuCount;
}
#endif
return 0;
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = 0;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
int current_device = 0;
while (current_device < device_count)
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, current_device);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major != -1 && device_properties.minor != -1)
{
if( gflops > max_gflops )
{
max_gflops = gflops;
max_gflops_device = current_device;
}
}
current_device++;
}
#endif
return max_gflops_device;
}
/// Activate the given devices.
bool nv::cuda::setDevice(int i)
{
nvCheck(i < deviceCount());
#if defined HAVE_CUDA
cudaError_t result = cudaSetDevice(i);
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
return result == cudaSuccess;
#else
return false;
#endif
}
void nv::cuda::exit()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
#endif
}
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
Library nvcuda("nvcuda.dll");
#else
Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif
if (!nvcuda.isValid())
{
nvDebug("*** CUDA driver not found.\n");
return false;
}
if (version >= 2000)
{
void * address = nvcuda.bindSymbol("cuStreamCreate");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.0.\n");
return false;
}
}
if (version >= 2010)
{
void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
if (address == NULL) {
nvDebug("*** CUDA driver version < 2.1.\n");
return false;
}
}
if (version >= 2020)
{
typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);
PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
if (driverGetVersion == NULL) {
nvDebug("*** CUDA driver version < 2.2.\n");
return false;
}
int driverVersion;
CUresult err = driverGetVersion(&driverVersion);
if (err != CUDA_SUCCESS) {
nvDebug("*** Error querying driver version: '%s'.\n", cudaGetErrorString((cudaError_t)err));
return false;
}
return driverVersion >= version;
}
#endif // HAVE_CUDA
return true;
}
/// Determine if CUDA is available.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
// Make sure that CUDA driver matches CUDA runtime.
if (!isCudaDriverAvailable(CUDART_VERSION))
{
nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
return false;
}
int count = deviceCount();
if (count == 1)
{
// Make sure it's not an emulation device.
cudaDeviceProp deviceProp;
cudaGetDeviceProperties(&deviceProp, 0);
// deviceProp.name != Device Emulation (CPU)
if (deviceProp.major == -1 || deviceProp.minor == -1)
{
return false;
}
}
// @@ Make sure that warp size == 32
// @@ Make sure available GPU is faster than the CPU.
return count > 0;
#else
return false;
#endif
}
/// Get number of CUDA enabled devices.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
int gpuCount = 0;
cudaError_t result = cudaGetDeviceCount(&gpuCount);
if (result == cudaSuccess)
{
return gpuCount;
}
#endif
return 0;
}
// Make sure device meets requirements:
// - Not an emulation device.
// - Not an integrated device?
// - Faster than CPU.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, i);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major == -1 || device_properties.minor == -1) {
// Emulation device.
return false;
}
#if CUDART_VERSION >= 2030 // 2.3
/*if (device_properties.integrated)
{
// Integrated devices.
return false;
}*/
#endif
return true;
#else
return false;
#endif
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = -1;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
for (int i = 0; i < device_count; i++)
{
if (isValidDevice(i))
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, i);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (gflops > max_gflops)
{
max_gflops = gflops;
max_gflops_device = i;
}
}
}
#endif
return max_gflops_device;
}
/// Activate the given devices.
bool nv::cuda::initDevice(int * device_ptr)
{
nvDebugCheck(device_ptr != NULL);
#if defined HAVE_CUDA
#if CUDART_VERSION >= 2030 // 2.3
// Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active.
cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif
int device = getFastestDevice();
if (device == -1)
{
// No device is fast enough.
*device_ptr = -1;
return false;
}
// Select CUDA device.
cudaError_t result = cudaSetDevice(device);
if (result == cudaErrorSetOnActiveProcess)
{
int device;
result = cudaGetDevice(&device);
*device_ptr = -1; // No device to cleanup.
return isValidDevice(device); // Return true if device is valid.
}
else if (result != cudaSuccess)
{
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
*device_ptr = -1;
return false;
}
*device_ptr = device;
return true;
#else
return false;
#endif
}
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
}
#endif
}

View File

@ -32,8 +32,10 @@ namespace nv
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool setDevice(int i);
void exit();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
};
} // nv namespace

View File

@ -73,7 +73,7 @@ namespace nvtt
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
// DX10 formats.
Format_BC1 = Format_DXT1,
@ -194,7 +194,7 @@ namespace nvtt
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted. @@ Not implemented!
// Set the way the input alpha channel is interpreted.
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.

View File

@ -24,6 +24,7 @@
-------------------------------------------------------------------------- */
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
@ -60,12 +61,39 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
}
#define POWER_ITERATION_COUNT 8
#if SQUISH_USE_SIMD
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
const int NUM = 8;
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
Vec4 v = VEC4_CONST( 1.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
return v.GetVec3();
}
#else
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec3 v(1, 1, 1);
for(int i = 0; i < NUM; i++) {
for (int i = 0; i < POWER_ITERATION_COUNT; i++)
{
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@ -82,5 +110,6 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
return v;
}
#endif
} // namespace squish

File diff suppressed because it is too large Load Diff

View File

@ -87,7 +87,10 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -254,7 +257,12 @@ int main(int argc, char *argv[])
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
if (input.isNull())
{
@ -281,7 +289,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
return 1;
return EXIT_FAILURE;
}
// @@ Make sure input file exists.
@ -296,13 +304,13 @@ int main(int argc, char *argv[])
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return 1;
return EXIT_FAILURE;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return 1;
return EXIT_FAILURE;
}
uint faceCount;
@ -339,7 +347,7 @@ int main(int argc, char *argv[])
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
return EXIT_FAILURE;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
@ -402,7 +410,7 @@ int main(int argc, char *argv[])
if (outputHandler.stream->isError())
{
fprintf(stderr, "Error opening '%s' for writting\n", output.str());
return 1;
return EXIT_FAILURE;
}
nvtt::Compressor compressor;
@ -416,7 +424,7 @@ int main(int argc, char *argv[])
else
{
printf("DISABLED\n\n");
}
}
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
@ -430,27 +438,16 @@ int main(int argc, char *argv[])
// fflush(stdout);
// getchar();
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
return EXIT_SUCCESS;
}