112 Commits
2.0.6 ... 2.0.8

Author SHA1 Message Date
2587e2cf79 Update version number in resource file. Fixes issue 124. 2010-05-15 09:12:05 +00:00
eb01ca604f Tag 2.0.8 for release. 2010-05-14 18:01:41 +00:00
f6a39d6eab Remove duplicate code. 2010-05-14 04:58:08 +00:00
4fa93f8676 Update vc9 project. 2010-05-11 18:20:06 +00:00
639f11d0b9 Use the same defaults as nvtt 2.0 2010-05-11 17:18:08 +00:00
6e32d1e010 Update latest stb_dxt. 2010-05-09 08:21:34 +00:00
85db14f213 Add vertical flip. 2010-05-05 00:24:31 +00:00
ac5f849e91 Better estimation of principal component. Fixes issue 120. 2010-04-19 18:41:42 +00:00
4783d5621e Add missing files to cmake build as reported by amorilia. 2010-03-21 18:38:26 +00:00
85b9c2b0c5 Fix FindOpenEXR cmake script. Fixes issue 97. 2010-03-18 00:21:13 +00:00
447634d263 Fix cmake build in msvc. Fixes issue 111. 2010-03-18 00:13:31 +00:00
f436a71479 Add support for RGBE. 2010-03-17 07:55:03 +00:00
e848645e88 Add a few utils. Start converting tabs to spaces. 2010-03-17 07:54:07 +00:00
78b55e1e37 Finish pixel format converter. 2010-03-17 02:25:06 +00:00
3b0e0c3402 Fix errors after merge. Update cmake files. 2010-03-16 22:46:09 +00:00
a19e25228f reorg. 2010-03-16 22:37:25 +00:00
d8ed531e4c minor optimization. 2010-03-16 22:36:14 +00:00
73851ab86f misc 2010-03-16 22:35:38 +00:00
0cabfb6cfa misc. 2010-03-16 22:35:20 +00:00
1fafffd8af Detect OpenMP properly. 2010-03-16 22:31:27 +00:00
96f22f8cd4 Fix isColorNoAlpha, but leave it commented out. 2010-03-10 08:49:40 +00:00
558f7970b1 Fix issue 112. 2010-03-02 01:29:48 +00:00
d2fbcc5259 Add external libs. 2009-11-05 09:43:54 +00:00
0e5a1877a8 Some progress on the CUDA compressors. 2009-11-05 09:27:03 +00:00
8e436b1d30 Add post build commands to 64 bit targets. 2009-11-05 00:31:16 +00:00
2f0fe5149f Patch posh to support freebsd. 2009-11-03 23:58:10 +00:00
f744e700ae Add doc folder. 2009-11-03 23:57:36 +00:00
48f5dd4603 Update cmake scripts. 2009-11-03 23:56:25 +00:00
78bb864c14 cmake build system update. 2009-11-03 23:55:38 +00:00
7bc1eb6a29 Add ignore lists. 2009-11-03 23:53:44 +00:00
f63abb3ef6 Add properties. 2009-11-03 23:51:41 +00:00
e8500dead4 Add ignore list. 2009-11-03 23:50:14 +00:00
2e1d1e70ae Add ignore properties. 2009-11-03 23:44:05 +00:00
bc3299b78b Add ignore properties. 2009-11-03 23:43:35 +00:00
d01bdaf370 Add ignore properties. 2009-11-03 23:42:52 +00:00
d90b3d927b Update ignore file. 2009-11-03 23:40:10 +00:00
3e5c47d9fb Add ignore properties. 2009-11-03 23:39:03 +00:00
6a667fff50 Do not use external dependencies. 2009-11-03 23:28:18 +00:00
dfe081d32a Update messages. 2009-11-03 23:27:24 +00:00
acc02abaf1 Fix messages. 2009-11-03 23:17:11 +00:00
8871fefe89 Update project uuid. 2009-11-03 23:08:11 +00:00
2543f4c9ed rename stress->testsuite 2009-11-03 18:48:25 +00:00
ac46c40b3e Rename stress->testsuite 2009-11-03 18:48:03 +00:00
bcf0df2b49 use default pixel format, add comment to indicate where to change default 2009-10-30 01:05:31 +00:00
8c7f54056c Add more todo items.
Delete images more efficiently?
2009-10-21 19:20:30 +00:00
34cd266d8c Add todo item to perform color transforms before compression. 2009-10-21 19:19:09 +00:00
9a9366cf4c Who knows what msvc decided to change. 2009-10-21 18:29:46 +00:00
8820c43175 Large refactoring of compressor codes:
- Define compressor interface.
- Implement compressor interface for different compressors.
- Add parallel compressor using OpenMP. Experimental.
- Add generic GPU compressor, so far only DXT1 enabled.
2009-10-21 07:48:27 +00:00
18a3abf794 Enable alpha dithering when using DXT3.
Update timing message.
2009-10-21 07:43:24 +00:00
384f74ba39 Use minimal set by default. 2009-10-21 07:42:33 +00:00
7d75840398 Add todo messages.
Use DXT3 nvidia decoder if requested.
2009-10-21 07:42:08 +00:00
8ea52efbf4 Add DXT3 nvidia decoder. 2009-10-21 07:40:49 +00:00
d86a89742e Update info message. 2009-10-21 07:40:23 +00:00
fd11f5e7ef Implement generic swizzle, remove specialized ones. 2009-10-21 07:39:59 +00:00
dcfdabaee3 Fix timer. 2009-10-21 07:39:08 +00:00
2d97ee9c03 Update project files. Many minor changes. Enable OpenMP. 2009-10-21 07:38:11 +00:00
14f49b6003 Define new macro to point to extern dir. 2009-10-21 07:37:21 +00:00
ea7dabc6b1 Add comment. 2009-10-19 05:42:27 +00:00
0878c0e967 Add expand and pack normal methods.
Set normal map flag.
2009-10-18 20:04:39 +00:00
a088ae5789 Implement normal map generation for floating point images. 2009-10-18 20:03:21 +00:00
a52d3b7cdc Tweak implementation of scaleBias. 2009-10-18 20:02:43 +00:00
307c418acc Update FindCUDA script to latest. 2009-10-18 08:11:51 +00:00
d0218cb18b Merge changes from 2.0 2009-10-18 08:10:28 +00:00
c1f9c4df42 Create default output handle on setFileName to avoid modifying const argument.
Fix ref counting errors in TexImage.
Format TODO messages.
2009-10-18 08:09:20 +00:00
78d65e8368 When compiling with gcc, define NV_FILE_LINE using gcc convention. 2009-10-18 08:04:25 +00:00
18474cdb33 Some more progress towards 2.1:
- Add raw input methods in context.
- Implement some of the TexImage input methods in context.
- Add output header context method for TexImage.
2009-10-12 07:56:02 +00:00
b7fbd1fc9b Fix error in Snow Leopard. 2009-10-12 00:44:03 +00:00
9de3298d6b Fix cmake build. 2009-10-11 23:51:22 +00:00
568f34d838 Fix color weighted compression for single color blocks. Fixes issue 96. 2009-09-21 18:46:48 +00:00
e38e584db2 Rename texture to teximage. 2009-09-14 22:43:53 +00:00
8655259379 Fix comment. 2009-08-26 01:27:50 +00:00
7d65633f63 Add files to repro bugs/request. 2009-07-28 08:07:48 +00:00
cb62c3c461 Add support for R16 in DDS headers.
Cleanup DDS header output code.
Temporary testing code added to nvcompress.
2009-07-28 08:05:23 +00:00
573cc1b371 Add support for UINT16 images when using freetype. 2009-07-28 08:03:36 +00:00
9c6f6e143e Use tabs not spaces. 2009-07-28 08:03:08 +00:00
126816ef72 Experimental quality improvements and speed optimizations. 2009-07-06 09:08:09 +00:00
2ca6e4a1bd Add support function for stb compressor. 2009-07-06 09:06:40 +00:00
b839b873e1 Remove commented out code. 2009-07-06 09:06:17 +00:00
ab473f4ec5 Add DXT5 tests.
Use timer class.
2009-07-06 09:05:54 +00:00
f1ebbd4da6 Add more third-party compressors. 2009-07-06 09:04:29 +00:00
ac79935c88 Init default values. 2009-07-06 09:03:12 +00:00
2aca4673ab Some progress implementing new api. 2009-07-06 09:02:20 +00:00
43893d5d0f Add NV5x DXT5 decompressor. 2009-07-06 09:00:30 +00:00
009eaf2aa6 Fix msvc warnings. 2009-07-06 08:59:48 +00:00
fd2492670e Add a QPF timer. 2009-07-06 08:57:36 +00:00
7d88f4fa32 Merge changes from 2.0 branch. 2009-07-06 08:56:55 +00:00
60022acaa7 Add dxt5 tests to testsuite. 2009-07-04 19:34:42 +00:00
a5faf51738 Add simd power solver. 2009-07-04 19:33:55 +00:00
8365df0adf Add example textures with alpha. 2009-07-04 19:32:09 +00:00
2d38f4fb2c Update cmake scripts to use new FindCUDA package. 2009-06-26 06:34:19 +00:00
3adf00b4b9 Use official FindCUDA cmake script. 2009-06-26 06:33:34 +00:00
63897f3fe6 Mark threads as required. 2009-06-13 14:27:53 +00:00
71f29a27f3 Fix error in zero padding. 2009-06-13 14:18:31 +00:00
720be412fa Return correct error codes. Fix issue 92. 2009-06-13 14:17:46 +00:00
8d361eee22 Use memory allocator correctly. 2009-06-13 14:17:10 +00:00
8d54d22cb2 Update vc9 project files. 2009-04-21 09:31:59 +00:00
782a127071 Add alpha flag to DXT1a files. 2009-04-16 20:06:08 +00:00
154e117e13 Update nvmath project file. 2009-04-16 08:43:16 +00:00
603d8ad1a2 Update vc9 project. 2009-04-16 08:38:39 +00:00
bed4d78f6b Remove static member that was not thread safe! 2009-04-12 03:13:13 +00:00
d7f8fba7a7 Add comment about thread safety. 2009-04-12 03:10:40 +00:00
e0a7e103c1 Add include to all configurations. 2009-04-11 16:10:31 +00:00
319ed6bac0 Required include added 2009-04-11 16:07:12 +00:00
9e959d0191 Update wrapper, disable error handler temporarily. 2009-04-03 21:39:44 +00:00
53265596a3 Update nvtt wrapper. 2009-04-03 21:37:50 +00:00
ae24cb163d Remove msvc warnings. 2009-04-01 07:17:25 +00:00
fb75d6065d Update projects. 2009-04-01 07:13:47 +00:00
ae744f88e6 Add constructor that takes a stream. 2009-04-01 07:13:13 +00:00
5ac76b68c9 Add option to select decompression algorithm to test suite. 2009-03-24 17:35:40 +00:00
f2090df7a5 Add support for FreeBSD. Patch by AMDmi3. 2009-03-21 07:44:26 +00:00
0a8de141a6 Fix errors on win32. Define function pointers properly. 2009-03-21 07:43:15 +00:00
9aaee3ae16 Add proper todo message. 2009-03-21 07:42:36 +00:00
28 changed files with 1084 additions and 911 deletions

View File

@ -1,3 +1,16 @@
NVIDIA Texture Tools version 2.0.8
* Fix float to fixed image conversion. Patch provided by Alex Pfaffe. Fixes issue 121.
* ColorBlock::isSingleColor compares only RGB channels. Fixes issue 115.
* Fix cmake build in msvc. Fixes issue 111.
* Better estimate principal component. Fixes issue 120.
NVIDIA Texture Tools version 2.0.7
* Output correct exit codes. Fixes issue 92.
* Fix thread-safety errors. Fixes issue 90.
* Add SIMD power method. Fixes issue 94.
* Interact better with applications that already use CUDA.
* Faster CPU compression.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.

View File

@ -1 +1 @@
2.0.6
2.0.8

View File

@ -53,11 +53,7 @@ ENDIF(ZLIB_FOUND)
IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
SET(OPENEXR_FOUND TRUE)
SET(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATH} CACHE STRING "The include paths needed to use OpenEXR")
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR")
IF(OPENEXR_ILMTHREAD_LIBRARY)
SET(OPENEXR_LIBRARIES ${OPENEXR_LIBRARIES} ${OPENEXR_ILMTHREAD_LIBRARY})
ENDIF(OPENEXR_ILMTHREAD_LIBRARY)
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR")
ENDIF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
IF(OPENEXR_FOUND)

View File

@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 0, 6, 0"
VALUE "FileVersion", "2, 0, 8, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "LegalCopyright", "Copyright (C) 2007-2010"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 0, 6, 0"
VALUE "ProductVersion", "2, 0, 8, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -179,6 +179,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -344,6 +346,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -503,6 +507,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -664,6 +670,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
</Configurations>

View File

@ -105,7 +105,8 @@ ENDIF(OPENEXR_FOUND)
FIND_PACKAGE(Qt4)
# Threads
FIND_PACKAGE(Threads)
FIND_PACKAGE(Threads REQUIRED)
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
# configuration file
INCLUDE(CheckIncludeFiles)

View File

@ -33,6 +33,7 @@ IF(UNIX)
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_DEFINITIONS(-DNVCORE_SHARED=1)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)
ADD_LIBRARY(nvcore ${CORE_SRCS})

View File

@ -38,7 +38,7 @@
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <ucontext.h>
# include <sys/ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace

View File

@ -115,6 +115,7 @@ namespace nv
{
NVCORE_API void dumpInfo();
// These functions are not thread safe.
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();

View File

@ -545,8 +545,6 @@ const char * Path::extension(const char * str)
}
// static
String String::s_null(String::null);
/// Clone this string
String String::clone() const
@ -557,13 +555,13 @@ String String::clone() const
void String::setString(const char * str)
{
if( str == NULL ) {
data = s_null.data;
if (str == NULL) {
data = NULL;
}
else {
allocString( str );
addRef();
}
addRef();
}
void String::setString(const char * str, int length)
@ -576,11 +574,11 @@ void String::setString(const char * str, int length)
void String::setString(const StringBuilder & str)
{
if( str.str() == NULL ) {
data = s_null.data;
if (str.str() == NULL) {
data = NULL;
}
else {
allocString(str);
addRef();
}
addRef();
}

View File

@ -17,7 +17,7 @@ namespace nv
/// String hash based on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i;
uint i = 0;
while(data[i] != 0) {
h = (33 * h) ^ uint(data[i]);
i++;
@ -151,15 +151,14 @@ namespace nv
/// Constructs a null string. @sa isNull()
String()
{
data = s_null.data;
addRef();
data = NULL;
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
addRef();
if (data != NULL) addRef();
}
/// Constructs a shared string from a standard string.
@ -183,7 +182,6 @@ namespace nv
/// Dtor.
~String()
{
nvDebugCheck(data != NULL);
release();
}
@ -220,43 +218,49 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return true;
}
if ((data == NULL) != (str.data == NULL)) {
return false;
}
return strcmp(data, str.data) == 0;
}
/// Equal operator.
bool operator==( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) == 0;
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return false;
}
if ((data == NULL) != (str.data == NULL)) {
return true;
}
return strcmp(data, str.data) != 0;
}
/// Not equal operator.
///
/// Compares this string against a C string.
/// @param str  C string to compare with; must not be NULL (use isNull() to
///             test for the null string instead).
/// @return true when the contents differ. A null string (data == NULL) is
///         different from every valid C string, so it yields true.
bool operator!=( const char * str ) const
{
    nvCheck(str != NULL); // Use isNull!
    if (data == NULL) {
        // operator==(const char *) reports false for a null string, so the
        // negated comparison has to report true here.
        return true;
    }
    return strcmp(data, str) != 0;
}
/// Returns true if this string is the null string.
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
bool isNull() const { return data == NULL; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
@ -265,44 +269,45 @@ namespace nv
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { nvDebugCheck(data != NULL); return data; }
operator const char * () const { return data; }
/// Get string pointer.
const char * str() const { nvDebugCheck(data != NULL); return data; }
const char * str() const { return data; }
private:
enum null_t { null };
// Private constructor for null string.
String(null_t) {
setString("");
}
// Add reference count.
void addRef() {
nvDebugCheck(data != NULL);
setRefCount(getRefCount() + 1);
}
// Decrease reference count.
void release() {
nvDebugCheck(data != NULL);
const uint16 count = getRefCount();
setRefCount(count - 1);
if( count - 1 == 0 ) {
mem::free(data - 2);
data = NULL;
void addRef()
{
if (data != NULL)
{
setRefCount(getRefCount() + 1);
}
}
uint16 getRefCount() const {
// Decrease reference count.
void release()
{
if (data != NULL)
{
const uint16 count = getRefCount();
setRefCount(count - 1);
if (count - 1 == 0) {
mem::free(data - 2);
data = NULL;
}
}
}
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
return *reinterpret_cast<const uint16 *>(data - 2);
}
void setRefCount(uint16 count) {
nvDebugCheck(data != NULL);
nvCheck(count < 0xFFFF);
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
@ -341,8 +346,6 @@ namespace nv
private:
NVCORE_API static String s_null;
const char * data;
};

View File

@ -53,6 +53,7 @@ ENDIF(OPENEXR_FOUND)
ADD_DEFINITIONS(-DNVIMAGE_EXPORTS)
IF(NVIMAGE_SHARED)
ADD_DEFINITIONS(-DNVIMAGE_SHARED=1)
ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS})
ELSE(NVIMAGE_SHARED)
ADD_LIBRARY(nvimage ${IMAGE_SRCS})

View File

@ -113,15 +113,18 @@ void ColorBlock::splatY()
/// Returns true if the block has a single color.
bool ColorBlock::isSingleColor() const
{
for(int i = 1; i < 16; i++)
{
if (m_color[0] != m_color[i])
{
return false;
}
}
return true;
Color32 mask(0xFF, 0xFF, 0xFF, 0x00);
uint u = m_color[0].u & mask.u;
for (int i = 1; i < 16; i++)
{
if (u != (m_color[i].u & mask.u))
{
return false;
}
}
return true;
}
/// Count number of unique colors in this color block.

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (6); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (8); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -78,7 +78,7 @@ void Image::unwrap()
void Image::free()
{
::free(m_data);
nv::mem::free(m_data);
m_data = NULL;
}

View File

@ -19,6 +19,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVMATH_EXPORTS)
IF(NVMATH_SHARED)
ADD_DEFINITIONS(-DNVMATH_SHARED=1)
ADD_LIBRARY(nvmath SHARED ${MATH_SRCS})
ELSE(NVMATH_SHARED)
ADD_LIBRARY(nvmath ${MATH_SRCS})

View File

@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
Matrix r;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; i++)
for (int j = 0; j < 4; j++)
{
r(i, j) = m(j, i);
}

View File

@ -205,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT1 block;
//squish::WeightedClusterFit fit;
squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
squish::FastClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -221,7 +221,7 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}

View File

@ -53,7 +53,7 @@ using namespace nvtt;
namespace
{
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a) {
@ -121,15 +121,13 @@ namespace nvtt
m_fixedImage = NULL;
m_floatImage = image;
}
// Convert linear float image to fixed image ready for compression.
void toFixedImage(const InputOptions::Private & inputOptions)
{
if (this->asFixedImage() == NULL)
if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it!
{
nvDebugCheck(m_floatImage != NULL);
if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
{
m_fixedImage = m_floatImage->createImage();
@ -153,7 +151,7 @@ namespace nvtt
if (inputOptions.isNormalMap)
{
// Expand normals to [-1, 1] range.
// floatImage->expandNormals(0);
// floatImage->expandNormals(0);
}
else if (inputOptions.inputGamma != 1.0f)
{
@ -175,11 +173,12 @@ namespace nvtt
const Image * asFixedImage() const
{
if (m_inputImage != NULL)
// - apfaffe - switched logic to return the 'processed image' rather than the input!
if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL)
{
return m_inputImage;
return m_fixedImage.ptr();
}
return m_fixedImage.ptr();
return m_inputImage;
}
Image * asMutableFixedImage()
@ -193,7 +192,7 @@ namespace nvtt
return m_fixedImage.ptr();
}
private:
const Image * m_inputImage;
AutoPtr<Image> m_fixedImage;
@ -207,28 +206,16 @@ Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = m.cudaSupported;
m.cudaEnabled = false;
m.cudaDevice = -1;
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
enableCudaAcceleration(m.cudaSupported);
}
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
cuda::exit();
}
@ -237,21 +224,33 @@ void Compressor::enableCudaAcceleration(bool enable)
{
if (m.cudaSupported)
{
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
if (m.cudaEnabled && !enable)
{
m.cudaEnabled = false;
m.cuda = NULL;
if (m.cudaDevice != -1)
{
// Exit device.
cuda::exitDevice();
}
}
else if (!m.cudaEnabled && enable)
{
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
if (m.cudaEnabled)
{
// Create compressor if initialization succeeds.
m.cuda = new CudaCompressor();
// But cleanup if failed.
if (!m.cuda->isValid())
{
enableCudaAcceleration(false);
}
}
}
}
}
@ -292,9 +291,9 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_FileOpen);
return false;
}
inputOptions.computeTargetExtents();
// Output DDS header.
if (!outputHeader(inputOptions, compressionOptions, outputOptions))
{
@ -310,7 +309,7 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
}
outputOptions.closeFile();
return true;
}
@ -325,15 +324,15 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
}
DDSHeader header;
header.setWidth(inputOptions.targetWidth);
header.setHeight(inputOptions.targetHeight);
int mipmapCount = inputOptions.realMipmapCount();
nvDebugCheck(mipmapCount > 0);
header.setMipmapCount(mipmapCount);
if (inputOptions.textureType == TextureType_2D) {
header.setTexture2D();
}
@ -341,10 +340,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setTextureCube();
}
/*else if (inputOptions.textureType == TextureType_3D) {
header.setTexture3D();
header.setDepth(inputOptions.targetDepth);
header.setTexture3D();
header.setDepth(inputOptions.targetDepth);
}*/
if (compressionOptions.format == Format_RGBA)
{
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
@ -353,7 +352,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
else
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
@ -376,10 +375,10 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
header.swapBytes();
uint headerSize = 128;
if (header.hasDX10Header())
{
@ -392,7 +391,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{
outputOptions.errorHandler->error(Error_FileWrite);
}
return writeSucceed;
}
@ -428,7 +427,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
return false;
}
}
quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
@ -438,7 +437,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
h = max(1U, h / 2);
d = max(1U, d / 2);
}
return true;
}
@ -489,7 +488,7 @@ int Compressor::Private::findExactMipmap(const InputOptions::Private & inputOpti
{
int idx = f * inputOptions.mipmapCount + m;
const InputOptions::Private::InputImage & inputImage = inputOptions.images[idx];
if (inputImage.width == int(w) && inputImage.height == int(h) && inputImage.depth == int(d))
{
if (inputImage.data != NULL)
@ -544,7 +543,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
mipmap.toFloatImage(inputOptions);
const FloatImage * floatImage = mipmap.asFloatImage();
if (inputOptions.mipmapFilter == MipmapFilter_Box)
{
// Use fast downsample.
@ -562,7 +561,7 @@ void Compressor::Private::downsampleMipmap(Mipmap & mipmap, const InputOptions::
filter.setParameters(inputOptions.kaiserAlpha, inputOptions.kaiserStretch);
mipmap.setImage(floatImage->downSample(filter, (FloatImage::WrapMode)inputOptions.wrapMode));
}
// Normalize mipmap.
if ((inputOptions.isNormalMap || inputOptions.convertToNormalMap) && inputOptions.normalizeMipmaps)
{
@ -590,7 +589,7 @@ void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions:
if (inputOptions.convertToNormalMap)
{
mipmap.toFixedImage(inputOptions);
Vector4 heightScale = inputOptions.heightFactors;
mipmap.setImage(createNormalMap(mipmap.asFixedImage(), (FloatImage::WrapMode)inputOptions.wrapMode, heightScale, inputOptions.bumpFrequencyScale));
}
@ -715,29 +714,29 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif
#if defined(HAVE_ATITC)
if (compressionOptions.externalCompressor == "ati")
{
atiCompressDXT1(image, outputOptions);
}
else
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
if (compressionOptions.externalCompressor == "ati")
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
atiCompressDXT1(image, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
#endif
if (compressionOptions.quality == Quality_Fastest)
{
fast.compressDXT1(outputOptions);
}
else
{
if (useCuda)
{
nvDebugCheck(cudaSupported);
cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
}
else
{
slow.compressDXT1(compressionOptions, outputOptions);
}
}
}
else if (compressionOptions.format == Format_DXT1a)
{
@ -828,27 +827,27 @@ int Compressor::Private::estimateSize(const InputOptions::Private & inputOptions
const uint bitCount = compressionOptions.bitcount;
inputOptions.computeTargetExtents();
uint mipmapCount = inputOptions.realMipmapCount();
int size = 0;
for (uint f = 0; f < inputOptions.faceCount; f++)
{
uint w = inputOptions.targetWidth;
uint h = inputOptions.targetHeight;
uint d = inputOptions.targetDepth;
for (uint m = 0; m < mipmapCount; m++)
{
size += computeImageSize(w, h, d, bitCount, format);
// Compute extents of next mipmap:
w = max(1U, w / 2);
h = max(1U, h / 2);
d = max(1U, d / 2);
}
}
return size;
}

View File

@ -63,10 +63,12 @@ namespace nvtt
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;

View File

@ -94,7 +94,7 @@ void InputOptions::reset()
m.textureType = TextureType_2D;
m.inputFormat = InputFormat_BGRA_8UB;
m.alphaMode = AlphaMode_Transparency;
m.alphaMode = AlphaMode_None;
m.inputGamma = 2.2f;
m.outputGamma = 2.2f;

View File

@ -127,8 +127,21 @@ inline __device__ __host__ float3 normalize(float3 v)
inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]);
float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]);
float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]);
float3 v = make_float3(1.0f, 1.0f, 1.0f);
float r0 = dot(row0, row0);
float r1 = dot(row1, row1);
float r2 = dot(row2, row2);
float3 v;
if (r0 > r1 && r0 > r2) v = row0;
else if (r1 > r2) v = row1;
else v = row2;
//float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];

View File

@ -1,239 +1,300 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
/// Check whether the installed CUDA driver satisfies the given version.
///
/// The driver library is loaded dynamically and probed for entry points that
/// were introduced by successive driver releases; from 2.2 on the driver is
/// asked for its version directly.
///
/// @param version  Required version, in the same encoding as CUDART_VERSION
///                 (2000 = 2.0, 2010 = 2.1, 2020 = 2.2).
/// @return false if the driver library cannot be loaded, lacks a required
///         entry point, or reports an older version; true otherwise. Always
///         true when built without HAVE_CUDA.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        nvDebug("*** CUDA driver not found.\n");
        return false;
    }

    if (version >= 2000)
    {
        // cuStreamCreate is the entry point used to recognise a 2.0 driver.
        void * address = nvcuda.bindSymbol("cuStreamCreate");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.0.\n");
            return false;
        }
    }

    if (version >= 2010)
    {
        // cuModuleLoadDataEx is the entry point used to recognise a 2.1 driver.
        void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.1.\n");
            return false;
        }
    }

    if (version >= 2020)
    {
        typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);

        PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
        if (driverGetVersion == NULL) {
            nvDebug("*** CUDA driver version < 2.2.\n");
            return false;
        }

        int driverVersion = 0;
        const CUresult err = driverGetVersion(&driverVersion);
        if (err != CUDA_SUCCESS) {
            // err is a driver API code (CUresult). cudaGetErrorString only
            // understands runtime API codes (cudaError_t), so casting would
            // print an unrelated message; log the raw code instead.
            nvDebug("*** Error querying driver version: %d.\n", int(err));
            return false;
        }

        return driverVersion >= version;
    }
#endif // HAVE_CUDA

    return true;
}
/// Determine if CUDA is available.
///
/// @return true when the driver matches the CUDA runtime this file was built against and
///         at least one device is reported. A lone device is rejected when its properties
///         cannot be read or when it is the emulation device. Always false without HAVE_CUDA.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
    // Make sure that CUDA driver matches CUDA runtime.
    if (!isCudaDriverAvailable(CUDART_VERSION))
    {
        nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
        return false;
    }

    const int count = deviceCount();

    if (count == 1)
    {
        // Make sure it's not an emulation device.
        cudaDeviceProp deviceProp;
        if (cudaGetDeviceProperties(&deviceProp, 0) != cudaSuccess)
        {
            // The struct was not filled in; do not inspect it.
            return false;
        }

        // deviceProp.name != Device Emulation (CPU)
        if (deviceProp.major == -1 || deviceProp.minor == -1)
        {
            return false;
        }
    }

    // @@ Make sure that warp size == 32

    return count > 0;
#else
    return false;
#endif
}
/// Report how many devices the CUDA runtime enumerates.
///
/// @return The count from cudaGetDeviceCount(), or 0 when the query fails or the build
///         has no CUDA support.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
    int found = 0;
    if (cudaGetDeviceCount(&found) == cudaSuccess)
    {
        return found;
    }
#endif

    return 0;
}
/// Pick the device with the highest multiprocessor-count * clock-rate score.
///
/// Emulation devices (major or minor of -1) and devices whose properties cannot be
/// read are not considered.
///
/// @return Ordinal of the best scoring device, or 0 when no device scored above zero
///         or the build has no CUDA support.
int nv::cuda::getFastestDevice()
{
    int max_gflops_device = 0;

#if defined HAVE_CUDA
    int max_gflops = 0;
    const int device_count = deviceCount();

    for (int current_device = 0; current_device < device_count; ++current_device)
    {
        cudaDeviceProp device_properties;
        if (cudaGetDeviceProperties(&device_properties, current_device) != cudaSuccess)
        {
            // The struct was not filled in; scoring it would read garbage.
            continue;
        }

        if (device_properties.major == -1 || device_properties.minor == -1)
        {
            // Emulation device.
            continue;
        }

        const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
        if (gflops > max_gflops)
        {
            max_gflops = gflops;
            max_gflops_device = current_device;
        }
    }
#endif

    return max_gflops_device;
}
/// Activate the device with ordinal @a i.
///
/// @param i  Device ordinal; checked against deviceCount().
/// @return true if cudaSetDevice() succeeded. On failure the CUDA error text is logged.
///         Always false without HAVE_CUDA.
bool nv::cuda::setDevice(int i)
{
    nvCheck(i < deviceCount());

#if defined HAVE_CUDA
    const cudaError_t status = cudaSetDevice(i);
    if (status == cudaSuccess)
    {
        return true;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
    return false;
#else
    return false;
#endif
}
/// Shut CUDA down through cudaThreadExit(), logging the error text if the call fails.
/// Does nothing without HAVE_CUDA.
void nv::cuda::exit()
{
#if defined HAVE_CUDA
    const cudaError_t status = cudaThreadExit();
    if (status == cudaSuccess)
    {
        return;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Debug.h>
#include <nvcore/Library.h>
#include "CudaUtils.h"
#if defined HAVE_CUDA
#include <cuda.h>
#include <cuda_runtime_api.h>
#endif
using namespace nv;
using namespace cuda;
/* @@ Move this to win32 utils or somewhere else.
#if NV_OS_WIN32
#define WINDOWS_LEAN_AND_MEAN
#include <windows.h>
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
}
#endif
*/
/// Check whether the installed CUDA driver can serve the requested version.
///
/// The checks are cumulative: a request for 2.2 also runs the 2.0 and 2.1 probes.
///
/// @param version  Required version, encoded like CUDART_VERSION (2000, 2010, 2020, ...).
/// @return true if the driver library loads and satisfies every probe that applies to
///         @a version. Always true when built without HAVE_CUDA.
static bool isCudaDriverAvailable(int version)
{
#if defined HAVE_CUDA
#if NV_OS_WIN32
    Library nvcuda("nvcuda.dll");
#else
    Library nvcuda(NV_LIBRARY_NAME(cuda));
#endif

    if (!nvcuda.isValid())
    {
        nvDebug("*** CUDA driver not found.\n");
        return false;
    }

    // The presence of this entry point is taken as evidence of a 2.0 or newer driver.
    if (version >= 2000)
    {
        void * address = nvcuda.bindSymbol("cuStreamCreate");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.0.\n");
            return false;
        }
    }

    // The presence of this entry point is taken as evidence of a 2.1 or newer driver.
    if (version >= 2010)
    {
        void * address = nvcuda.bindSymbol("cuModuleLoadDataEx");
        if (address == NULL) {
            nvDebug("*** CUDA driver version < 2.1.\n");
            return false;
        }
    }

    // From 2.2 on the driver is asked for its version directly.
    if (version >= 2020)
    {
        typedef CUresult (CUDAAPI * PFCU_DRIVERGETVERSION)(int * version);

        PFCU_DRIVERGETVERSION driverGetVersion = (PFCU_DRIVERGETVERSION)nvcuda.bindSymbol("cuDriverGetVersion");
        if (driverGetVersion == NULL) {
            nvDebug("*** CUDA driver version < 2.2.\n");
            return false;
        }

        int driverVersion = 0;
        CUresult err = driverGetVersion(&driverVersion);
        if (err != CUDA_SUCCESS) {
            // CUresult is a driver API code; it must not be fed to the runtime API's
            // cudaGetErrorString(), so the raw value is reported instead.
            nvDebug("*** Error querying driver version: CUresult %d.\n", static_cast<int>(err));
            return false;
        }

        return driverVersion >= version;
    }
#endif // HAVE_CUDA

    return true;
}
/// Determine if CUDA is available.
///
/// @return true when the driver matches the CUDA runtime this file was built against and
///         at least one device is reported. A lone device is rejected when its properties
///         cannot be read or when it is the emulation device. Always false without HAVE_CUDA.
bool nv::cuda::isHardwarePresent()
{
#if defined HAVE_CUDA
    // Make sure that CUDA driver matches CUDA runtime.
    if (!isCudaDriverAvailable(CUDART_VERSION))
    {
        nvDebug("CUDA driver not available for CUDA runtime %d\n", CUDART_VERSION);
        return false;
    }

    const int count = deviceCount();

    if (count == 1)
    {
        // Make sure it's not an emulation device.
        cudaDeviceProp deviceProp;
        if (cudaGetDeviceProperties(&deviceProp, 0) != cudaSuccess)
        {
            // The struct was not filled in; do not inspect it.
            return false;
        }

        // deviceProp.name != Device Emulation (CPU)
        if (deviceProp.major == -1 || deviceProp.minor == -1)
        {
            return false;
        }
    }

    // @@ Make sure that warp size == 32
    // @@ Make sure available GPU is faster than the CPU.

    return count > 0;
#else
    return false;
#endif
}
/// Number of devices enumerated by the CUDA runtime.
///
/// @return The value written by cudaGetDeviceCount() when it succeeds; 0 when it fails
///         or when the build has no CUDA support.
int nv::cuda::deviceCount()
{
#if defined HAVE_CUDA
    int enumerated = 0;
    const cudaError_t status = cudaGetDeviceCount(&enumerated);
    if (status != cudaSuccess)
    {
        return 0;
    }
    return enumerated;
#else
    return 0;
#endif
}
/// Check that device @a i meets the requirements for use.
///
/// Currently enforced:
///  - The device properties can be queried.
///  - Not an emulation device (major and minor are not -1).
/// Not enforced yet (see the disabled code below and the original notes):
///  - Not an integrated device?
///  - Faster than CPU.
///
/// @param i  Device ordinal to inspect.
/// @return true if the device passes the enforced checks. Always false without HAVE_CUDA.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
    cudaDeviceProp device_properties;
    if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
    {
        // The struct was not filled in (for instance a bad ordinal); do not inspect it.
        return false;
    }

    if (device_properties.major == -1 || device_properties.minor == -1) {
        // Emulation device.
        return false;
    }

#if CUDART_VERSION >= 2030 // 2.3
    /*if (device_properties.integrated)
    {
        // Integrated devices.
        return false;
    }*/
#endif

    return true;
#else
    return false;
#endif
}
/// Pick the valid device with the highest multiprocessor-count * clock-rate score.
///
/// Only devices accepted by isValidDevice() are scored.
///
/// @return Ordinal of the best scoring device, or -1 when no valid device scored above
///         zero or the build has no CUDA support.
int nv::cuda::getFastestDevice()
{
    int max_gflops_device = -1;

#if defined HAVE_CUDA
    int max_gflops = 0;
    const int device_count = deviceCount();

    for (int i = 0; i < device_count; i++)
    {
        if (!isValidDevice(i))
        {
            continue;
        }

        cudaDeviceProp device_properties;
        if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
        {
            // The struct was not filled in; scoring it would read garbage.
            continue;
        }

        const int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
        if (gflops > max_gflops)
        {
            max_gflops = gflops;
            max_gflops_device = i;
        }
    }
#endif

    return max_gflops_device;
}
/// Pick the fastest valid CUDA device and make it the active one.
///
/// @param device_ptr  Must not be NULL. Receives the ordinal of the device selected by
///                    this call, or -1 when this call selected nothing (no valid device,
///                    an error, or a device was already active), i.e. when there is no
///                    device for the caller to clean up.
/// @return true when a usable device is active: either the one selected here, or an
///         already active device that passes isValidDevice(). Always false without HAVE_CUDA.
bool nv::cuda::initDevice(int * device_ptr)
{
    nvDebugCheck(device_ptr != NULL);

#if defined HAVE_CUDA
#if CUDART_VERSION >= 2030 // 2.3
    // Set device flags to yield in order to play nice with other threads. The status is
    // deliberately discarded: the already-active case is detected by cudaSetDevice below.
    (void)cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif

    const int fastest = getFastestDevice();
    if (fastest == -1)
    {
        // No device is fast enough.
        *device_ptr = -1;
        return false;
    }

    // Select CUDA device.
    cudaError_t result = cudaSetDevice(fastest);

    if (result == cudaErrorSetOnActiveProcess)
    {
        // A device is already active; it is validated instead of the one picked above.
        *device_ptr = -1; // No device to cleanup.

        int active_device = -1;
        result = cudaGetDevice(&active_device);
        if (result != cudaSuccess)
        {
            // Without a known ordinal there is nothing to validate.
            nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
            return false;
        }

        return isValidDevice(active_device); // Return true if device is valid.
    }
    else if (result != cudaSuccess)
    {
        nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
        *device_ptr = -1;
        return false;
    }

    *device_ptr = fastest;
    return true;
#else
    // Keep the output defined even in builds without CUDA support.
    if (device_ptr != NULL)
    {
        *device_ptr = -1;
    }
    return false;
#endif
}
/// Shut CUDA down through cudaThreadExit(), logging the error text if the call fails.
/// Does nothing without HAVE_CUDA.
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
    const cudaError_t status = cudaThreadExit();
    if (status == cudaSuccess)
    {
        return;
    }

    nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(status));
#endif
}

View File

@ -32,8 +32,10 @@ namespace nv
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool setDevice(int i);
void exit();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
};
} // nv namespace

View File

@ -73,7 +73,7 @@ namespace nvtt
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
// DX10 formats.
Format_BC1 = Format_DXT1,
@ -194,7 +194,7 @@ namespace nvtt
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted. @@ Not implemented!
// Set the way the input alpha channel is interpreted.
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.

View File

@ -24,6 +24,7 @@
-------------------------------------------------------------------------- */
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
@ -60,12 +61,61 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
}
// Return the row of the symmetric matrix with the largest squared length.
// Ties fall through to the later row: row0 wins only if strictly longer than both
// others, otherwise row1 wins only if strictly longer than row2.
static Vec3 EstimatePrincipleComponent( Sym3x3 const& matrix )
{
    // Expand the six stored coefficients into the three full rows.
    Vec3 const first( matrix[0], matrix[1], matrix[2] );
    Vec3 const second( matrix[1], matrix[3], matrix[4] );
    Vec3 const third( matrix[2], matrix[4], matrix[5] );

    float const firstLenSq = Dot( first, first );
    float const secondLenSq = Dot( second, second );
    float const thirdLenSq = Dot( third, third );

    if( firstLenSq > secondLenSq && firstLenSq > thirdLenSq )
    {
        return first;
    }

    return ( secondLenSq > thirdLenSq ) ? second : third;
}
#define POWER_ITERATION_COUNT 8
#if SQUISH_USE_SIMD
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
const int NUM = 8;
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
Vec3 v(1, 1, 1);
for(int i = 0; i < NUM; i++) {
//Vec4 v = VEC4_CONST( 1.0f );
//Vec4 v = row0; // row1, row2
Vec3 v3 = EstimatePrincipleComponent( matrix );
Vec4 v( v3.X(), v3.Y(), v3.Z(), 0.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
return v.GetVec3();
}
#else
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec3 v = EstimatePrincipleComponent( matrix );
for (int i = 0; i < POWER_ITERATION_COUNT; i++)
{
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@ -82,5 +132,6 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
return v;
}
#endif
} // namespace squish

File diff suppressed because it is too large Load Diff

View File

@ -87,7 +87,10 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -131,6 +134,7 @@ int main(int argc, char *argv[])
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool alpha = false;
bool normal = false;
bool color2normal = false;
bool wrapRepeat = false;
@ -154,6 +158,10 @@ int main(int argc, char *argv[])
if (strcmp("-color", argv[i]) == 0)
{
}
else if (strcmp("-alpha", argv[i]) == 0)
{
alpha = true;
}
else if (strcmp("-normal", argv[i]) == 0)
{
normal = true;
@ -254,7 +262,12 @@ int main(int argc, char *argv[])
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
if (input.isNull())
{
@ -262,6 +275,7 @@ int main(int argc, char *argv[])
printf("Input options:\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -alpha \tThe input image has an alpha channel used for transparency.\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
@ -281,7 +295,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
return 1;
return EXIT_FAILURE;
}
// @@ Make sure input file exists.
@ -296,13 +310,13 @@ int main(int argc, char *argv[])
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return 1;
return EXIT_FAILURE;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return 1;
return EXIT_FAILURE;
}
uint faceCount;
@ -339,7 +353,7 @@ int main(int argc, char *argv[])
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
return EXIT_FAILURE;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
@ -355,6 +369,15 @@ int main(int argc, char *argv[])
inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
}
if (alpha)
{
inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
}
else
{
inputOptions.setAlphaMode(nvtt::AlphaMode_None);
}
if (normal)
{
setNormalMap(inputOptions);
@ -402,7 +425,7 @@ int main(int argc, char *argv[])
if (outputHandler.stream->isError())
{
fprintf(stderr, "Error opening '%s' for writting\n", output.str());
return 1;
return EXIT_FAILURE;
}
nvtt::Compressor compressor;
@ -416,7 +439,7 @@ int main(int argc, char *argv[])
else
{
printf("DISABLED\n\n");
}
}
outputHandler.setTotal(compressor.estimateSize(inputOptions, compressionOptions));
outputHandler.setDisplayProgress(!silent);
@ -430,27 +453,16 @@ int main(int argc, char *argv[])
// fflush(stdout);
// getchar();
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
return EXIT_SUCCESS;
}