112 Commits
2.0.6 ... 2.0.8

Author SHA1 Message Date
2587e2cf79 Update version number in resource file. Fixes issue 124. 2010-05-15 09:12:05 +00:00
eb01ca604f Tag 2.0.8 for release. 2010-05-14 18:01:41 +00:00
f6a39d6eab Remove duplicate code. 2010-05-14 04:58:08 +00:00
4fa93f8676 Update vc9 project. 2010-05-11 18:20:06 +00:00
639f11d0b9 Use the same defaults as nvtt 2.0 2010-05-11 17:18:08 +00:00
6e32d1e010 Update latest stb_dxt. 2010-05-09 08:21:34 +00:00
85db14f213 Add vertical flip. 2010-05-05 00:24:31 +00:00
ac5f849e91 Better estimation of principal component. Fixes issue 120. 2010-04-19 18:41:42 +00:00
4783d5621e Add missing files to cmake build as reported by amorilia. 2010-03-21 18:38:26 +00:00
85b9c2b0c5 Fix FindOpenEXR cmake script. Fixes issue 97. 2010-03-18 00:21:13 +00:00
447634d263 Fix cmake build in msvc. Fixes issue 111. 2010-03-18 00:13:31 +00:00
f436a71479 Add support for RGBE. 2010-03-17 07:55:03 +00:00
e848645e88 Add a few utils. Start converting tabs to spaces. 2010-03-17 07:54:07 +00:00
78b55e1e37 Finish pixel format converter. 2010-03-17 02:25:06 +00:00
3b0e0c3402 Fix errors after merge. Update cmake files. 2010-03-16 22:46:09 +00:00
a19e25228f reorg. 2010-03-16 22:37:25 +00:00
d8ed531e4c minor optimization. 2010-03-16 22:36:14 +00:00
73851ab86f misc 2010-03-16 22:35:38 +00:00
0cabfb6cfa misc. 2010-03-16 22:35:20 +00:00
1fafffd8af Detect OpenMP properly. 2010-03-16 22:31:27 +00:00
96f22f8cd4 Fix isColorNoAlpha, but leave it commented out. 2010-03-10 08:49:40 +00:00
558f7970b1 Fix issue 112. 2010-03-02 01:29:48 +00:00
d2fbcc5259 Add external libs. 2009-11-05 09:43:54 +00:00
0e5a1877a8 Some progress on the CUDA compressors. 2009-11-05 09:27:03 +00:00
8e436b1d30 Add post build commands to 64 bit targets. 2009-11-05 00:31:16 +00:00
2f0fe5149f Patch posh to support freebsd. 2009-11-03 23:58:10 +00:00
f744e700ae Add doc folder. 2009-11-03 23:57:36 +00:00
48f5dd4603 Update cmake scripts. 2009-11-03 23:56:25 +00:00
78bb864c14 cmake build system update. 2009-11-03 23:55:38 +00:00
7bc1eb6a29 Add ignore lists. 2009-11-03 23:53:44 +00:00
f63abb3ef6 Add properties. 2009-11-03 23:51:41 +00:00
e8500dead4 Add ignore list. 2009-11-03 23:50:14 +00:00
2e1d1e70ae Add ignore properties. 2009-11-03 23:44:05 +00:00
bc3299b78b Add ignore properties. 2009-11-03 23:43:35 +00:00
d01bdaf370 Add ignore properties. 2009-11-03 23:42:52 +00:00
d90b3d927b Update ignore file. 2009-11-03 23:40:10 +00:00
3e5c47d9fb Add ignore properties. 2009-11-03 23:39:03 +00:00
6a667fff50 Do not use external dependencies. 2009-11-03 23:28:18 +00:00
dfe081d32a Update messages. 2009-11-03 23:27:24 +00:00
acc02abaf1 Fix messages. 2009-11-03 23:17:11 +00:00
8871fefe89 Update project uuid. 2009-11-03 23:08:11 +00:00
2543f4c9ed rename stress->testsuite 2009-11-03 18:48:25 +00:00
ac46c40b3e Rename stress->testsuite 2009-11-03 18:48:03 +00:00
bcf0df2b49 use default pixel format, add comment to indicate where to change default 2009-10-30 01:05:31 +00:00
8c7f54056c Add more todo items.
Delete images more efficiently?
2009-10-21 19:20:30 +00:00
34cd266d8c Add todo item to perform color transforms before compression. 2009-10-21 19:19:09 +00:00
9a9366cf4c Who knows what msvc decided to change. 2009-10-21 18:29:46 +00:00
8820c43175 Large refactoring of compressor codes:
- Define compressor interface.
- Implement compressor interface for different compressors.
- Add parallel compressor using OpenMP. Experimental.
- Add generic GPU compressor, so far only DXT1 enabled.
2009-10-21 07:48:27 +00:00
18a3abf794 Enable alpha dithering when using DXT3.
Update timing message.
2009-10-21 07:43:24 +00:00
384f74ba39 Use minimal set by default. 2009-10-21 07:42:33 +00:00
7d75840398 Add todo messages.
Use DXT3 nvidia decoder if requested.
2009-10-21 07:42:08 +00:00
8ea52efbf4 Add DXT3 nvidia decoder. 2009-10-21 07:40:49 +00:00
d86a89742e Update info message. 2009-10-21 07:40:23 +00:00
fd11f5e7ef Implement generic swizzle, remove specialized ones. 2009-10-21 07:39:59 +00:00
dcfdabaee3 Fix timer. 2009-10-21 07:39:08 +00:00
2d97ee9c03 Update project files. Many minor changes. Enable OpenMP. 2009-10-21 07:38:11 +00:00
14f49b6003 Define new macro to point to extern dir. 2009-10-21 07:37:21 +00:00
ea7dabc6b1 Add comment. 2009-10-19 05:42:27 +00:00
0878c0e967 Add expand and pack normal methods.
Set normal map flag.
2009-10-18 20:04:39 +00:00
a088ae5789 Implement normal map generation for floating point images. 2009-10-18 20:03:21 +00:00
a52d3b7cdc Tweak implementation of scaleBias. 2009-10-18 20:02:43 +00:00
307c418acc Update FindCUDA script to latest. 2009-10-18 08:11:51 +00:00
d0218cb18b Merge changes from 2.0 2009-10-18 08:10:28 +00:00
c1f9c4df42 Create default output handle on setFileName to avoid modifying const argument.
Fix ref counting errors in TexImage.
Format TODO messages.
2009-10-18 08:09:20 +00:00
78d65e8368 When compiling with gcc, define NV_FILE_LINE using gcc convention. 2009-10-18 08:04:25 +00:00
18474cdb33 Some more progress towards 2.1:
- Add raw input methods in context.
- Implement some of the TexImage input methods in context.
- Add output header context method for TexImage.
2009-10-12 07:56:02 +00:00
b7fbd1fc9b Fix error in Snow Leopard. 2009-10-12 00:44:03 +00:00
9de3298d6b Fix cmake build. 2009-10-11 23:51:22 +00:00
568f34d838 Fix color weighted compression for single color blocks. Fixes issue 96. 2009-09-21 18:46:48 +00:00
e38e584db2 Rename texture to teximage. 2009-09-14 22:43:53 +00:00
8655259379 Fix comment. 2009-08-26 01:27:50 +00:00
7d65633f63 Add files to repro bugs/request. 2009-07-28 08:07:48 +00:00
cb62c3c461 Add support for R16 in DDS headers.
Cleanup DDS header output code.
Temporary testing code added to nvcompress.
2009-07-28 08:05:23 +00:00
573cc1b371 Add support for UINT16 images when using freetype. 2009-07-28 08:03:36 +00:00
9c6f6e143e Use tabs not spaces. 2009-07-28 08:03:08 +00:00
126816ef72 Experimental quality improvements and speed optimizations. 2009-07-06 09:08:09 +00:00
2ca6e4a1bd Add support function for stb compressor. 2009-07-06 09:06:40 +00:00
b839b873e1 Remove commented out code. 2009-07-06 09:06:17 +00:00
ab473f4ec5 Add DXT5 tests.
Use timer class.
2009-07-06 09:05:54 +00:00
f1ebbd4da6 Add more third-party compressors. 2009-07-06 09:04:29 +00:00
ac79935c88 Init default values. 2009-07-06 09:03:12 +00:00
2aca4673ab Some progress implementing new api. 2009-07-06 09:02:20 +00:00
43893d5d0f Add NV5x DXT5 decompressor. 2009-07-06 09:00:30 +00:00
009eaf2aa6 Fix msvc warnings. 2009-07-06 08:59:48 +00:00
fd2492670e Add a QPF timer. 2009-07-06 08:57:36 +00:00
7d88f4fa32 Merge changes from 2.0 branch. 2009-07-06 08:56:55 +00:00
60022acaa7 Add dxt5 tests to testsuite. 2009-07-04 19:34:42 +00:00
a5faf51738 Add simd power solver. 2009-07-04 19:33:55 +00:00
8365df0adf Add example textures with alpha. 2009-07-04 19:32:09 +00:00
2d38f4fb2c Update cmake scripts to use new FindCUDA package. 2009-06-26 06:34:19 +00:00
3adf00b4b9 Use official FindCUDA cmake script. 2009-06-26 06:33:34 +00:00
63897f3fe6 Mark threads as required. 2009-06-13 14:27:53 +00:00
71f29a27f3 Fix error in zero padding. 2009-06-13 14:18:31 +00:00
720be412fa Return correct error codes. Fix issue 92. 2009-06-13 14:17:46 +00:00
8d361eee22 Use memory allocator correctly. 2009-06-13 14:17:10 +00:00
8d54d22cb2 Update vc9 project files. 2009-04-21 09:31:59 +00:00
782a127071 Add alpha flag to DXT1a files. 2009-04-16 20:06:08 +00:00
154e117e13 Update nvmath project file. 2009-04-16 08:43:16 +00:00
603d8ad1a2 Update vc9 project. 2009-04-16 08:38:39 +00:00
bed4d78f6b Remove static member that was not thread safe! 2009-04-12 03:13:13 +00:00
d7f8fba7a7 Add comment about thread safety. 2009-04-12 03:10:40 +00:00
e0a7e103c1 Add include to all configurations. 2009-04-11 16:10:31 +00:00
319ed6bac0 Required include added 2009-04-11 16:07:12 +00:00
9e959d0191 Update wrapper, disable error handler temporarily. 2009-04-03 21:39:44 +00:00
53265596a3 Update nvtt wrapper. 2009-04-03 21:37:50 +00:00
ae24cb163d Remove msvc warnings. 2009-04-01 07:17:25 +00:00
fb75d6065d Update projects. 2009-04-01 07:13:47 +00:00
ae744f88e6 Add constructor that takes a stream. 2009-04-01 07:13:13 +00:00
5ac76b68c9 Add option to select decompression algorithm to test suite. 2009-03-24 17:35:40 +00:00
f2090df7a5 Add support for FreeBSD. Patch by AMDmi3. 2009-03-21 07:44:26 +00:00
0a8de141a6 Fix errors on win32. Define function pointers properly. 2009-03-21 07:43:15 +00:00
9aaee3ae16 Add proper todo message. 2009-03-21 07:42:36 +00:00
28 changed files with 1084 additions and 911 deletions

View File

@ -1,3 +1,16 @@
NVIDIA Texture Tools version 2.0.8
* Fix float to fixed image conversion. Patch provided by Alex Pfaffe. Fixes issue 121.
* ColorBlock::isSingleColor compares only RGB channels. Fixes issue 115.
* Fix cmake build in msvc. Fixes issue 111.
* Better estimate principal component. Fixes issue 120.
NVIDIA Texture Tools version 2.0.7
* Output correct exit codes. Fixes issue 92.
* Fix thread-safety errors. Fixes issue 90.
* Add SIMD power method. Fixes issue 94.
* Interact better with applications that already use CUDA.
* Faster CPU compression.
NVIDIA Texture Tools version 2.0.6
* Fix dll version checking.
* Detect CUDA 2.1 and future CUDA versions correctly.

View File

@ -1 +1 @@
2.0.6
2.0.8

View File

@ -53,11 +53,7 @@ ENDIF(ZLIB_FOUND)
IF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
SET(OPENEXR_FOUND TRUE)
SET(OPENEXR_INCLUDE_PATHS ${OPENEXR_INCLUDE_PATH} CACHE STRING "The include paths needed to use OpenEXR")
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR")
IF(OPENEXR_ILMTHREAD_LIBRARY)
SET(OPENEXR_LIBRARIES ${OPENEXR_LIBRARIES} ${OPENEXR_ILMTHREAD_LIBRARY})
ENDIF(OPENEXR_ILMTHREAD_LIBRARY)
SET(OPENEXR_LIBRARIES ${OPENEXR_IMATH_LIBRARY} ${OPENEXR_ILMIMF_LIBRARY} ${OPENEXR_IEX_LIBRARY} ${OPENEXR_HALF_LIBRARY} ${OPENEXR_ILMTHREAD_LIBRARY} ${ZLIB_LIBRARY} CACHE STRING "The libraries needed to use OpenEXR")
ENDIF (OPENEXR_INCLUDE_PATH AND OPENEXR_IMATH_LIBRARY AND OPENEXR_ILMIMF_LIBRARY AND OPENEXR_IEX_LIBRARY AND OPENEXR_HALF_LIBRARY)
IF(OPENEXR_FOUND)

View File

@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 0, 6, 0"
VALUE "FileVersion", "2, 0, 8, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "LegalCopyright", "Copyright (C) 2007-2010"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 0, 6, 0"
VALUE "ProductVersion", "2, 0, 8, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -179,6 +179,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -344,6 +346,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -503,6 +507,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -664,6 +670,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
</Configurations>

View File

@ -105,7 +105,8 @@ ENDIF(OPENEXR_FOUND)
FIND_PACKAGE(Qt4)
# Threads
FIND_PACKAGE(Threads)
FIND_PACKAGE(Threads REQUIRED)
MESSAGE(STATUS "Use thread library: ${CMAKE_THREAD_LIBS_INIT}")
# configuration file
INCLUDE(CheckIncludeFiles)

View File

@ -33,6 +33,7 @@ IF(UNIX)
ENDIF(UNIX)
IF(NVCORE_SHARED)
ADD_DEFINITIONS(-DNVCORE_SHARED=1)
ADD_LIBRARY(nvcore SHARED ${CORE_SRCS})
ELSE(NVCORE_SHARED)
ADD_LIBRARY(nvcore ${CORE_SRCS})

View File

@ -38,7 +38,7 @@
# include <unistd.h> // getpid
# include <sys/types.h>
# include <sys/sysctl.h> // sysctl
# include <ucontext.h>
# include <sys/ucontext.h>
# undef HAVE_EXECINFO_H
# if defined(HAVE_EXECINFO_H) // only after OSX 10.5
# include <execinfo.h> // backtrace

View File

@ -115,6 +115,7 @@ namespace nv
{
NVCORE_API void dumpInfo();
// These functions are not thread safe.
NVCORE_API void setMessageHandler( MessageHandler * messageHandler );
NVCORE_API void resetMessageHandler();

View File

@ -545,8 +545,6 @@ const char * Path::extension(const char * str)
}
// static
String String::s_null(String::null);
/// Clone this string
String String::clone() const
@ -557,13 +555,13 @@ String String::clone() const
void String::setString(const char * str)
{
if( str == NULL ) {
data = s_null.data;
if (str == NULL) {
data = NULL;
}
else {
allocString( str );
}
addRef();
}
}
void String::setString(const char * str, int length)
@ -576,11 +574,11 @@ void String::setString(const char * str, int length)
void String::setString(const StringBuilder & str)
{
if( str.str() == NULL ) {
data = s_null.data;
if (str.str() == NULL) {
data = NULL;
}
else {
allocString(str);
}
addRef();
}
}

View File

@ -17,7 +17,7 @@ namespace nv
/// String hash based on Bernstein's hash.
inline uint strHash(const char * data, uint h = 5381)
{
uint i;
uint i = 0;
while(data[i] != 0) {
h = (33 * h) ^ uint(data[i]);
i++;
@ -151,15 +151,14 @@ namespace nv
/// Constructs a null string. @sa isNull()
String()
{
data = s_null.data;
addRef();
data = NULL;
}
/// Constructs a shared copy of str.
String(const String & str)
{
data = str.data;
addRef();
if (data != NULL) addRef();
}
/// Constructs a shared string from a standard string.
@ -183,7 +182,6 @@ namespace nv
/// Dtor.
~String()
{
nvDebugCheck(data != NULL);
release();
}
@ -220,43 +218,49 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return true;
}
if ((data == NULL) != (str.data == NULL)) {
return false;
}
return strcmp(data, str.data) == 0;
}
/// Equal operator.
bool operator==( const char * str ) const
{
nvDebugCheck(data != NULL);
nvCheck(str != NULL); // Use isNull!
if (data == NULL) {
return false;
}
return strcmp(data, str) == 0;
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
nvDebugCheck(data != NULL);
nvDebugCheck(str.data != NULL);
if( str.data == data ) {
return false;
}
if ((data == NULL) != (str.data == NULL)) {
return true;
}
return strcmp(data, str.data) != 0;
}
/// Not equal operator.
/// Compares this string against a C string. A null String (data == NULL)
/// holds no characters, so it is reported as different from any C string.
/// @param str C string to compare against; must not be NULL (use isNull()).
/// @return true when the contents differ, false when they match.
bool operator!=( const char * str ) const
{
    nvCheck(str != NULL);	// Use isNull!
    if (data == NULL) {
        // Must be the exact negation of operator==(const char *), which
        // answers false for a null string.
        return true;
    }
    return strcmp(data, str) != 0;
}
/// Returns true if this string is the null string.
bool isNull() const { nvDebugCheck(data != NULL); return data == s_null.data; }
bool isNull() const { return data == NULL; }
/// Return the exact length.
uint length() const { nvDebugCheck(data != NULL); return uint(strlen(data)); }
@ -265,44 +269,45 @@ namespace nv
uint hash() const { nvDebugCheck(data != NULL); return strHash(data); }
/// const char * cast operator.
operator const char * () const { nvDebugCheck(data != NULL); return data; }
operator const char * () const { return data; }
/// Get string pointer.
const char * str() const { nvDebugCheck(data != NULL); return data; }
const char * str() const { return data; }
private:
enum null_t { null };
// Private constructor for null string.
String(null_t) {
setString("");
}
// Add reference count.
void addRef() {
nvDebugCheck(data != NULL);
void addRef()
{
if (data != NULL)
{
setRefCount(getRefCount() + 1);
}
}
// Decrease reference count.
void release() {
nvDebugCheck(data != NULL);
void release()
{
if (data != NULL)
{
const uint16 count = getRefCount();
setRefCount(count - 1);
if( count - 1 == 0 ) {
if (count - 1 == 0) {
mem::free(data - 2);
data = NULL;
}
}
}
uint16 getRefCount() const {
uint16 getRefCount() const
{
nvDebugCheck(data != NULL);
return *reinterpret_cast<const uint16 *>(data - 2);
}
// Store a new reference count for the shared string buffer.
// The count is written as a uint16 into the two bytes located immediately
// before the character data (data - 2).
// @param count New reference count; 0xFFFF is rejected by the nvCheck below.
void setRefCount(uint16 count) {
// The counter slot sits in front of the characters, so a null string has none.
nvDebugCheck(data != NULL);
// Catches a count of 0xFFFF, which is also what (0 - 1) becomes once
// narrowed to uint16.
nvCheck(count < 0xFFFF);
// data is a const char *; cast the constness away to reach the counter slot.
*reinterpret_cast<uint16 *>(const_cast<char *>(data - 2)) = uint16(count);
}
@ -341,8 +346,6 @@ namespace nv
private:
NVCORE_API static String s_null;
const char * data;
};

View File

@ -53,6 +53,7 @@ ENDIF(OPENEXR_FOUND)
ADD_DEFINITIONS(-DNVIMAGE_EXPORTS)
IF(NVIMAGE_SHARED)
ADD_DEFINITIONS(-DNVIMAGE_SHARED=1)
ADD_LIBRARY(nvimage SHARED ${IMAGE_SRCS})
ELSE(NVIMAGE_SHARED)
ADD_LIBRARY(nvimage ${IMAGE_SRCS})

View File

@ -113,9 +113,12 @@ void ColorBlock::splatY()
/// Returns true if the block has a single color.
bool ColorBlock::isSingleColor() const
{
for(int i = 1; i < 16; i++)
Color32 mask(0xFF, 0xFF, 0xFF, 0x00);
uint u = m_color[0].u & mask.u;
for (int i = 1; i < 16; i++)
{
if (m_color[0] != m_color[i])
if (u != (m_color[i].u & mask.u))
{
return false;
}

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (2 << 16) | (0 << 8) | (6); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (8); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

View File

@ -78,7 +78,7 @@ void Image::unwrap()
void Image::free()
{
::free(m_data);
nv::mem::free(m_data);
m_data = NULL;
}

View File

@ -19,6 +19,7 @@ INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
ADD_DEFINITIONS(-DNVMATH_EXPORTS)
IF(NVMATH_SHARED)
ADD_DEFINITIONS(-DNVMATH_SHARED=1)
ADD_LIBRARY(nvmath SHARED ${MATH_SRCS})
ELSE(NVMATH_SHARED)
ADD_LIBRARY(nvmath ${MATH_SRCS})

View File

@ -332,7 +332,7 @@ inline Matrix transpose(Matrix::Arg m)
Matrix r;
for (int i = 0; i < 4; i++)
{
for (int j = 0; j < 4; i++)
for (int j = 0; j < 4; j++)
{
r(i, j) = m(j, i);
}

View File

@ -205,9 +205,9 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
ColorBlock rgba;
BlockDXT1 block;
//squish::WeightedClusterFit fit;
squish::WeightedClusterFit fit;
//squish::ClusterFit fit;
squish::FastClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) {
@ -221,7 +221,7 @@ void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compre
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), 0);
squish::ColourSet colours((uint8 *)rgba.colors(), 0, true);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}

View File

@ -126,10 +126,8 @@ namespace nvtt
// Convert linear float image to fixed image ready for compression.
void toFixedImage(const InputOptions::Private & inputOptions)
{
if (this->asFixedImage() == NULL)
if (m_floatImage != NULL) // apfaffe - We should check that we have a float image, if so convert it!
{
nvDebugCheck(m_floatImage != NULL);
if (inputOptions.isNormalMap || inputOptions.outputGamma == 1.0f)
{
m_fixedImage = m_floatImage->createImage();
@ -175,12 +173,13 @@ namespace nvtt
const Image * asFixedImage() const
{
if (m_inputImage != NULL)
// - apfaffe - switched logic to return the 'processed image' rather than the input!
if (m_fixedImage != NULL && m_fixedImage.ptr() != NULL)
{
return m_inputImage;
}
return m_fixedImage.ptr();
}
return m_inputImage;
}
Image * asMutableFixedImage()
{
@ -207,28 +206,16 @@ Compressor::Compressor() : m(*new Compressor::Private())
{
// CUDA initialization.
m.cudaSupported = cuda::isHardwarePresent();
m.cudaEnabled = m.cudaSupported;
if (m.cudaEnabled)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
m.cudaDevice = -1;
enableCudaAcceleration(m.cudaSupported);
}
Compressor::~Compressor()
{
enableCudaAcceleration(false);
delete &m;
cuda::exit();
}
@ -237,21 +224,33 @@ void Compressor::enableCudaAcceleration(bool enable)
{
if (m.cudaSupported)
{
m.cudaEnabled = enable;
}
if (m.cudaEnabled && m.cuda == NULL)
{
// Select fastest CUDA device.
int device = cuda::getFastestDevice();
cuda::setDevice(device);
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
if (m.cudaEnabled && !enable)
{
m.cudaEnabled = false;
m.cuda = NULL;
if (m.cudaDevice != -1)
{
// Exit device.
cuda::exitDevice();
}
}
else if (!m.cudaEnabled && enable)
{
// Init the CUDA device. This may return -1 if CUDA was already initialized by the app.
m.cudaEnabled = cuda::initDevice(&m.cudaDevice);
if (m.cudaEnabled)
{
// Create compressor if initialization succeeds.
m.cuda = new CudaCompressor();
// But cleanup if failed.
if (!m.cuda->isValid())
{
enableCudaAcceleration(false);
}
}
}
}
}

View File

@ -63,10 +63,12 @@ namespace nvtt
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:
bool cudaSupported;
bool cudaEnabled;
int cudaDevice;
nv::AutoPtr<nv::CudaCompressor> cuda;

View File

@ -94,7 +94,7 @@ void InputOptions::reset()
m.textureType = TextureType_2D;
m.inputFormat = InputFormat_BGRA_8UB;
m.alphaMode = AlphaMode_Transparency;
m.alphaMode = AlphaMode_None;
m.inputGamma = 2.2f;
m.outputGamma = 2.2f;

View File

@ -128,7 +128,20 @@ inline __device__ __host__ float3 firstEigenVector( float matrix[6] )
{
// 8 iterations seems to be more than enough.
float3 v = make_float3(1.0f, 1.0f, 1.0f);
float3 row0 = make_float3(matrix[0], matrix[1], matrix[2]);
float3 row1 = make_float3(matrix[1], matrix[3], matrix[4]);
float3 row2 = make_float3(matrix[2], matrix[4], matrix[5]);
float r0 = dot(row0, row0);
float r1 = dot(row1, row1);
float r2 = dot(row2, row2);
float3 v;
if (r0 > r1 && r0 > r2) v = row0;
else if (r1 > r2) v = row1;
else v = row2;
//float3 v = make_float3(1.0f, 1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1] + v.z * matrix[2];
float y = v.x * matrix[1] + v.y * matrix[3] + v.z * matrix[4];

View File

@ -41,11 +41,11 @@ using namespace cuda;
static bool isWindowsVista()
{
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
OSVERSIONINFO osvi;
osvi.dwOSVersionInfoSize = sizeof(OSVERSIONINFO);
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
::GetVersionEx(&osvi);
return osvi.dwMajorVersion >= 6;
}
@ -53,20 +53,20 @@ typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow32()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle("kernel32"), "IsWow64Process");
BOOL bIsWow64 = FALSE;
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
// Assume 32 bits.
return true;
}
}
return !bIsWow64;
return !bIsWow64;
}
#endif
@ -158,6 +158,8 @@ bool nv::cuda::isHardwarePresent()
// @@ Make sure that warp size == 32
// @@ Make sure available GPU is faster than the CPU.
return count > 0;
#else
return false;
@ -180,30 +182,59 @@ int nv::cuda::deviceCount()
return 0;
}
// Make sure device meets requirements:
// - Not an emulation device.
// - Not an integrated device? (check is currently commented out below)
// - Faster than CPU. (not checked by this function)
//
// @param i Index of the CUDA device to inspect.
// @return false when the device properties cannot be queried, when the device
//         reports -1 as major or minor revision, or when the library is built
//         without HAVE_CUDA; true otherwise.
bool nv::cuda::isValidDevice(int i)
{
#if defined HAVE_CUDA
    cudaDeviceProp device_properties;
    if (cudaGetDeviceProperties(&device_properties, i) != cudaSuccess)
    {
        // The query failed, so the contents of device_properties must not be trusted.
        return false;
    }

    if (device_properties.major == -1 || device_properties.minor == -1) {
        // Emulation device.
        return false;
    }

#if CUDART_VERSION >= 2030 // 2.3
    /*if (device_properties.integrated)
    {
        // Integrated devices.
        return false;
    }*/
#endif

    return true;
#else
    return false;
#endif
}
int nv::cuda::getFastestDevice()
{
int max_gflops_device = 0;
int max_gflops_device = -1;
#if defined HAVE_CUDA
int max_gflops = 0;
const int device_count = deviceCount();
int current_device = 0;
while (current_device < device_count)
for (int i = 0; i < device_count; i++)
{
if (isValidDevice(i))
{
cudaDeviceProp device_properties;
cudaGetDeviceProperties(&device_properties, current_device);
cudaGetDeviceProperties(&device_properties, i);
int gflops = device_properties.multiProcessorCount * device_properties.clockRate;
if (device_properties.major != -1 && device_properties.minor != -1)
{
if( gflops > max_gflops )
if (gflops > max_gflops)
{
max_gflops = gflops;
max_gflops_device = current_device;
max_gflops_device = i;
}
}
current_device++;
}
#endif
return max_gflops_device;
@ -211,23 +242,53 @@ int nv::cuda::getFastestDevice()
/// Activate the given devices.
bool nv::cuda::setDevice(int i)
bool nv::cuda::initDevice(int * device_ptr)
{
nvCheck(i < deviceCount());
nvDebugCheck(device_ptr != NULL);
#if defined HAVE_CUDA
cudaError_t result = cudaSetDevice(i);
if (result != cudaSuccess) {
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
#if CUDART_VERSION >= 2030 // 2.3
// Set device flags to yield in order to play nice with other threads and to find out if CUDA was already active.
cudaError_t resul = cudaSetDeviceFlags(cudaDeviceScheduleYield);
#endif
int device = getFastestDevice();
if (device == -1)
{
// No device is fast enough.
*device_ptr = -1;
return false;
}
return result == cudaSuccess;
// Select CUDA device.
cudaError_t result = cudaSetDevice(device);
if (result == cudaErrorSetOnActiveProcess)
{
int device;
result = cudaGetDevice(&device);
*device_ptr = -1; // No device to cleanup.
return isValidDevice(device); // Return true if device is valid.
}
else if (result != cudaSuccess)
{
nvDebug("*** CUDA Error: %s\n", cudaGetErrorString(result));
*device_ptr = -1;
return false;
}
*device_ptr = device;
return true;
#else
return false;
#endif
}
void nv::cuda::exit()
void nv::cuda::exitDevice()
{
#if defined HAVE_CUDA
cudaError_t result = cudaThreadExit();

View File

@ -32,8 +32,10 @@ namespace nv
bool isHardwarePresent();
int deviceCount();
int getFastestDevice();
bool setDevice(int i);
void exit();
bool isValidDevice(int i);
bool initDevice(int * device_ptr);
void exitDevice();
};
} // nv namespace

View File

@ -73,7 +73,7 @@ namespace nvtt
Format_DXT1a, // DXT1 with binary alpha.
Format_DXT3,
Format_DXT5,
Format_DXT5n, // Compressed HILO: R=0, G=x, B=0, A=y
Format_DXT5n, // Compressed HILO: R=1, G=y, B=0, A=x
// DX10 formats.
Format_BC1 = Format_DXT1,
@ -194,7 +194,7 @@ namespace nvtt
// Describe the format of the input.
NVTT_API void setFormat(InputFormat format);
// Set the way the input alpha channel is interpreted. @@ Not implemented!
// Set the way the input alpha channel is interpreted.
NVTT_API void setAlphaMode(AlphaMode alphaMode);
// Set gamma settings.

View File

@ -24,6 +24,7 @@
-------------------------------------------------------------------------- */
#include "maths.h"
#include "simd.h"
#include <cfloat>
namespace squish {
@ -60,12 +61,61 @@ Sym3x3 ComputeWeightedCovariance( int n, Vec3 const* points, float const* weight
}
// Choose the initial vector for the power iteration in ComputePrincipleComponent:
// the row of the symmetric 3x3 matrix with the largest squared length.
// The matrix is stored as six coefficients; rows are rebuilt from them as
// (0,1,2), (1,3,4) and (2,4,5). On ties the later row is returned.
static Vec3 EstimatePrincipleComponent( Sym3x3 const& matrix )
{
    Vec3 const top( matrix[0], matrix[1], matrix[2] );
    Vec3 const middle( matrix[1], matrix[3], matrix[4] );
    Vec3 const bottom( matrix[2], matrix[4], matrix[5] );

    // Squared lengths are enough to rank the rows.
    float const topLenSq = Dot( top, top );
    float const middleLenSq = Dot( middle, middle );
    float const bottomLenSq = Dot( bottom, bottom );

    bool const topIsLongest = ( topLenSq > middleLenSq ) && ( topLenSq > bottomLenSq );
    if( topIsLongest )
    {
        return top;
    }

    return ( middleLenSq > bottomLenSq ) ? middle : bottom;
}
#define POWER_ITERATION_COUNT 8
#if SQUISH_USE_SIMD
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
const int NUM = 8;
Vec4 const row0( matrix[0], matrix[1], matrix[2], 0.0f );
Vec4 const row1( matrix[1], matrix[3], matrix[4], 0.0f );
Vec4 const row2( matrix[2], matrix[4], matrix[5], 0.0f );
Vec3 v(1, 1, 1);
for(int i = 0; i < NUM; i++) {
//Vec4 v = VEC4_CONST( 1.0f );
//Vec4 v = row0; // row1, row2
Vec3 v3 = EstimatePrincipleComponent( matrix );
Vec4 v( v3.X(), v3.Y(), v3.Z(), 0.0f );
for( int i = 0; i < POWER_ITERATION_COUNT; ++i )
{
// matrix multiply
Vec4 w = row0*v.SplatX();
w = MultiplyAdd(row1, v.SplatY(), w);
w = MultiplyAdd(row2, v.SplatZ(), w);
// get max component from xyz in all channels
Vec4 a = Max(w.SplatX(), Max(w.SplatY(), w.SplatZ()));
// divide through and advance
v = w*Reciprocal(a);
}
return v.GetVec3();
}
#else
Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
{
Vec3 v = EstimatePrincipleComponent( matrix );
for (int i = 0; i < POWER_ITERATION_COUNT; i++)
{
float x = v.X() * matrix[0] + v.Y() * matrix[1] + v.Z() * matrix[2];
float y = v.X() * matrix[1] + v.Y() * matrix[3] + v.Z() * matrix[4];
float z = v.X() * matrix[2] + v.Y() * matrix[4] + v.Z() * matrix[5];
@ -82,5 +132,6 @@ Vec3 ComputePrincipleComponent( Sym3x3 const& matrix )
return v;
}
#endif
} // namespace squish

View File

@ -1,28 +1,28 @@
/* -----------------------------------------------------------------------------
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Copyright (c) 2006 Simon Brown si@sjbrown.co.uk
Copyright (c) 2006 Ignacio Castano icastano@nvidia.com
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
-------------------------------------------------------------------------- */
-------------------------------------------------------------------------- */
#include "weightedclusterfit.h"
#include "colourset.h"
@ -32,12 +32,12 @@
namespace squish {
WeightedClusterFit::WeightedClusterFit()
{
}
WeightedClusterFit::WeightedClusterFit()
{
}
void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
{
void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
{
ColourFit::SetColourSet( colours, flags );
// initialise the best error
@ -102,21 +102,21 @@ void WeightedClusterFit::SetColourSet( ColourSet const* colours, int flags )
m_wsum += m_weights[i];
#endif
}
}
}
void WeightedClusterFit::SetMetric(float r, float g, float b)
{
void WeightedClusterFit::SetMetric(float r, float g, float b)
{
#if SQUISH_USE_SIMD
m_metric = Vec4(r, g, b, 0);
#else
m_metric = Vec3(r, g, b);
#endif
m_metricSqr = m_metric * m_metric;
}
}
float WeightedClusterFit::GetBestError() const
{
float WeightedClusterFit::GetBestError() const
{
#if SQUISH_USE_SIMD
Vec4 x = m_xxsum * m_metricSqr;
Vec4 error = m_besterror + x.SplatX() + x.SplatY() + x.SplatZ();
@ -125,16 +125,19 @@ float WeightedClusterFit::GetBestError() const
return m_besterror + Dot(m_xxsum, m_metricSqr);
#endif
}
}
#if SQUISH_USE_SIMD
void WeightedClusterFit::Compress3( void* block )
{
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half(0.5f, 0.5f, 0.5f, 0.25f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -146,11 +149,11 @@ void WeightedClusterFit::Compress3( void* block )
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
for( int c0 = 0; c0 <= count; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= 16-c0; c1++)
for( int c1 = 0; c1 <= count-c0; c1++)
{
Vec4 const x2 = m_xsum - x1 - x0;
@ -173,24 +176,21 @@ void WeightedClusterFit::Compress3( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp the output to [0, 1]
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -221,17 +221,17 @@ void WeightedClusterFit::Compress3( void* block )
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices ); // Set alpha indices.
m_colours->RemapIndices( ordered, bestindices );
// save the block
@ -240,16 +240,20 @@ void WeightedClusterFit::Compress3( void* block )
// save the error
m_besterror = besterror;
}
}
}
void WeightedClusterFit::Compress4( void* block )
{
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec4 const one = VEC4_CONST(1.0f);
Vec4 const zero = VEC4_CONST(0.0f);
Vec4 const half = VEC4_CONST(0.5f);
Vec4 const two = VEC4_CONST(2.0);
Vec4 const onethird( 1.0f/3.0f, 1.0f/3.0f, 1.0f/3.0f, 1.0f/9.0f );
Vec4 const twothirds( 2.0f/3.0f, 2.0f/3.0f, 2.0f/3.0f, 4.0f/9.0f );
Vec4 const twonineths = VEC4_CONST( 2.0f/9.0f );
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f, 0.0f );
// declare variables
Vec4 beststart = VEC4_CONST( 0.0f );
@ -260,30 +264,30 @@ void WeightedClusterFit::Compress4( void* block )
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
for( int c0 = 0; c0 <= count; c0++)
{
Vec4 x1 = zero;
for( int c1 = 0; c1 <= 16-c0; c1++)
for( int c1 = 0; c1 <= count-c0; c1++)
{
Vec4 x2 = zero;
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
for( int c2 = 0; c2 <= count-c0-c1; c2++)
{
Vec4 const x3 = m_xsum - x2 - x1 - x0;
//Vec3 const alphax_sum = x0 + x1 * (2.0f / 3.0f) + x2 * (1.0f / 3.0f);
//float const alpha2_sum = w0 + w1 * (4.0f/9.0f) + w2 * (1.0f/9.0f);
Vec4 const alphax_sum = x0 + MultiplyAdd(x1, twothirds, x2 * onethird); // alphax_sum, alpha2_sum
Vec4 const alphax_sum = MultiplyAdd(x2, onethird, MultiplyAdd(x1, twothirds, x0)); // alphax_sum, alpha2_sum
Vec4 const alpha2_sum = alphax_sum.SplatW();
//Vec3 const betax_sum = x3 + x2 * (2.0f / 3.0f) + x1 * (1.0f / 3.0f);
//float const beta2_sum = w3 + w2 * (4.0f/9.0f) + w1 * (1.0f/9.0f);
Vec4 const betax_sum = x3 + MultiplyAdd(x2, twothirds, x1 * onethird); // betax_sum, beta2_sum
Vec4 const betax_sum = MultiplyAdd(x2, twothirds, MultiplyAdd(x1, onethird, x3)); // betax_sum, beta2_sum
Vec4 const beta2_sum = betax_sum.SplatW();
//float const alphabeta_sum = w1 * (2.0f/9.0f) + w2 * (2.0f/9.0f);
Vec4 const alphabeta_sum = two * (x1 * onethird + x2 * onethird).SplatW(); // alphabeta_sum
//float const alphabeta_sum = (w1 + w2) * (2.0f/9.0f);
Vec4 const alphabeta_sum = twonineths*( x1 + x2 ).SplatW(); // alphabeta_sum
// float const factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
Vec4 const factor = Reciprocal( NegativeMultiplySubtract(alphabeta_sum, alphabeta_sum, alpha2_sum*beta2_sum) );
@ -291,24 +295,21 @@ void WeightedClusterFit::Compress4( void* block )
Vec4 a = NegativeMultiplySubtract(betax_sum, alphabeta_sum, alphax_sum*beta2_sum) * factor;
Vec4 b = NegativeMultiplySubtract(alphax_sum, alphabeta_sum, betax_sum*alpha2_sum) * factor;
// clamp the output to [0, 1]
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec4 const grid( 31.0f, 63.0f, 31.0f, 0.0f );
Vec4 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f, 0.0f );
a = Truncate( MultiplyAdd( grid, a, half ) ) * gridrcp;
b = Truncate( MultiplyAdd( grid, b, half ) ) * gridrcp;
// compute the error
Vec4 e1 = MultiplyAdd( a, alphax_sum, b*betax_sum );
Vec4 e2 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e3 = MultiplyAdd( a*b*alphabeta_sum - e1, two, e2 );
// compute the error (we skip the constant xxsum)
Vec4 e1 = MultiplyAdd( a*a, alpha2_sum, b*b*beta2_sum );
Vec4 e2 = NegativeMultiplySubtract( a, alphax_sum, a*b*alphabeta_sum );
Vec4 e3 = NegativeMultiplySubtract( b, betax_sum, e2 );
Vec4 e4 = MultiplyAdd( two, e3, e1 );
// apply the metric to the error term
Vec4 e4 = e3 * m_metricSqr;
Vec4 error = e4.SplatX() + e4.SplatY() + e4.SplatZ();
Vec4 e5 = e4 * m_metricSqr;
Vec4 error = e5.SplatX() + e5.SplatY() + e5.SplatZ();
// keep the solution if it wins
if( CompareAnyLessThan( error, besterror ) )
@ -346,28 +347,37 @@ void WeightedClusterFit::Compress4( void* block )
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), ordered, block );
WriteColourBlock4( beststart.GetVec3(), bestend.GetVec3(), bestindices, block );
// save the error
m_besterror = besterror;
}
}
}
#else
void WeightedClusterFit::Compress3( void* block )
{
void WeightedClusterFit::Compress3( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -379,12 +389,12 @@ void WeightedClusterFit::Compress3( void* block )
int b0 = 0, b1 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
for( int c0 = 0; c0 <= count; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= 16-c0; c1++)
for( int c1 = 0; c1 <= count-c0; c1++)
{
float w2 = m_wsum - w0 - w1;
@ -400,16 +410,9 @@ void WeightedClusterFit::Compress3( void* block )
Vec3 a = (alphax_sum*beta2_sum - betax_sum*alphabeta_sum) * factor;
Vec3 b = (betax_sum*alpha2_sum - alphax_sum*alphabeta_sum) * factor;
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -450,26 +453,35 @@ void WeightedClusterFit::Compress3( void* block )
for(; i < b0+b1; i++) {
bestindices[i] = 2;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock3( beststart, bestend, ordered, block );
WriteColourBlock3( beststart, bestend, bestindices, block );
// save the error
m_besterror = besterror;
}
}
}
void WeightedClusterFit::Compress4( void* block )
{
int const count = m_colours->GetCount();
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
Vec3 const half( 0.5f );
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 1.0f/31.0f, 1.0f/63.0f, 1.0f/31.0f );
void WeightedClusterFit::Compress4( void* block )
{
// declare variables
Vec3 beststart( 0.0f );
Vec3 bestend( 0.0f );
@ -480,17 +492,17 @@ void WeightedClusterFit::Compress4( void* block )
int b0 = 0, b1 = 0, b2 = 0;
// check all possible clusters for this total order
for( int c0 = 0; c0 <= 16; c0++)
for( int c0 = 0; c0 <= count; c0++)
{
Vec3 x1(0.0f);
float w1 = 0.0f;
for( int c1 = 0; c1 <= 16-c0; c1++)
for( int c1 = 0; c1 <= count-c0; c1++)
{
Vec3 x2(0.0f);
float w2 = 0.0f;
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
for( int c2 = 0; c2 <= count-c0-c1; c2++)
{
float w3 = m_wsum - w0 - w1 - w2;
@ -505,16 +517,9 @@ void WeightedClusterFit::Compress4( void* block )
Vec3 a = ( alphax_sum*beta2_sum - betax_sum*alphabeta_sum )*factor;
Vec3 b = ( betax_sum*alpha2_sum - alphax_sum*alphabeta_sum )*factor;
// clamp the output to [0, 1]
Vec3 const one( 1.0f );
Vec3 const zero( 0.0f );
// clamp to the grid
a = Min( one, Max( zero, a ) );
b = Min( one, Max( zero, b ) );
// clamp to the grid
Vec3 const grid( 31.0f, 63.0f, 31.0f );
Vec3 const gridrcp( 0.03227752766457f, 0.01583151765563f, 0.03227752766457f );
Vec3 const half( 0.5f );
a = Floor( grid*a + half )*gridrcp;
b = Floor( grid*b + half )*gridrcp;
@ -563,23 +568,25 @@ void WeightedClusterFit::Compress4( void* block )
for(; i < b0+b1+b2; i++) {
bestindices[i] = 3;
}
for(; i < 16; i++) {
for(; i < count; i++) {
bestindices[i] = 1;
}
}
// remap the indices
u8 ordered[16];
for( int i = 0; i < 16; ++i )
for( int i = 0; i < count; ++i )
ordered[m_order[i]] = bestindices[i];
m_colours->RemapIndices( ordered, bestindices );
// save the block
WriteColourBlock4( beststart, bestend, ordered, block );
WriteColourBlock4( beststart, bestend, bestindices, block );
// save the error
m_besterror = besterror;
}
}
}
#endif

View File

@ -87,7 +87,10 @@ struct MyErrorHandler : public nvtt::ErrorHandler
{
virtual void error(nvtt::Error e)
{
#if _DEBUG
nvDebugBreak();
#endif
printf("Error: '%s'\n", nvtt::errorString(e));
}
};
@ -131,6 +134,7 @@ int main(int argc, char *argv[])
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool alpha = false;
bool normal = false;
bool color2normal = false;
bool wrapRepeat = false;
@ -154,6 +158,10 @@ int main(int argc, char *argv[])
if (strcmp("-color", argv[i]) == 0)
{
}
else if (strcmp("-alpha", argv[i]) == 0)
{
alpha = true;
}
else if (strcmp("-normal", argv[i]) == 0)
{
normal = true;
@ -254,7 +262,12 @@ int main(int argc, char *argv[])
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
const uint version = nvtt::version();
const uint major = version / 100;
const uint minor = version % 100;
printf("NVIDIA Texture Tools %u.%u - Copyright NVIDIA Corporation 2007\n\n", major, minor);
if (input.isNull())
{
@ -262,6 +275,7 @@ int main(int argc, char *argv[])
printf("Input options:\n");
printf(" -color \tThe input image is a color map (default).\n");
printf(" -alpha \tThe input image has an alpha channel used for transparency.\n");
printf(" -normal \tThe input image is a normal map.\n");
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
@ -281,7 +295,7 @@ int main(int argc, char *argv[])
printf(" -bc4 \tBC4 format (ATI1)\n");
printf(" -bc5 \tBC5 format (3Dc/ATI2)\n\n");
return 1;
return EXIT_FAILURE;
}
// @@ Make sure input file exists.
@ -296,13 +310,13 @@ int main(int argc, char *argv[])
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
return 1;
return EXIT_FAILURE;
}
if (!dds.isSupported() || dds.isTexture3D())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return 1;
return EXIT_FAILURE;
}
uint faceCount;
@ -339,7 +353,7 @@ int main(int argc, char *argv[])
if (!image.load(input))
{
fprintf(stderr, "The file '%s' is not a supported image type.\n", input.str());
return 1;
return EXIT_FAILURE;
}
inputOptions.setTextureLayout(nvtt::TextureType_2D, image.width(), image.height());
@ -355,6 +369,15 @@ int main(int argc, char *argv[])
inputOptions.setWrapMode(nvtt::WrapMode_Clamp);
}
if (alpha)
{
inputOptions.setAlphaMode(nvtt::AlphaMode_Transparency);
}
else
{
inputOptions.setAlphaMode(nvtt::AlphaMode_None);
}
if (normal)
{
setNormalMap(inputOptions);
@ -402,7 +425,7 @@ int main(int argc, char *argv[])
if (outputHandler.stream->isError())
{
fprintf(stderr, "Error opening '%s' for writting\n", output.str());
return 1;
return EXIT_FAILURE;
}
nvtt::Compressor compressor;
@ -430,27 +453,16 @@ int main(int argc, char *argv[])
// fflush(stdout);
// getchar();
/* LARGE_INTEGER temp;
QueryPerformanceFrequency((LARGE_INTEGER*) &temp);
double freq = ((double) temp.QuadPart) / 1000.0;
LARGE_INTEGER start_time;
QueryPerformanceCounter((LARGE_INTEGER*) &start_time);
*/
clock_t start = clock();
compressor.process(inputOptions, compressionOptions, outputOptions);
/*
LARGE_INTEGER end_time;
QueryPerformanceCounter((LARGE_INTEGER*) &end_time);
float diff_time = (float) (((double) end_time.QuadPart - (double) start_time.QuadPart) / freq);
printf("\rtime taken: %.3f seconds\n", diff_time/1000);
*/
if (!compressor.process(inputOptions, compressionOptions, outputOptions))
{
return EXIT_FAILURE;
}
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
return EXIT_SUCCESS;
}