Merge changes from The Witness.

pull/276/head
Ignacio 6 years ago
parent 2075d740c9
commit 9489aed825

@ -31,7 +31,7 @@ MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}") MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(CMAKE_BUILD_TYPE MATCHES "debug") IF(CMAKE_BUILD_TYPE MATCHES "debug")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.") SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.")
ADD_DEFINITIONS(-D_DEBUG=1) ADD_DEFINITIONS(-D_DEBUG=1)
ENDIF() ENDIF()

@ -1,6 +1,6 @@
NVIDIA Texture Tools is licensed under the MIT license. NVIDIA Texture Tools is licensed under the MIT license.
Copyright (c) 2009-2016 Ignacio Castano Copyright (c) 2009-2017 Ignacio Castaño
Copyright (c) 2007-2009 NVIDIA Corporation Copyright (c) 2007-2009 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person Permission is hereby granted, free of charge, to any person

@ -6,7 +6,7 @@ manipulation tools, designed to be integrated in game tools and asset
processing pipelines. processing pipelines.
The primary features of the library are mipmap and normal map generation, format The primary features of the library are mipmap and normal map generation, format
conversion and DXT compression. conversion, and DXT compression.
### How to build (Windows) ### How to build (Windows)
@ -42,5 +42,5 @@ src/nvtt/tools/compress.cpp
Detailed documentation of the API can be found at: Detailed documentation of the API can be found at:
http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation https://github.com/castano/nvidia-texture-tools/wiki/ApiDocumentation

@ -36,4 +36,6 @@ do
#./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds #./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds
#./nvimgdiff -alpha $file.$EXT $file.bc6.dds #./nvimgdiff -alpha $file.$EXT $file.bc6.dds
# ETC2-EAC
./nvcompress -silent -alpha -nomips -etc_rgbm
done done

@ -349,9 +349,18 @@ LLVM:
# define POSH_OS_STRING "UNICOS" # define POSH_OS_STRING "UNICOS"
#endif #endif
#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx //ACS if we're in xcode, look at the target conditionals to figure out if this is ios or osx
# define POSH_OS_OSX 1 #if defined __APPLE__
# define POSH_OS_STRING "MacOS X" # include "TargetConditionals.h"
#endif
#if TARGET_OS_IPHONE
# define POSH_OS_IOS 1
# define POSH_OS_STRING "iOS"
#else
# if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
# define POSH_OS_OSX 1
# define POSH_OS_STRING "MacOS X"
# endif
#endif #endif
#if defined __sun__ || defined sun || defined __sun || defined __solaris__ #if defined __sun__ || defined sun || defined __sun || defined __solaris__

@ -1808,7 +1808,7 @@ typedef unsigned long uint64;
{ {
if (block_inten[0] > m_pSorted_luma[n - 1]) if (block_inten[0] > m_pSorted_luma[n - 1])
{ {
const uint min_error = labs(int(block_inten[0] - m_pSorted_luma[n - 1])); const uint min_error = abs(int(block_inten[0] - m_pSorted_luma[n - 1]));
if (min_error >= trial_solution.m_error) if (min_error >= trial_solution.m_error)
continue; continue;
} }
@ -1822,7 +1822,7 @@ typedef unsigned long uint64;
{ {
if (m_pSorted_luma[0] > block_inten[3]) if (m_pSorted_luma[0] > block_inten[3])
{ {
const uint min_error = labs(int(m_pSorted_luma[0] - block_inten[3])); const uint min_error = abs(int(m_pSorted_luma[0] - block_inten[3]));
if (min_error >= trial_solution.m_error) if (min_error >= trial_solution.m_error)
continue; continue;
} }
@ -1914,7 +1914,7 @@ done:
for (uint packed_c = 0; packed_c < limit; packed_c++) for (uint packed_c = 0; packed_c < limit; packed_c++)
{ {
int v = etc1_decode_value(diff, inten, selector, packed_c); int v = etc1_decode_value(diff, inten, selector, packed_c);
uint err = labs(v - static_cast<int>(color)); uint err = abs(v - static_cast<int>(color));
if (err < best_error) if (err < best_error)
{ {
best_error = err; best_error = err;

@ -14,6 +14,7 @@ SET(BC6H_SRCS
zohtwo.cpp) zohtwo.cpp)
ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS}) ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS})
TARGET_LINK_LIBRARIES(bc6h nvcore nvmath)
IF(NOT WIN32) IF(NOT WIN32)
IF(CMAKE_COMPILER_IS_GNUCXX) IF(CMAKE_COMPILER_IS_GNUCXX)

@ -37,7 +37,7 @@ int Utils::lerp(int a, int b, int i, int denom)
case 3: denom *= 5; i *= 5; // fall through to case 15 case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break; case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break; case 7: weights = denom7_weights_64; break;
default: nvDebugCheck(0); default: nvUnreachable();
} }
return (a*weights[denom-i] +b*weights[i] + round) >> shift; return (a*weights[denom-i] +b*weights[i] + round) >> shift;

@ -584,7 +584,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{ {
Vector3 pixels[Tile::TILE_TOTAL]; Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL]; float importance[Tile::TILE_TOTAL];
float err = 0; //float err = 0;
for (int region=0; region<NREGIONS_ONE; ++region) for (int region=0; region<NREGIONS_ONE; ++region)
{ {

@ -672,7 +672,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{ {
Vector3 pixels[Tile::TILE_TOTAL]; Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL]; float importance[Tile::TILE_TOTAL];
float err = 0; //float err = 0;
for (int region=0; region<NREGIONS_TWO; ++region) for (int region=0; region<NREGIONS_TWO; ++region)
{ {

@ -22,6 +22,7 @@ SET(BC7_SRCS
avpcl_utils.h) avpcl_utils.h)
ADD_LIBRARY(bc7 STATIC ${BC7_SRCS}) ADD_LIBRARY(bc7 STATIC ${BC7_SRCS})
TARGET_LINK_LIBRARIES(bc7 nvcore nvmath)
TARGET_LINK_LIBRARIES(bc7 nvmath) TARGET_LINK_LIBRARIES(bc7 nvmath)

@ -243,7 +243,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
{ {
int mode = AVPCL::getmode(in); //int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
nvAssert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
@ -580,7 +580,7 @@ static float exhaustive(const Vector4 colors[], const float importance[], int np
int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; //bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
int amin, bmin; int amin, bmin;

@ -148,7 +148,7 @@ namespace nv
NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; } NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; } NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
#if NV_CC_MSVC #if NV_NEED_PSEUDOINDEX_WRAPPER
NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) { NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
return m_buffer[i(this)]; return m_buffer[i(this)];
} }

@ -27,7 +27,7 @@
#define NV_FASTCALL __attribute__((fastcall)) #define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE __attribute__((always_inline)) inline #define NV_FORCEINLINE __attribute__((always_inline)) inline
#define NV_DEPRECATED __attribute__((deprecated)) #define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX #define NV_THREAD_LOCAL __thread
#if __GNUC__ > 2 #if __GNUC__ > 2
#define NV_PURE __attribute__((pure)) #define NV_PURE __attribute__((pure))

@ -31,11 +31,6 @@ bool FileSystem::exists(const char * path)
// PathFileExists requires linking to shlwapi.lib // PathFileExists requires linking to shlwapi.lib
//return PathFileExists(path) != 0; //return PathFileExists(path) != 0;
return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES; return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
#elif NV_OS_ORBIS
const int BUFFER_SIZE = 2048;
char file_fullpath[BUFFER_SIZE];
snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path);
return sceFiosExistsSync(NULL, file_fullpath);
#else #else
if (FILE * fp = fopen(path, "r")) if (FILE * fp = fopen(path, "r"))
{ {
@ -78,3 +73,31 @@ bool FileSystem::removeFile(const char * path)
// @@ Use unlink or remove? // @@ Use unlink or remove?
return remove(path) == 0; return remove(path) == 0;
} }
#include "StdStream.h" // for fileOpen
// Copy the contents of file `src` into file `dst`.
// `src` is opened with mode "rb" and `dst` with mode "wb" through fileOpen.
// Returns true only when every byte was read and handed to the output stream
// without error; returns false if either file cannot be opened, or if a read,
// write or flush fails. Both streams are closed on every exit path.
bool FileSystem::copyFile(const char * src, const char * dst) {
    FILE * fsrc = fileOpen(src, "rb");
    if (fsrc == NULL) return false;
    NV_ON_RETURN(fclose(fsrc));

    FILE * fdst = fileOpen(dst, "wb");
    if (fdst == NULL) return false;
    NV_ON_RETURN(fclose(fdst));

    char buffer[1024];
    size_t n;
    while ((n = fread(buffer, sizeof(char), sizeof(buffer), fsrc)) > 0) {
        if (fwrite(buffer, sizeof(char), n, fdst) != n) {
            return false;
        }
    }

    // fread returns 0 both at end-of-file and on a read error; only the
    // former means the copy is complete.
    if (ferror(fsrc) != 0) {
        return false;
    }

    // Flush explicitly: the result of the deferred fclose is discarded, so a
    // write failure on still-buffered data would otherwise go unreported.
    return fflush(fdst) == 0;
}

@ -15,7 +15,7 @@ namespace nv
NVCORE_API bool createDirectory(const char * path); NVCORE_API bool createDirectory(const char * path);
NVCORE_API bool changeDirectory(const char * path); NVCORE_API bool changeDirectory(const char * path);
NVCORE_API bool removeFile(const char * path); NVCORE_API bool removeFile(const char * path);
NVCORE_API bool copyFile(const char * src, const char * dst);
} // FileSystem namespace } // FileSystem namespace
} // nv namespace } // nv namespace

@ -33,6 +33,8 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
#else // If typeof not available: #else // If typeof not available:
#define NV_NEED_PSEUDOINDEX_WRAPPER 1
#include <new> // placement new #include <new> // placement new
struct PseudoIndexWrapper { struct PseudoIndexWrapper {

@ -2,6 +2,7 @@
#include "Memory.h" #include "Memory.h"
#include "Debug.h" #include "Debug.h"
#include "Utils.h"
#include <stdlib.h> #include <stdlib.h>
@ -56,6 +57,7 @@ void * realloc(void * ptr, size_t size)
#endif #endif
} }
/* No need to override this unless we want line info. /* No need to override this unless we want line info.
void * operator new (size_t size) throw() void * operator new (size_t size) throw()
{ {
@ -116,4 +118,32 @@ void operator delete(void* p, const std::nothrow_t&) throw()
#endif // NV_OVERRIDE_ALLOC #endif // NV_OVERRIDE_ALLOC
// Allocate `size` bytes whose address is a multiple of `alignment`.
// `alignment` must be a power of two and a multiple of sizeof(void*); both are
// verified with debug checks only. Returns NULL when the allocation fails.
// Memory obtained here must be released with nv::aligned_free, because the
// Windows/Durango path uses _aligned_malloc.
void * nv::aligned_malloc(size_t size, size_t alignment)
{
    // alignment must be a power of two, multiple of sizeof(void*)
    nvDebugCheck(isPowerOfTwo(alignment));
    nvDebugCheck((alignment & (sizeof(void*) - 1)) == 0);
#if NV_OS_WIN32 || NV_OS_DURANGO
    return _aligned_malloc(size, alignment);
#elif NV_OS_DARWIN && !NV_OS_IOS
    void * ptr = NULL;
    // posix_memalign reports failure through its return value; do not rely on
    // the contents of ptr in that case.
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
#elif NV_OS_LINUX
    return memalign(alignment, size);
#else // NV_OS_ORBIS || NV_OS_IOS
    // @@ IC: iOS appears to be 16 byte aligned, should we check alignment and assert if we request a higher alignment factor?
    // The requested alignment is not honoured on this path; plain malloc is used.
    return ::malloc(size);
#endif
}
// Release memory obtained from nv::aligned_malloc.
// Windows and Durango use _aligned_free; every other platform uses the regular
// C free().
void nv::aligned_free(void * ptr)
{
#if !(NV_OS_WIN32 || NV_OS_DURANGO)
    ::free(ptr);
#else
    _aligned_free(ptr);
#endif
}

@ -7,10 +7,16 @@
#include "nvcore.h" #include "nvcore.h"
#include <stdlib.h> // malloc(), realloc() and free() #include <stdlib.h> // malloc(), realloc() and free()
#include <string.h> // memset
//#include <stddef.h> // size_t //#include <stddef.h> // size_t
//#include <new> // new and delete //#include <new> // new and delete
#define TRACK_MEMORY_LEAKS 0
#if TRACK_MEMORY_LEAKS
#include <vld.h>
#endif
#if NV_CC_GNUC #if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16))) # define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
@ -41,6 +47,8 @@ extern "C" {
#endif #endif
namespace nv { namespace nv {
NVCORE_API void * aligned_malloc(size_t size, size_t alignment);
NVCORE_API void aligned_free(void * );
// C++ helpers. // C++ helpers.
template <typename T> NV_FORCEINLINE T * malloc(size_t count) { template <typename T> NV_FORCEINLINE T * malloc(size_t count) {

@ -113,7 +113,7 @@ namespace nv
public: public:
// BaseClass must implement addRef() and release(). // BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType; typedef SmartPtr<BaseClass> ThisType;
/// Default ctor. /// Default ctor.
SmartPtr() : m_ptr(NULL) SmartPtr() : m_ptr(NULL)

@ -213,9 +213,12 @@ namespace nv
#elif NV_OS_LINUX #elif NV_OS_LINUX
return (uint)fread_unlocked(data, 1, len, m_fp); return (uint)fread_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN #elif NV_OS_DARWIN
// @@ No error checking, always returns len. // This is rather lame. Not sure if it's faster than the locked version.
for (uint i = 0; i < len; i++) { for (uint i = 0; i < len; i++) {
((char *)data)[i] = getc_unlocked(m_fp); ((char *)data)[i] = getc_unlocked(m_fp);
if (feof_unlocked(m_fp) != 0) {
return i;
}
} }
return len; return len;
#else #else

@ -347,26 +347,36 @@ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
} }
/** Append a string. */ // Append a character.
StringBuilder & StringBuilder::append( const char * s ) StringBuilder & StringBuilder::append( char c )
{ {
return append(s, U32(strlen( s ))); return append(&c, 1);
} }
// Append a NUL-terminated string: measure it, then forward to the
// (pointer, length) overload.
StringBuilder & StringBuilder::append( const char * s )
{
    const auto len = U32(strlen(s));
    return append(s, len);
}
/** Append a string. */ // Append a string.
StringBuilder & StringBuilder::append(const char * s, uint len) StringBuilder & StringBuilder::append(const char * s, uint len)
{ {
nvDebugCheck(s != NULL); nvDebugCheck(s != NULL);
uint offset = length(); uint offset = length();
const uint size = offset + len + 1; const uint size = offset + len + 1;
reserve(size); reserve(size);
strCpy(m_str + offset, len + 1, s, len); strCpy(m_str + offset, len + 1, s, len);
return *this; return *this;
} }
// Append the contents of another builder by forwarding its buffer and length
// to the (pointer, length) overload.
// NOTE(review): if `str` is this same builder, the forwarded pointer may be
// invalidated should reserve() reallocate - confirm against reserve().
StringBuilder & StringBuilder::append(const StringBuilder & str)
{
    const char * const text = str.m_str;
    const auto count = str.length();
    return append(text, count);
}
/** Append a formatted string. */ /** Append a formatted string. */
StringBuilder & StringBuilder::appendFormat( const char * fmt, ... ) StringBuilder & StringBuilder::appendFormat( const char * fmt, ... )
@ -516,6 +526,19 @@ StringBuilder & StringBuilder::copy( const StringBuilder & s )
return *this; return *this;
} }
// Remove the first occurrence of character `c` from the string, shifting the
// remainder of the string one position to the left. Later occurrences are left
// untouched. Does nothing when the builder holds no string, when `c` is the
// NUL character, or when `c` does not occur. m_size is not modified.
void StringBuilder::removeChar(char c)
{
    // strchr must not be handed a null buffer, and searching for '\0' would
    // match the terminator itself, after which the shift would run past the
    // end of the string.
    if (m_str == NULL || c == '\0') {
        return;
    }

    char * hit = strchr(m_str, c);
    if (hit != NULL) {
        // Move the tail, including its terminating NUL, over the removed character.
        memmove(hit, hit + 1, strlen(hit + 1) + 1);
    }
}
bool StringBuilder::endsWith(const char * str) const bool StringBuilder::endsWith(const char * str) const
{ {
uint l = uint(strlen(str)); uint l = uint(strlen(str));
@ -530,7 +553,7 @@ bool StringBuilder::beginsWith(const char * str) const
return strncmp(m_str, str, l) == 0; return strncmp(m_str, str, l) == 0;
} }
// Find given char starting from the end. // Find given char starting from the end. Why not use strrchr!?
char * StringBuilder::reverseFind(char c) char * StringBuilder::reverseFind(char c)
{ {
int length = (int)strlen(m_str) - 1; int length = (int)strlen(m_str) - 1;
@ -563,6 +586,19 @@ char * StringBuilder::release()
return str; return str;
} }
// Take ownership of string.
// A null `str` leaves the builder empty (m_size == 0, m_str == NULL); otherwise
// the builder adopts the pointer and records the string length plus terminator
// as its size.
// NOTE(review): the previous m_str is overwritten without being freed here -
// confirm callers only acquire into an empty or released builder.
void StringBuilder::acquire(char * str)
{
    if (str == NULL) {
        m_size = 0;
        m_str = NULL;
        return;
    }

    m_str = str;
    m_size = strLen(str) + 1;
}
// Swap strings. // Swap strings.
void nv::swap(StringBuilder & a, StringBuilder & b) { void nv::swap(StringBuilder & a, StringBuilder & b) {
swap(a.m_size, b.m_size); swap(a.m_size, b.m_size);
@ -585,19 +621,20 @@ const char * Path::extension() const
/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) { /*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
nvCheck(path != NULL); if (path != NULL) {
for (int i = 0;; i++) {
for (int i = 0;; i++) { if (path[i] == '\0') break;
if (path[i] == '\0') break; if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator;
if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator; }
} }
} }
/// Toggles path separators (ie. \\ into /). /// Toggles path separators (ie. \\ into /).
void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/) void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
{ {
nvCheck(!isNull()); if (!isNull()) {
translatePath(m_str, pathSeparator); translatePath(m_str, pathSeparator);
}
} }
void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/) void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)

@ -105,8 +105,10 @@ namespace nv
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3))); StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder & formatList( const char * format, va_list arg ); StringBuilder & formatList( const char * format, va_list arg );
StringBuilder & append(char c);
StringBuilder & append(const char * str); StringBuilder & append(const char * str);
StringBuilder & append(const char * str, uint len); StringBuilder & append(const char * str, uint len);
StringBuilder & append(const StringBuilder & str);
StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3))); StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
StringBuilder & appendFormatList(const char * format, va_list arg); StringBuilder & appendFormatList(const char * format, va_list arg);
@ -122,6 +124,8 @@ namespace nv
StringBuilder & toLower(); StringBuilder & toLower();
StringBuilder & toUpper(); StringBuilder & toUpper();
void removeChar(char c);
bool endsWith(const char * str) const; bool endsWith(const char * str) const;
bool beginsWith(const char * str) const; bool beginsWith(const char * str) const;
@ -129,15 +133,16 @@ namespace nv
char * reverseFind(char c); char * reverseFind(char c);
void reset(); void reset();
bool isNull() const { return m_size == 0; } NV_FORCEINLINE bool isNull() const { return m_size == 0; }
// const char * accessors // const char * accessors
//operator const char * () const { return m_str; } //operator const char * () const { return m_str; }
//operator char * () { return m_str; } //operator char * () { return m_str; }
const char * str() const { return m_str; } NV_FORCEINLINE const char * str() const { return m_str; }
char * str() { return m_str; } NV_FORCEINLINE char * str() { return m_str; }
char * release(); char * release(); // Release ownership of string.
void acquire(char *); // Take ownership of string.
/// Implement value semantics. /// Implement value semantics.
StringBuilder & operator=( const StringBuilder & s ) { StringBuilder & operator=( const StringBuilder & s ) {
@ -280,25 +285,25 @@ namespace nv
/// Equal operator. /// Equal operator.
bool operator==( const String & str ) const bool operator==( const String & str ) const
{ {
return strMatch(str.data, data); return strEqual(str.data, data);
} }
/// Equal operator. /// Equal operator.
bool operator==( const char * str ) const bool operator==( const char * str ) const
{ {
return strMatch(str, data); return strEqual(str, data);
} }
/// Not equal operator. /// Not equal operator.
bool operator!=( const String & str ) const bool operator!=( const String & str ) const
{ {
return !strMatch(str.data, data); return !strEqual(str.data, data);
} }
/// Not equal operator. /// Not equal operator.
bool operator!=( const char * str ) const bool operator!=( const char * str ) const
{ {
return !strMatch(str, data); return !strEqual(str, data);
} }
/// Returns true if this string is the null string. /// Returns true if this string is the null string.

@ -76,13 +76,13 @@ namespace nv
void advance(uint offset) { seek(tell() + offset); } void advance(uint offset) { seek(tell() + offset); }
// friends // friends
friend Stream & operator<<( Stream & s, bool & c ) { friend Stream & operator<<( Stream & s, bool & c ) {
#if NV_OS_DARWIN && !NV_CC_CPP11 #if NV_OS_DARWIN && !NV_CC_CPP11
nvStaticCheck(sizeof(bool) == 4); nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0; uint8 b = c ? 1 : 0;
s.serialize( &b, 1 ); s.serialize( &b, 1 );
c = (b == 1); c = (b != 0);
#else #else
nvStaticCheck(sizeof(bool) == 1); nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 ); s.serialize( &c, 1 );

@ -39,6 +39,28 @@ namespace nv
// These intentionally look like casts. // These intentionally look like casts.
// uint64 casts:
// The generic version is a plain implicit conversion. Signed sources get a
// specialization that debug-checks the value is non-negative before converting;
// unsigned sources (uint64, uint32, uint16, uint8) use the generic version.
template <typename T> inline uint64 U64(T x) { return x; }

template <> inline uint64 U64<int64>(int64 x) {
    nvDebugCheck(x >= 0);
    return static_cast<uint64>(x);
}

template <> inline uint64 U64<int32>(int32 x) {
    nvDebugCheck(x >= 0);
    return static_cast<uint64>(x);
}

template <> inline uint64 U64<int16>(int16 x) {
    nvDebugCheck(x >= 0);
    return static_cast<uint64>(x);
}

template <> inline uint64 U64<int8>(int8 x) {
    nvDebugCheck(x >= 0);
    return static_cast<uint64>(x);
}
// int64 casts:
// The generic version is a plain implicit conversion. Only uint64 is
// specialized, with a debug check that the value does not exceed NV_INT64_MAX;
// every other source type uses the generic version.
template <typename T> inline int64 I64(T x) { return x; }

template <> inline int64 I64<uint64>(uint64 x) {
    nvDebugCheck(x <= NV_INT64_MAX);
    return static_cast<int64>(x);
}
// uint32 casts: // uint32 casts:
template <typename T> inline uint32 U32(T x) { return x; } template <typename T> inline uint32 U32(T x) { return x; }
template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; } template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
@ -50,6 +72,11 @@ namespace nv
//template <> inline uint32 U32<uint8>(uint8 x) { return x; } //template <> inline uint32 U32<uint8>(uint8 x) { return x; }
template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; } template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
#if NV_OS_DARWIN
// Range-checked conversions from the native long types.
// NOTE(review): presumably needed because long / unsigned long are distinct
// from the fixed-width typedefs on this platform - confirm.
template <> inline uint32 U32<unsigned long>(unsigned long x) {
    nvDebugCheck(x <= NV_UINT32_MAX);
    return static_cast<uint32>(x);
}

template <> inline uint32 U32<long>(long x) {
    nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX);
    return static_cast<uint32>(x);
}
#endif
// int32 casts: // int32 casts:
template <typename T> inline int32 I32(T x) { return x; } template <typename T> inline int32 I32(T x) { return x; }
template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
@ -182,7 +209,7 @@ namespace nv
* @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
* @note nextPowerOfTwo(x) = 2 << log2(x-1) * @note nextPowerOfTwo(x) = 2 << log2(x-1)
*/ */
inline uint nextPowerOfTwo( uint x ) inline uint32 nextPowerOfTwo(uint32 x)
{ {
nvDebugCheck( x != 0 ); nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. #if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
@ -202,8 +229,19 @@ namespace nv
#endif #endif
} }
/// Return true if @a n is a power of two. inline uint64 nextPowerOfTwo(uint64 x)
inline bool isPowerOfTwo( uint n ) {
nvDebugCheck(x != 0);
uint p = 1;
while (x > p) {
p += p;
}
return p;
}
// @@ Should I just use a macro instead?
template <typename T>
inline bool isPowerOfTwo(T n)
{ {
return (n & (n-1)) == 0; return (n & (n-1)) == 0;
} }

@ -56,6 +56,7 @@
# define NV_OS_MINGW 1 # define NV_OS_MINGW 1
# define NV_OS_WIN32 1 # define NV_OS_WIN32 1
#elif defined POSH_OS_OSX #elif defined POSH_OS_OSX
# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too.
# define NV_OS_DARWIN 1 # define NV_OS_DARWIN 1
# define NV_OS_UNIX 1 # define NV_OS_UNIX 1
#elif defined POSH_OS_IOS #elif defined POSH_OS_IOS
@ -78,9 +79,9 @@
// Threading: // Threading:
// some platforms don't implement __thread or similar for thread-local-storage // some platforms don't implement __thread or similar for thread-local-storage
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios? #if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS
# define NV_OS_USE_PTHREAD 1 # define NV_OS_USE_PTHREAD 1
# if NV_OS_DARWIN || NV_OS_IOS # if 0 //Apple finally added TLS support to iOS!// NV_OS_IOS
# define NV_OS_HAS_TLS_QUALIFIER 0 # define NV_OS_HAS_TLS_QUALIFIER 0
# else # else
# define NV_OS_HAS_TLS_QUALIFIER 1 # define NV_OS_HAS_TLS_QUALIFIER 1
@ -96,7 +97,7 @@
// NV_CPU_X86_64 // NV_CPU_X86_64
// NV_CPU_PPC // NV_CPU_PPC
// NV_CPU_ARM // NV_CPU_ARM
// NV_CPU_AARCH64 // NV_CPU_ARM_64
#define NV_CPU_STRING POSH_CPU_STRING #define NV_CPU_STRING POSH_CPU_STRING
@ -110,7 +111,7 @@
#elif defined POSH_CPU_STRONGARM #elif defined POSH_CPU_STRONGARM
# define NV_CPU_ARM 1 # define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64 #elif defined POSH_CPU_AARCH64
# define NV_CPU_AARCH64 1 # define NV_CPU_ARM_64 1
#else #else
# error "Unsupported CPU" # error "Unsupported CPU"
#endif #endif
@ -148,10 +149,16 @@
#endif #endif
// Endiannes: // Endiannes:
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN // @@ POSH endian detection is broken for arm64 on iOS. They are bi-endian and iOS sets all their processors to little endian by default.
#define NV_BIG_ENDIAN POSH_BIG_ENDIAN #if NV_OS_IOS
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING # define NV_LITTLE_ENDIAN 1
# define NV_BIG_ENDIAN 0
# define NV_ENDIAN_STRING "little"
#else
# define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
# define NV_BIG_ENDIAN POSH_BIG_ENDIAN
# define NV_ENDIAN_STRING POSH_ENDIAN_STRING
#endif
// Define the right printf prefix for size_t arguments: // Define the right printf prefix for size_t arguments:
#if POSH_64BIT_POINTER #if POSH_64BIT_POINTER
@ -164,6 +171,28 @@
// cmake config // cmake config
#include "nvconfig.h" #include "nvconfig.h"
#if NV_OS_DARWIN
#include <stdint.h>
//#include <inttypes.h>
// Type definitions:
typedef uint8_t uint8;
typedef int8_t int8;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
// POSH gets this wrong due to __LP64__
#undef POSH_I64_PRINTF_PREFIX
#define POSH_I64_PRINTF_PREFIX "ll"
#else
// Type definitions: // Type definitions:
typedef posh_u8_t uint8; typedef posh_u8_t uint8;
@ -175,8 +204,23 @@ typedef posh_i16_t int16;
typedef posh_u32_t uint32; typedef posh_u32_t uint32;
typedef posh_i32_t int32; typedef posh_i32_t int32;
//#if NV_OS_DARWIN
// OSX-64 is supposed to be LP64 (longs and pointers are 64 bits), thus uint64 is defined as
// unsigned long. However, some OSX headers define it as unsigned long long, producing errors,
// even though both types are 64 bit. Ideally posh should handle that, but it has not been
// updated in ages, so here I'm just falling back to the standard C99 types defined in inttypes.h
//#include <inttypes.h>
//typedef posh_u64_t uint64_t;
//typedef posh_i64_t int64_t;
//#else
typedef posh_u64_t uint64; typedef posh_u64_t uint64;
typedef posh_i64_t int64; typedef posh_i64_t int64;
//#endif
#if NV_OS_DARWIN
// To avoid duplicate definitions.
#define _UINT64
#endif
#endif
// Aliases // Aliases
typedef uint32 uint; typedef uint32 uint;
@ -246,8 +290,10 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int32) == 4); NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4); NV_COMPILER_CHECK(sizeof(uint32) == 4);
#include <stddef.h> // for size_t
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
#define NV_ARRAY_SIZE(x) sizeof(ArraySizeHelper(x))
//#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 0 // Disabled in The Witness. #if 0 // Disabled in The Witness.
#if NV_CC_MSVC #if NV_CC_MSVC
@ -269,8 +315,38 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \ NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
} }
namespace nv {
    // Scope guard: invokes the stored callable exactly once, when the guard
    // that currently owns it is destroyed.
    // The guard is move-only. Moving transfers the pending call to the new
    // object and disarms the source, so a temporary that is not elided (for
    // example the return value of MakeScopeExit on a pre-C++17 compiler) does
    // not run the callable a second time.
    template <typename F>
    struct ScopeExit {
        ScopeExit(F f) : f(f), armed(true) {}

        // Take over the pending call from `other`.
        ScopeExit(ScopeExit && other) : f(static_cast<F &&>(other.f)), armed(other.armed) {
            other.armed = false;
        }

        ~ScopeExit() { if (armed) f(); }

        // Copying would make the callable run once per copy.
        ScopeExit(const ScopeExit &) = delete;
        ScopeExit & operator=(const ScopeExit &) = delete;

        F f;        // Callable to run on destruction.
        bool armed; // False once ownership of the call has been moved away.
    };

    // Build a ScopeExit, deducing the callable type from the argument.
    template <typename F>
    ScopeExit<F> MakeScopeExit(F f) {
        return ScopeExit<F>(f);
    }
}
// Run `code` when the enclosing scope is left, by storing a lambda in a local
// nv::ScopeExit guard. The lambda captures by copy ([=]), so `code` sees the
// values that local variables had at the point where the macro appears.
// NOTE(review): the guard's name is built from __LINE__, so two uses on the
// same source line would presumably collide - confirm against NV_STRING_JOIN2.
#define NV_ON_RETURN(code) \
auto NV_STRING_JOIN2(scope_exit_, __LINE__) = nv::MakeScopeExit([=](){code;})
// Indicate the compiler that the parameter is not used to suppress compier warnings. // Indicate the compiler that the parameter is not used to suppress compier warnings.
#if NV_CC_MSVC
#define NV_UNUSED(a) ((a)=(a)) #define NV_UNUSED(a) ((a)=(a))
#else
#define NV_UNUSED(a) _Pragma(NV_STRING(unused(a)))
#endif
// Branch prediction hints. Both macros evaluate to the truth value of `x`
// (0 or 1); on GCC/Clang they also tell the compiler which outcome to expect.
// The fallback is fully parenthesized and normalized with `!!` so that it
// composes with surrounding operators exactly like the builtin form, e.g.
// `!NV_LIKELY(a || b)` negates the whole condition.
#if NV_CC_GNUC || NV_CC_CLANG
#define NV_LIKELY(x) __builtin_expect(!!(x), 1)
#define NV_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define NV_LIKELY(x) (!!(x))
#define NV_UNLIKELY(x) (!!(x))
#endif
// Null index. @@ Move this somewhere else... it's only used by nvmesh. // Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0); //const unsigned int NIL = unsigned int(~0);

@ -632,44 +632,45 @@ void BlockCTX1::setIndices(int * idx)
/// Decode BC6 block. /// Decode BC6 block.
void BlockBC6::decodeBlock(Vector3 colors[16]) const void BlockBC6::decodeBlock(Vector4 colors[16]) const
{ {
ZOH::Tile tile(4, 4); ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile); ZOH::decompress((const char *)data, tile);
// Convert ZOH's tile struct to Vector3, and convert half to float. // Convert ZOH's tile struct to Vector3, and convert half to float.
for (uint y = 0; y < 4; ++y) for (uint y = 0; y < 4; ++y)
{ {
for (uint x = 0; x < 4; ++x) for (uint x = 0; x < 4; ++x)
{ {
uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
colors[y * 4 + x].x = to_float(rHalf); colors[y * 4 + x].x = to_float(rHalf);
colors[y * 4 + x].y = to_float(gHalf); colors[y * 4 + x].y = to_float(gHalf);
colors[y * 4 + x].z = to_float(bHalf); colors[y * 4 + x].z = to_float(bHalf);
} colors[y * 4 + x].w = 1.0f;
} }
}
} }
/// Decode BC7 block. /// Decode BC7 block.
void BlockBC7::decodeBlock(ColorBlock * block) const void BlockBC7::decodeBlock(ColorBlock * block) const
{ {
AVPCL::Tile tile(4, 4); AVPCL::Tile tile(4, 4);
AVPCL::decompress((const char *)data, tile); AVPCL::decompress((const char *)data, tile);
// Convert AVPCL's tile struct back to NVTT's. // Convert AVPCL's tile struct back to NVTT's.
for (uint y = 0; y < 4; ++y) for (uint y = 0; y < 4; ++y)
{ {
for (uint x = 0; x < 4; ++x) for (uint x = 0; x < 4; ++x)
{ {
Vector4 rgba = tile.data[y][x]; Vector4 rgba = tile.data[y][x];
// Note: decoded rgba values are in [0, 255] range and should be an integer, // Note: decoded rgba values are in [0, 255] range and should be an integer,
// because BC7 never uses more than 8 bits per channel. So no need to round. // because BC7 never uses more than 8 bits per channel. So no need to round.
block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w)); block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w));
} }
} }
} }

@ -36,6 +36,7 @@ namespace nv
struct AlphaBlock4x4; struct AlphaBlock4x4;
class Stream; class Stream;
class Vector3; class Vector3;
class Vector4;
/// DXT1 block. /// DXT1 block.
@ -220,7 +221,7 @@ namespace nv
/// BC6 block: 16 bytes of raw compressed data.
struct BlockBC6
{
    uint8 data[16]; // Not even going to try to write a union for this thing.

    // Decodes the block into 16 float colors. The implementation fills the
    // array in row-major order (index y * 4 + x) and sets w to 1.0f.
    void decodeBlock(Vector4 colors[16]) const;
};
/// BC7 block. /// BC7 block.

@ -14,7 +14,8 @@ SET(IMAGE_SRCS
NormalMap.h NormalMap.cpp NormalMap.h NormalMap.cpp
PixelFormat.h PixelFormat.h
PsdFile.h PsdFile.h
TgaFile.h) TgaFile.h
KtxFile.h KtxFile.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

@ -454,7 +454,8 @@ namespace
{ D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } }, { D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } },
{ D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } }, { D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } },
{ D3DFMT_A8L8, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0, 0, 0xFF00 } }, { D3DFMT_A8L8, 0, { 16, 0xFF, 0, 0, 0xFF00 } },
{ 0, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0xFF00, 0, 0 } },
}; };
static const uint s_formatCount = NV_ARRAY_SIZE(s_formats); static const uint s_formatCount = NV_ARRAY_SIZE(s_formats);
@ -635,7 +636,7 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{ {
// set fourcc pixel format. // set fourcc pixel format.
this->pf.flags = DDPF_FOURCC; this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); this->pf.fourcc = NV_MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0; this->pf.bitcount = 0;
this->pf.rmask = 0; this->pf.rmask = 0;
@ -659,7 +660,7 @@ void DDSHeader::setFormatCode(uint32 code)
// Packs the four swizzle characters into a fourcc-style code and stores it
// in the pixel format's bitcount field.
void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
    const uint swizzle = NV_MAKEFOURCC(c0, c1, c2, c3);
    this->pf.bitcount = swizzle;
}
@ -1445,7 +1446,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{ {
BlockBC6 block; BlockBC6 block;
*stream << block; *stream << block;
Vector3 colors[16]; Vector4 colors[16];
block.decodeBlock(colors); block.decodeBlock(colors);
// Clamp to [0, 1] and round to 8-bit // Clamp to [0, 1] and round to 8-bit
@ -1453,7 +1454,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{ {
for (int x = 0; x < 4; ++x) for (int x = 0; x < 4; ++x)
{ {
Vector3 px = colors[y*4 + x]; Vector4 px = colors[y*4 + x];
rgba->color(x, y).setRGBA( rgba->color(x, y).setRGBA(
ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f), ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f),
ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f), ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f),
@ -1535,7 +1536,7 @@ uint DirectDrawSurface::surfaceSize(uint mipmap) const
else { else {
w = (w + 3) / 4; w = (w + 3) / 4;
h = (h + 3) / 4; h = (h + 3) / 4;
d = d; // @@ How are 3D textures aligned? //d = d; // @@ How are 3D textures aligned?
return blockSize * w * h * d; return blockSize * w * h * d;
} }
} }

@ -27,11 +27,9 @@
#include "nvimage.h" #include "nvimage.h"
// Packs four characters into a single uint: ch0 occupies the lowest byte
// and ch3 the highest. Each argument is truncated to uint8 first.
#define NV_MAKEFOURCC(ch0, ch1, ch2, ch3) \
    (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \
    (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 ))
namespace nv namespace nv
{ {
@ -101,19 +99,26 @@ namespace nv
    // Four-character codes, each packed into an integer with NV_MAKEFOURCC.
    enum FOURCC
    {
        FOURCC_NVTT = NV_MAKEFOURCC('N', 'V', 'T', 'T'),
        FOURCC_DDS = NV_MAKEFOURCC('D', 'D', 'S', ' '),
        FOURCC_DXT1 = NV_MAKEFOURCC('D', 'X', 'T', '1'),
        FOURCC_DXT2 = NV_MAKEFOURCC('D', 'X', 'T', '2'),
        FOURCC_DXT3 = NV_MAKEFOURCC('D', 'X', 'T', '3'),
        FOURCC_DXT4 = NV_MAKEFOURCC('D', 'X', 'T', '4'),
        FOURCC_DXT5 = NV_MAKEFOURCC('D', 'X', 'T', '5'),
        FOURCC_RXGB = NV_MAKEFOURCC('R', 'X', 'G', 'B'),
        FOURCC_ATI1 = NV_MAKEFOURCC('A', 'T', 'I', '1'),
        FOURCC_ATI2 = NV_MAKEFOURCC('A', 'T', 'I', '2'),
        FOURCC_A2XY = NV_MAKEFOURCC('A', '2', 'X', 'Y'),
        FOURCC_DX10 = NV_MAKEFOURCC('D', 'X', '1', '0'),
        FOURCC_UVER = NV_MAKEFOURCC('U', 'V', 'E', 'R'),
        FOURCC_BC6H = NV_MAKEFOURCC('B', 'C', '6', 'H'),
        FOURCC_BC7L = NV_MAKEFOURCC('B', 'C', '7', 'L'),
        FOURCC_PVR0 = NV_MAKEFOURCC('P', 'V', 'R', '0'),
        FOURCC_PVR1 = NV_MAKEFOURCC('P', 'V', 'R', '1'),
        FOURCC_PVR2 = NV_MAKEFOURCC('P', 'V', 'R', '2'),
        FOURCC_PVR3 = NV_MAKEFOURCC('P', 'V', 'R', '3'),
    };

@ -1,460 +1,513 @@
#include "ErrorMetric.h" #include "ErrorMetric.h"
#include "FloatImage.h" #include "FloatImage.h"
#include "Filter.h" #include "Filter.h"
#include "nvmath/Matrix.h" #include "nvmath/Matrix.h"
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include <float.h> // FLT_MAX #include <float.h> // FLT_MAX
using namespace nv; using namespace nv;
float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
{ {
if (!sameLayout(img, ref)) { if (!sameLayout(img, ref)) {
return FLT_MAX; return FLT_MAX;
} }
nvDebugCheck(img->componentCount() == 4); nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4); nvDebugCheck(ref->componentCount() == 4);
double mse = 0; double mse = 0;
const uint count = img->pixelCount(); const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
float r0 = ref->pixel(i + count * 0); float r0 = ref->pixel(i + count * 0);
float g0 = ref->pixel(i + count * 1); float g0 = ref->pixel(i + count * 1);
float b0 = ref->pixel(i + count * 2); float b0 = ref->pixel(i + count * 2);
float a0 = ref->pixel(i + count * 3); float a0 = ref->pixel(i + count * 3);
float r1 = img->pixel(i + count * 0); float r1 = img->pixel(i + count * 0);
float g1 = img->pixel(i + count * 1); float g1 = img->pixel(i + count * 1);
float b1 = img->pixel(i + count * 2); float b1 = img->pixel(i + count * 2);
//float a1 = img->pixel(i + count * 3); //float a1 = img->pixel(i + count * 3);
float r = r0 - r1; float r = r0 - r1;
float g = g0 - g1; float g = g0 - g1;
float b = b0 - b1; float b = b0 - b1;
float a = 1; float a = 1;
if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ? if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ?
mse += (r * r) * a; mse += (r * r) * a;
mse += (g * g) * a; mse += (g * g) * a;
mse += (b * b) * a; mse += (b * b) * a;
} }
return float(sqrt(mse / count)); return float(sqrt(mse / count));
} }
float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img) float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img)
{ {
if (!sameLayout(img, ref)) { if (!sameLayout(img, ref)) {
return FLT_MAX; return FLT_MAX;
} }
nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4); nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
double mse = 0; double mse = 0;
const uint count = img->pixelCount(); const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
float a0 = img->pixel(i + count * 3); float a0 = img->pixel(i + count * 3);
float a1 = ref->pixel(i + count * 3); float a1 = ref->pixel(i + count * 3);
float a = a0 - a1; float a = a0 - a1;
mse += a * a; mse += a * a;
} }
return float(sqrt(mse / count)); return float(sqrt(mse / count));
} }
float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
{ {
if (!sameLayout(img, ref)) { if (!sameLayout(img, ref)) {
return FLT_MAX; return FLT_MAX;
} }
nvDebugCheck(img->componentCount() == 4); nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4); nvDebugCheck(ref->componentCount() == 4);
double mae = 0; double mae = 0;
const uint count = img->pixelCount(); const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
float r0 = img->pixel(i + count * 0); float r0 = img->pixel(i + count * 0);
float g0 = img->pixel(i + count * 1); float g0 = img->pixel(i + count * 1);
float b0 = img->pixel(i + count * 2); float b0 = img->pixel(i + count * 2);
//float a0 = img->pixel(i + count * 3); //float a0 = img->pixel(i + count * 3);
float r1 = ref->pixel(i + count * 0); float r1 = ref->pixel(i + count * 0);
float g1 = ref->pixel(i + count * 1); float g1 = ref->pixel(i + count * 1);
float b1 = ref->pixel(i + count * 2); float b1 = ref->pixel(i + count * 2);
float a1 = ref->pixel(i + count * 3); float a1 = ref->pixel(i + count * 3);
float r = fabs(r0 - r1); float r = fabs(r0 - r1);
float g = fabs(g0 - g1); float g = fabs(g0 - g1);
float b = fabs(b0 - b1); float b = fabs(b0 - b1);
float a = 1; float a = 1;
if (alphaWeight) a = a1; if (alphaWeight) a = a1;
mae += r * a; mae += r * a;
mae += g * a; mae += g * a;
mae += b * a; mae += b * a;
} }
return float(mae / count); return float(mae / count);
} }
float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img) float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img)
{ {
if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) { if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
return FLT_MAX; return FLT_MAX;
} }
nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4); nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
double mae = 0; double mae = 0;
const uint count = img->width() * img->height(); const uint count = img->width() * img->height();
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
float a0 = img->pixel(i + count * 3); float a0 = img->pixel(i + count * 3);
float a1 = ref->pixel(i + count * 3); float a1 = ref->pixel(i + count * 3);
float a = a0 - a1; float a = a0 - a1;
mae += fabs(a); mae += fabs(a);
} }
return float(mae / count); return float(mae / count);
} }
// Color space conversions based on: float nv::rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight)
// http://www.brucelindbloom.com/ {
nvDebugCheck(img->componentCount() == 4);
// Assumes input is in *linear* sRGB color space. nvDebugCheck(ref->componentCount() == 4);
static Vector3 rgbToXyz(Vector3::Arg c)
{ double mse = 0;
Vector3 xyz;
xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z; const uint w0 = ref->width();
xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z; const uint h0 = ref->height();
xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z; const uint d0 = ref->depth();
return xyz;
} const uint w1 = img->width();
const uint h1 = img->height();
static Vector3 xyzToRgb(Vector3::Arg c) const uint d1 = img->depth();
{
Vector3 rgb; for (uint z = 0; z < d0; z++) {
rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z; for (uint y = 0; y < h0; y++) {
rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z; for (uint x = 0; x < w0; x++) {
rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z; float r0 = ref->pixel(0, x, y, z);
return rgb; float g0 = ref->pixel(1, x, y, z);
} float b0 = ref->pixel(2, x, y, z);
float a0 = ref->pixel(3, x, y, z);
static float toLinear(float f)
{ float fx = float(x) / w0;
return powf(f, 2.2f); float fy = float(y) / h0;
} float fz = float(z) / d0;
static float toGamma(float f) float r1 = img->sampleLinear(0, fx, fy, fz, wm);
{ float g1 = img->sampleLinear(1, fx, fy, fz, wm);
// @@ Use sRGB space? float b1 = img->sampleLinear(2, fx, fy, fz, wm);
return powf(f, 1.0f/2.2f); float a1 = img->sampleLinear(2, fx, fy, fz, wm);
}
float dr = r0 - r1;
static Vector3 toLinear(Vector3::Arg c) float dg = g0 - g1;
{ float db = b0 - b1;
return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z)); float da = a0 - a1;
}
float w = 1;
static Vector3 toGamma(Vector3::Arg c) if (alphaWeight) w = a0 * a0; // @@ a0*a1 or a0*a0 ?
{
return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z)); mse += (dr * dr) * w;
} mse += (dg * dg) * w;
mse += (db * db) * w;
static float f(float t) mse += (da * da);
{ }
const float epsilon = powf(6.0f/29.0f, 3); }
}
if (t > epsilon) {
return powf(t, 1.0f/3.0f); int count = w0 * h0 * d0;
} return float(sqrt(mse / count));
else { }
return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f;
}
} // Color space conversions based on:
// http://www.brucelindbloom.com/
static float finv(float t)
{ // Assumes input is in *linear* sRGB color space.
if (t > 6.0f / 29.0f) { static Vector3 rgbToXyz(Vector3::Arg c)
return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f); {
} Vector3 xyz;
else { xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z;
return powf(t, 3.0f); xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z;
} xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z;
} return xyz;
}
static Vector3 xyzToCieLab(Vector3::Arg c)
{ static Vector3 xyzToRgb(Vector3::Arg c)
// Normalized white point. {
const float Xn = 0.950456f; Vector3 rgb;
const float Yn = 1.0f; rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z;
const float Zn = 1.088754f; rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z;
rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z;
float Xr = c.x / Xn; return rgb;
float Yr = c.y / Yn; }
float Zr = c.z / Zn;
// Gamma-space to linear conversion using a plain 2.2 power curve.
static float toLinear(float f)
{
    const float gamma = 2.2f;
    return powf(f, gamma);
}
// Linear to gamma-space conversion using a plain 1/2.2 power curve.
static float toGamma(float f)
{
    // @@ Use sRGB space?
    const float invGamma = 1.0f/2.2f;
    return powf(f, invGamma);
}
}
// Applies the scalar toLinear() to each component of a color.
static Vector3 toLinear(Vector3::Arg c)
{
    const float r = toLinear(c.x);
    const float g = toLinear(c.y);
    const float b = toLinear(c.z);
    return Vector3(r, g, b);
}
}
// Applies the scalar toGamma() to each component of a color.
static Vector3 toGamma(Vector3::Arg c)
{
    const float r = toGamma(c.x);
    const float g = toGamma(c.y);
    const float b = toGamma(c.z);
    return Vector3(r, g, b);
}
return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.y, c.z));
// Piecewise helper of the XYZ -> CIE Lab conversion: a cube root above
// (6/29)^3 and a linear segment at or below it.
static float f(float t)
{
    const float epsilon = powf(6.0f/29.0f, 3);

    // Linear segment for small values.
    if (!(t > epsilon)) {
        const float slope = 1.0f/3.0f * powf(29.0f/6.0f, 2);
        return slope * t + 4.0f / 29.0f;
    }

    return powf(t, 1.0f/3.0f);
}
const float * R = rgbImage->channel(0);
// Inverse of the piecewise function f() used by the XYZ <-> CIE Lab conversion:
//   t^3                          for t >  6/29
//   3 * (6/29)^2 * (t - 4/29)    for t <= 6/29
// The two branches were previously swapped, so finv(f(x)) did not return x.
static float finv(float t)
{
    const float delta = 6.0f / 29.0f;

    if (t > delta) {
        return powf(t, 3.0f);
    }
    else {
        return 3.0f * powf(delta, 2) * (t - 4.0f / 29.0f);
    }
}
{
// Converts a CIE XYZ color to CIE Lab, returned as (L, a, b).
static Vector3 xyzToCieLab(Vector3::Arg c)
{
    // Normalized white point.
    const float Xn = 0.950456f;
    const float Yn = 1.0f;
    const float Zn = 1.088754f;

    // Normalize each component by the white point, then apply f().
    const float fx = f(c.x / Xn);
    const float fy = f(c.y / Yn);
    const float fz = f(c.z / Zn);

    return Vector3(116 * fx - 16, 500 * (fx - fy), 200 * (fy - fz));
}
// Converts an RGB color to CIE Lab: toLinear(), then RGB -> XYZ, then XYZ -> Lab.
// NOTE(review): the error metrics that call this state their inputs are already
// linear, yet toLinear() is applied here - confirm which is intended.
static Vector3 rgbToCieLab(Vector3::Arg c)
{
    const Vector3 linear = toLinear(c);
    const Vector3 xyz = rgbToXyz(linear);
    return xyzToCieLab(xyz);
}
{
// Converts a CIE Lab color (L, a, b) to cylindrical LCh, returned as (L, C, h):
//   C = sqrt(a^2 + b^2) is the chroma,
//   h = atan2(b, a) is the hue angle in radians.
// The atan2 arguments were previously swapped, measuring the angle from the b axis.
static Vector3 cieLabToLCh(Vector3::Arg c)
{
    const float chroma = sqrtf(c.y*c.y + c.z*c.z);
    const float hue = atan2f(c.z, c.y);
    return Vector3(c.x, chroma, hue);
}
error += length(delta); static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
} {
nvDebugCheck(rgbImage != NULL && LabImage != NULL);
return float(error / count); nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
} nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
// Assumes input images are in linear sRGB space. const uint w = rgbImage->width();
float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1) const uint h = LabImage->height();
{
if (!sameLayout(img0, img1)) return FLT_MAX; const float * R = rgbImage->channel(0);
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); const float * G = rgbImage->channel(1);
const float * B = rgbImage->channel(2);
const float kL = 1;
const float kC = 1; float * L = LabImage->channel(0);
const float kH = 1; float * a = LabImage->channel(1);
const float k1 = 0.045f; float * b = LabImage->channel(2);
const float k2 = 0.015f;
const uint count = w*h;
const float sL = 1; for (uint i = 0; i < count; i++)
{
const float * r0 = img0->channel(0); Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i]));
const float * g0 = img0->channel(1); L[i] = Lab.x;
const float * b0 = img0->channel(2); a[i] = Lab.y;
b[i] = Lab.z;
const float * r1 = img1->channel(0); }
const float * g1 = img1->channel(1); }
const float * b1 = img1->channel(2);
double error = 0.0f; // Assumes input images are in linear sRGB space.
float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
const uint count = img0->pixelCount(); {
for (uint i = 0; i < count; ++i) if (!sameLayout(img0, img1)) return FLT_MAX;
{ nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
Vector3 lch0 = cieLabToLCh(lab0); const float * r0 = img0->channel(0);
Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i])); const float * g0 = img0->channel(1);
Vector3 lch1 = cieLabToLCh(lab1); const float * b0 = img0->channel(2);
const float sC = 1 + k1*lch0.x; const float * r1 = img1->channel(0);
const float sH = 1 + k2*lch0.x; const float * g1 = img1->channel(1);
const float * b1 = img1->channel(2);
// @@ Measure Delta E using the 1994 definition
Vector3 labDelta = lab0 - lab1; double error = 0.0f;
Vector3 lchDelta = lch0 - lch1;
const uint count = img0->pixelCount();
double deltaLsq = powf(lchDelta.x / (kL*sL), 2); for (uint i = 0; i < count; i++)
double deltaCsq = powf(lchDelta.y / (kC*sC), 2); {
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
// avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2 Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2);
deltaHsq /= powf(kH*sH, 2); // @@ Measure Delta E.
Vector3 delta = lab0 - lab1;
error += sqrt(deltaLsq + deltaCsq + deltaHsq);
} error += length(delta);
}
return float(error / count);
} return float(error / count);
}
float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
{ // Assumes input images are in linear sRGB space.
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1)
return FLT_MAX; {
} if (!sameLayout(img0, img1)) return FLT_MAX;
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width(); const float kL = 1;
uint h = img0->height(); const float kC = 1;
uint d = img0->depth(); const float kH = 1;
const float k1 = 0.045f;
FloatImage lab0, lab1; // Original images in CIE-Lab space. const float k2 = 0.015f;
lab0.allocate(3, w, h, d);
lab1.allocate(3, w, h, d); const float sL = 1;
// Convert input images to CIE-Lab. const float * r0 = img0->channel(0);
rgbToCieLab(img0, &lab0); const float * g0 = img0->channel(1);
rgbToCieLab(img1, &lab1); const float * b0 = img0->channel(2);
// @@ Convolve each channel by the corresponding filter. const float * r1 = img1->channel(0);
/* const float * g1 = img1->channel(1);
GaussianFilter LFilter(5); const float * b1 = img1->channel(2);
GaussianFilter aFilter(5);
GaussianFilter bFilter(5); double error = 0.0f;
lab0.convolve(0, LFilter); const uint count = img0->pixelCount();
lab0.convolve(1, aFilter); for (uint i = 0; i < count; ++i)
lab0.convolve(2, bFilter); {
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
lab1.convolve(0, LFilter); Vector3 lch0 = cieLabToLCh(lab0);
lab1.convolve(1, aFilter); Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
lab1.convolve(2, bFilter); Vector3 lch1 = cieLabToLCh(lab1);
*/
// @@ Measure Delta E between lab0 and lab1. const float sC = 1 + k1*lch0.x;
const float sH = 1 + k2*lch0.x;
return 0.0f;
} // @@ Measure Delta E using the 1994 definition
Vector3 labDelta = lab0 - lab1;
Vector3 lchDelta = lch0 - lch1;
// Assumes input images are normal maps.
float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1) double deltaLsq = powf(lchDelta.x / (kL*sL), 2);
{ double deltaCsq = powf(lchDelta.y / (kC*sC), 2);
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX; // avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2
} double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2);
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); deltaHsq /= powf(kH*sH, 2);
uint w = img0->width(); error += sqrt(deltaLsq + deltaCsq + deltaHsq);
uint h = img0->height(); }
const float * x0 = img0->channel(0); return float(error / count);
const float * y0 = img0->channel(1); }
const float * z0 = img0->channel(2);
// Unfinished spatial CIE Lab metric. It converts both images to CIE Lab,
// but the filtering and the final Delta E measurement are not implemented,
// so it returns 0.0f for any pair of images that passes the size check.
// Returns FLT_MAX if either image is NULL or their width/height differ.
float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
{
    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
        return FLT_MAX;
    }
    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);

    uint w = img0->width();
    uint h = img0->height();
    uint d = img0->depth();

    FloatImage lab0, lab1; // Original images in CIE-Lab space.
    lab0.allocate(3, w, h, d);
    lab1.allocate(3, w, h, d);

    // Convert input images to CIE-Lab.
    rgbToCieLab(img0, &lab0);
    rgbToCieLab(img1, &lab1);

    // @@ Convolve each channel by the corresponding filter.
    /*
    GaussianFilter LFilter(5);
    GaussianFilter aFilter(5);
    GaussianFilter bFilter(5);

    lab0.convolve(0, LFilter);
    lab0.convolve(1, aFilter);
    lab0.convolve(2, bFilter);

    lab1.convolve(0, LFilter);
    lab1.convolve(1, aFilter);
    lab1.convolve(2, bFilter);
    */
    // @@ Measure Delta E between lab0 and lab1.

    // Placeholder result until the measurement above is implemented.
    return 0.0f;
}
const float * z0 = img0->channel(2);
const float * x1 = img1->channel(0); // Assumes input images are normal maps.
const float * y1 = img1->channel(1); float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1)
const float * z1 = img1->channel(2); {
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
double error = 0.0f; return FLT_MAX;
}
const uint count = w*h; nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
for (uint i = 0; i < count; i++)
{ uint w = img0->width();
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); uint h = img0->height();
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
const float * x0 = img0->channel(0);
n0 = 2.0f * n0 - Vector3(1); const float * y0 = img0->channel(1);
n1 = 2.0f * n1 - Vector3(1); const float * z0 = img0->channel(2);
n0 = normalizeSafe(n0, Vector3(0), 0.0f); const float * x1 = img1->channel(0);
n1 = normalizeSafe(n1, Vector3(0), 0.0f); const float * y1 = img1->channel(1);
const float * z1 = img1->channel(2);
float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
error += angle * angle; double error = 0.0f;
}
const uint count = w*h;
return float(sqrt(error / count)); for (uint i = 0; i < count; i++)
} {
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
n0 = 2.0f * n0 - Vector3(1);
n1 = 2.0f * n1 - Vector3(1);
n0 = normalizeSafe(n0, Vector3(0), 0.0f);
n1 = normalizeSafe(n1, Vector3(0), 0.0f);
error += acos(clamp(dot(n0, n1), -1.0f, 1.0f));
}
return float(error / count);
}
float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
const float * x0 = img0->channel(0);
const float * y0 = img0->channel(1);
const float * z0 = img0->channel(2);
const float * x1 = img1->channel(0);
const float * y1 = img1->channel(1);
const float * z1 = img1->channel(2);
double error = 0.0f;
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
n0 = 2.0f * n0 - Vector3(1);
n1 = 2.0f * n1 - Vector3(1);
n0 = normalizeSafe(n0, Vector3(0), 0.0f);
n1 = normalizeSafe(n1, Vector3(0), 0.0f);
float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
error += angle * angle;
}
return float(sqrt(error / count));
}

@ -1,5 +1,6 @@
#include "nvimage.h" #include "nvimage.h"
#include "FloatImage.h" // For FloatImage::WrapMode
namespace nv namespace nv
@ -9,13 +10,15 @@ namespace nv
float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float rmsAlphaError(const FloatImage * ref, const FloatImage * img); float rmsAlphaError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight);
float cieLabError(const FloatImage * ref, const FloatImage * img); float cieLabError(const FloatImage * ref, const FloatImage * img);
float cieLab94Error(const FloatImage * ref, const FloatImage * img); float cieLab94Error(const FloatImage * ref, const FloatImage * img);
float spatialCieLabError(const FloatImage * ref, const FloatImage * img); float spatialCieLabError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float averageAngularError(const FloatImage * img0, const FloatImage * img1); float averageAngularError(const FloatImage * img0, const FloatImage * img1);
float rmsAngularError(const FloatImage * img0, const FloatImage * img1); float rmsAngularError(const FloatImage * img0, const FloatImage * img1);

File diff suppressed because it is too large Load Diff

@ -35,6 +35,7 @@ namespace nv
}; };
NVIMAGE_API FloatImage(); NVIMAGE_API FloatImage();
NVIMAGE_API FloatImage(const FloatImage & img);
NVIMAGE_API FloatImage(const Image * img); NVIMAGE_API FloatImage(const Image * img);
NVIMAGE_API virtual ~FloatImage(); NVIMAGE_API virtual ~FloatImage();
@ -92,10 +93,10 @@ namespace nv
NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const;

@ -42,13 +42,21 @@ const Image & Image::operator=(const Image & img)
// Releases the current pixels and allocates storage for a w x h x d image.
void Image::allocate(uint w, uint h, uint d/*= 1*/)
{
    // NOTE(review): presumably free() also resets m_data, so the realloc
    // below does not operate on a released pointer - confirm.
    free();

    const uint texelCount = w * h * d;
    m_data = realloc<Color32>(m_data, texelCount);

    m_width = w;
    m_height = h;
    m_depth = d;
}
// Releases the current pixels and adopts the given buffer as the image data,
// with the given dimensions. The pointer is stored as-is, not copied.
void Image::acquire(Color32 * data, uint w, uint h, uint d/*= 1*/)
{
    free();

    m_data = data;
    m_width = w;
    m_height = h;
    m_depth = d;
}
void Image::resize(uint w, uint h, uint d/*= 1*/) { void Image::resize(uint w, uint h, uint d/*= 1*/) {
Image img; Image img;

@ -34,6 +34,7 @@ namespace nv
void allocate(uint w, uint h, uint d = 1); void allocate(uint w, uint h, uint d = 1);
void acquire(Color32 * data, uint w, uint h, uint d = 1);
bool load(const char * name); bool load(const char * name);
void resize(uint w, uint h, uint d = 1); void resize(uint w, uint h, uint d = 1);

@ -8,6 +8,8 @@
#include "DirectDrawSurface.h" #include "DirectDrawSurface.h"
#include "PixelFormat.h" #include "PixelFormat.h"
#include "nvthread/ParallelFor.h"
#include "nvmath/Color.h" #include "nvmath/Color.h"
#include "nvmath/Half.h" #include "nvmath/Half.h"
@ -19,31 +21,31 @@
#include "nvcore/TextWriter.h" #include "nvcore/TextWriter.h"
// Extern // Extern
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
# include <FreeImage.h> # include <FreeImage.h>
// If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms. // If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms.
# undef HAVE_JPEG # undef NV_HAVE_JPEG
# undef HAVE_PNG # undef NV_HAVE_PNG
# undef HAVE_TIFF # undef NV_HAVE_TIFF
# undef HAVE_OPENEXR # undef NV_HAVE_OPENEXR
#endif #endif
#if defined(HAVE_JPEG) #if defined(NV_HAVE_JPEG)
extern "C" { extern "C" {
# include <jpeglib.h> # include <jpeglib.h>
} }
#endif #endif
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
# include <png.h> # include <png.h>
#endif #endif
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
# define _TIFF_DATA_TYPEDEFS_ # define _TIFF_DATA_TYPEDEFS_
# include <tiffio.h> # include <tiffio.h>
#endif #endif
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
# include <ImfIO.h> # include <ImfIO.h>
# include <ImathBox.h> # include <ImathBox.h>
# include <ImfChannelList.h> # include <ImfChannelList.h>
@ -52,7 +54,7 @@ extern "C" {
# include <ImfArray.h> # include <ImfArray.h>
#endif #endif
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
# define STBI_NO_STDIO # define STBI_NO_STDIO
# include <stb_image.h> # include <stb_image.h>
#endif #endif
@ -303,6 +305,51 @@ static bool saveTGA(Stream & s, const Image * img)
return true; return true;
} }
// Save BMP image.
//
// Writes `img` to `s` as an uncompressed 24-bit BMP: file header, info header,
// then the pixel rows in bottom-up order with B, G, R byte ordering.
// Every row is padded with zero bytes to a multiple of 4 bytes, as the BMP
// format requires; the sizes stored in the headers include that padding.
// Only the first depth slice is written and the alpha channel is discarded.
// Always returns true; stream errors are not checked here.
static bool saveBMP(Stream & s, const Image * img)
{
    const int w = img->width();
    const int h = img->height();

    // Scanlines must start on 4-byte boundaries, so round the row size up.
    const int row_size = (3 * w + 3) & ~3;
    const int image_size = row_size * h;

    BmpFileHeader header;
    zero(header);
    header.type = BM_TYPE;
    header.size = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE + image_size;
    header.offBits = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE;

    BmpInfoHeader info;
    zero(info);
    info.size = BITMAPINFOHEADER_SIZE;
    info.width = w;
    info.height = h;    // Positive height: rows are stored bottom-up.
    info.planes = 1;
    info.bitCount = 24;
    info.sizeImage = image_size;
    info.xPelsPerMeter = 2000;
    info.yPelsPerMeter = 2000;

    s << header;
    s << info;

    // One padded scanline, reused for every row.
    nv::Array<uint8> data;
    data.resize(row_size);

    // The padding bytes never change, so clear them once up front.
    for (int i = 3 * w; i < row_size; i++) {
        data[i] = 0;
    }

    for (int y = 0; y < h; y++) {
        // BMP rows go bottom-up, so output row y reads source row h - y - 1.
        const int src_y = h - y - 1;

        for (int x = 0; x < w; x++) {
            const Color32 c = img->pixel(x, src_y);
            data[x * 3 + 0] = c.b;
            data[x * 3 + 1] = c.g;
            data[x * 3 + 2] = c.r;
        }

        s.serialize(data.buffer(), data.size());
    }

    return true;
}
/*static Image * loadPPM(Stream & s) /*static Image * loadPPM(Stream & s)
{ {
// @@ // @@
@ -324,7 +371,10 @@ static bool savePPM(Stream & s, const Image * img)
writer.writeString("255\n"); writer.writeString("255\n");
for (uint i = 0; i < w * h; i++) { for (uint i = 0; i < w * h; i++) {
Color32 c = img->pixel(i); Color32 c = img->pixel(i);
s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b; uint8 r = c.r; // current version of apple's llvm compiling for arm64 doesn't like taking the address of a bit-field. Workaround by using the stack
uint8 g = c.g;
uint8 b = c.b;
s << r << g << b;
} }
return true; return true;
@ -653,7 +703,7 @@ static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component
} }
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
{ {
@ -902,9 +952,9 @@ static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/)
return true; return true;
} }
#endif // defined(HAVE_PNG) #endif // defined(NV_HAVE_PNG)
#if defined(HAVE_JPEG) #if defined(NV_HAVE_JPEG)
static void init_source (j_decompress_ptr /*cinfo*/){ static void init_source (j_decompress_ptr /*cinfo*/){
} }
@ -1011,9 +1061,9 @@ static Image * loadJPG(Stream & s)
return img.release(); return img.release();
} }
#endif // defined(HAVE_JPEG) #endif // defined(NV_HAVE_JPEG)
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
/* /*
static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size) static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size)
@ -1207,9 +1257,9 @@ static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint
return true; return true;
} }
#endif // defined(HAVE_TIFF) #endif // defined(NV_HAVE_TIFF)
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
namespace namespace
{ {
@ -1348,10 +1398,10 @@ static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint
return true; return true;
} }
#endif // defined(HAVE_OPENEXR) #endif // defined(NV_HAVE_OPENEXR)
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle)
{ {
@ -1688,10 +1738,10 @@ bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Fl
return result; return result;
} }
#endif // defined(HAVE_FREEIMAGE) #endif // defined(NV_HAVE_FREEIMAGE)
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
static Image * loadSTB(Stream & s) static Image * loadSTB(Stream & s)
{ {
@ -1704,28 +1754,22 @@ static Image * loadSTB(Stream & s)
int w, h, n; int w, h, n;
uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4); uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4);
// @@ Hack: STB is returning n=4, because we request 4 components, even when input only has 3.
n = 3;
delete [] buffer; delete [] buffer;
if (data != NULL) { if (data != NULL) {
Image * img = new Image; Image * img = new Image;
img->allocate(w, h); img->acquire((Color32 *)data, w, h);
img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB); img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB);
for (int y = 0; y < h; ++y) int count = w * h;
{ for (int i = 0; i < count; ++i) {
nv::Color32* dest = img->scanline(y); //parallel_for(count, 128, [&](int i) {
uint8* src = data + y * w * 4; Color32 & pixel = img->pixel(i);
swap(pixel.r, pixel.b);
for (int x = 0; x < w; ++x) }//);
{
dest[x].r = src[x * 4 + 0];
dest[x].g = src[x * 4 + 1];
dest[x].b = src[x * 4 + 2];
dest[x].a = src[x * 4 + 3];
}
}
free(data);
return img; return img;
} }
@ -1766,7 +1810,7 @@ static FloatImage * loadFloatSTB(Stream & s)
return NULL; return NULL;
} }
#endif // defined(HAVE_STBIMAGE) #endif // defined(NV_HAVE_STBIMAGE)
@ -1804,32 +1848,33 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s)
return loadPPM(s); return loadPPM(s);
}*/ }*/
#if defined(HAVE_JPEG) #if defined(NV_HAVE_JPEG)
if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) { if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) {
return loadJPG(s); return loadJPG(s);
} }
#endif #endif
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) { if (strCaseDiff(extension, ".png") == 0) {
return loadPNG(s); return loadPNG(s);
} }
#endif #endif
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFreeImage(fif, s); return loadFreeImage(fif, s);
} }
#endif #endif
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
return loadSTB(s); return loadSTB(s);
#endif #endif
return NULL; return NULL;
} }
bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/) bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/)
{ {
nvDebugCheck(fileName != NULL); nvDebugCheck(fileName != NULL);
@ -1838,6 +1883,10 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
const char * extension = Path::extension(fileName); const char * extension = Path::extension(fileName);
if (strCaseDiff(extension, ".bmp") == 0) {
return saveBMP(s, img);
}
if (strCaseDiff(extension, ".tga") == 0) { if (strCaseDiff(extension, ".tga") == 0) {
return saveTGA(s, img); return saveTGA(s, img);
} }
@ -1846,13 +1895,13 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
return savePPM(s, img); return savePPM(s, img);
} }
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) { if (strCaseDiff(extension, ".png") == 0) {
return savePNG(s, img, tags); return savePNG(s, img, tags);
} }
#endif #endif
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFreeImage(fif, s, img, tags); return saveFreeImage(fif, s, img, tags);
@ -1899,27 +1948,27 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
return loadFloatPFM(s); return loadFloatPFM(s);
}*/ }*/
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
#pragma NV_MESSAGE("TODO: Load TIFF from stream.") #pragma NV_MESSAGE("TODO: Load TIFF from stream.")
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return loadFloatTIFF(fileName, s); return loadFloatTIFF(fileName, s);
} }
#endif #endif
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
#pragma NV_MESSAGE("TODO: Load EXR from stream.") #pragma NV_MESSAGE("TODO: Load EXR from stream.")
if (strCaseDiff(extension, ".exr") == 0) { if (strCaseDiff(extension, ".exr") == 0) {
return loadFloatEXR(fileName, s); return loadFloatEXR(fileName, s);
} }
#endif #endif
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
if (strCaseDiff(extension, ".hdr") == 0) { if (strCaseDiff(extension, ".hdr") == 0) {
return loadFloatSTB(s); return loadFloatSTB(s);
} }
#endif #endif
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFloatFreeImage(fif, s); return loadFloatFreeImage(fif, s);
@ -1961,7 +2010,7 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
return saveFloatPFM(s, fimage, baseComponent, componentCount); return saveFloatPFM(s, fimage, baseComponent, componentCount);
}*/ }*/
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount);
@ -2005,14 +2054,15 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui
} }
const char * extension = Path::extension(fileName); const char * extension = Path::extension(fileName);
NV_UNUSED(extension);
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
if (strCaseDiff(extension, ".exr") == 0) { if (strCaseDiff(extension, ".exr") == 0) {
return saveFloatEXR(fileName, fimage, baseComponent, componentCount); return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
} }
#endif #endif
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return saveFloatTIFF(fileName, fimage, baseComponent, componentCount); return saveFloatTIFF(fileName, fimage, baseComponent, componentCount);
} }

@ -1,6 +1,7 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "KtxFile.h" #include "KtxFile.h"
#include "nvcore/StdStream.h"
using namespace nv; using namespace nv;
@ -10,6 +11,8 @@ static const uint8 fileIdentifier[12] = {
0x0D, 0x0A, 0x1A, 0x0A 0x0D, 0x0A, 0x1A, 0x0A
}; };
namespace nv
{
KtxHeader::KtxHeader() { KtxHeader::KtxHeader() {
memcpy(identifier, fileIdentifier, 12); memcpy(identifier, fileIdentifier, 12);
@ -19,8 +22,8 @@ KtxHeader::KtxHeader() {
glType = 0; glType = 0;
glTypeSize = 1; glTypeSize = 1;
glFormat = 0; glFormat = 0;
glInternalFormat = KTX_RGBA; glInternalFormat = KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1;
glBaseInternalFormat = KTX_RGBA; glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
pixelWidth = 0; pixelWidth = 0;
pixelHeight = 0; pixelHeight = 0;
pixelDepth = 0; pixelDepth = 0;
@ -31,9 +34,9 @@ KtxHeader::KtxHeader() {
} }
Stream & operator<< (Stream & s, DDSHeader & header) { Stream & operator<< (Stream & s, KtxHeader & header) {
s.serialize(header.identifier, 12); s.serialize(header.identifier, 12);
s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat; s << header.endianness << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
s << header.pixelWidth << header.pixelHeight << header.pixelDepth; s << header.pixelWidth << header.pixelHeight << header.pixelDepth;
s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels; s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels;
s << header.bytesOfKeyValueData; s << header.bytesOfKeyValueData;
@ -41,7 +44,7 @@ Stream & operator<< (Stream & s, DDSHeader & header) {
} }
KtxFile::KtxFile() { /*KtxFile::KtxFile() {
} }
KtxFile::~KtxFile() { KtxFile::~KtxFile() {
} }
@ -49,7 +52,7 @@ KtxFile::~KtxFile() {
void KtxFile::addKeyValue(const char * key, const char * value) { void KtxFile::addKeyValue(const char * key, const char * value) {
keyArray.append(key); keyArray.append(key);
valueArray.append(value); valueArray.append(value);
bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1; header.bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
} }
@ -77,7 +80,8 @@ Stream & operator<< (Stream & s, KtxFile & file) {
} }
return s; return s;
} }*/
} // nv

@ -6,6 +6,7 @@
#include "nvimage.h" #include "nvimage.h"
#include "nvcore/StrLib.h" #include "nvcore/StrLib.h"
#include "nvcore/Array.h"
// KTX File format specification: // KTX File format specification:
// http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key // http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key
@ -14,22 +15,99 @@ namespace nv
{ {
class Stream; class Stream;
// GL types (Table 3.2) // GL types
const uint KTX_UNSIGNED_BYTE; const uint KTX_UNSIGNED_BYTE = 0x1401;
const uint KTX_UNSIGNED_SHORT_5_6_5; const uint KTX_BYTE = 0x1400;
// ... const uint KTX_UNSIGNED_SHORT = 0x1403;
const uint KTX_SHORT = 0x1402;
// GL formats (Table 3.3) const uint KTX_UNSIGNED_INT = 0x1405;
// ... const uint KTX_INT = 0x1404;
const uint KTX_FLOAT = 0x1406;
// GL internal formats (Table 3.12, 3.13) const uint KTX_UNSIGNED_BYTE_3_3_2 = 0x8032;
// ... const uint KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362;
const uint KTX_UNSIGNED_SHORT_5_6_5 = 0x8363;
// GL base internal format. (Table 3.11) const uint KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364;
const uint KTX_RGB; const uint KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033;
const uint KTX_RGBA; const uint KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365;
const uint KTX_ALPHA; const uint KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034;
// ... const uint KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366;
const uint KTX_UNSIGNED_INT_8_8_8_8 = 0x8035;
const uint KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367;
const uint KTX_UNSIGNED_INT_10_10_10_2 = 0x8036;
const uint KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368;
// GL formats
const uint KTX_FORMAT_RED = 0x1903;
const uint KTX_FORMAT_RG = 0x8227;
const uint KTX_FORMAT_RGB = 0x1907;
const uint KTX_FORMAT_BGR = 0x80E0;
const uint KTX_FORMAT_RGBA = 0x1908;
const uint KTX_FORMAT_BGRA = 0x80E1;
const uint KTX_FORMAT_RED_INTEGER = 0x8D94;
const uint KTX_FORMAT_RG_INTEGER = 0x8228;
const uint KTX_FORMAT_RGB_INTEGER = 0x8D98;
const uint KTX_FORMAT_BGR_INTEGER = 0x8D9A;
const uint KTX_FORMAT_RGBA_INTEGER = 0x8D99;
const uint KTX_FORMAT_BGRA_INTEGER = 0x8D9B;
const uint KTX_FORMAT_STENCIL_INDEX = 0x1901;
const uint KTX_FORMAT_DEPTH_COMPONENT = 0x1902;
const uint KTX_FORMAT_DEPTH_STENCIL = 0x84F9;
// GL internal formats
// BC1
const uint KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1 = 0x83F0;
const uint KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 = 0x8C4C;
// BC1a
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1 = 0x83F1;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 = 0x8C4D;
// BC2
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3 = 0x83F2;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 = 0x8C4E;
// BC3
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5 = 0x83F3;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 = 0x8C4F;
// BC4
const uint KTX_INTERNAL_COMPRESSED_RED_RGTC1 = 0x8DBB;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 = 0x8DBC;
// BC5
const uint KTX_INTERNAL_COMPRESSED_RG_RGTC2 = 0x8DBD;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 = 0x8DBE;
// BC6
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F;
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E;
// BC7
const uint KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D;
// ETC
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC1 = 0x8D64;
// NOTE(review): same value as KTX_INTERNAL_COMPRESSED_RGB_ETC1, so the two
// cannot be told apart in a written header. Confirm whether a distinct sRGB
// ETC1 internal format exists; if not, this is only an alias.
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC1 = 0x8D64; // TODO: unverified, see note above.
// ETC2
const uint KTX_INTERNAL_COMPRESSED_RED_EAC = 0x9270;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_EAC = 0x9271;
const uint KTX_INTERNAL_COMPRESSED_RG_EAC = 0x9272;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_EAC = 0x9273;
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC2 = 0x9274;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC2 = 0x9275;
const uint KTX_INTERNAL_COMPRESSED_RGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9276;
const uint KTX_INTERNAL_COMPRESSED_SRGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9277;
const uint KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC = 0x9278;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC = 0x9279;
// GL base internal formats
const uint KTX_BASE_INTERNAL_DEPTH_COMPONENT = 0x1902;
const uint KTX_BASE_INTERNAL_DEPTH_STENCIL = 0x84F9;
const uint KTX_BASE_INTERNAL_RED = 0x1903;
const uint KTX_BASE_INTERNAL_RG = 0x8227;
const uint KTX_BASE_INTERNAL_RGB = 0x1907;
const uint KTX_BASE_INTERNAL_RGBA = 0x1908;
const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901;
struct KtxHeader { struct KtxHeader {
@ -52,10 +130,10 @@ namespace nv
}; };
NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); NVIMAGE_API Stream & operator<< (Stream & s, KtxHeader & header);
struct KtxFile { /* struct KtxFile {
KtxFile(); KtxFile();
~KtxFile(); ~KtxFile();
@ -66,10 +144,9 @@ namespace nv
Array<String> keyArray; Array<String> keyArray;
Array<String> valueArray; Array<String> valueArray;
}; };
NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file); NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);*/
/* /*

@ -1,208 +1,208 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com> // Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
// //
// Permission is hereby granted, free of charge, to any person // Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation // obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without // files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use, // restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell // copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the // copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following // Software is furnished to do so, subject to the following
// conditions: // conditions:
// //
// The above copyright notice and this permission notice shall be // The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software. // included in all copies or substantial portions of the Software.
// //
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES // EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND // OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT // NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, // HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING // WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include "NormalMap.h" #include "NormalMap.h"
#include "Filter.h" #include "Filter.h"
#include "FloatImage.h" #include "FloatImage.h"
#include "Image.h" #include "Image.h"
#include "nvmath/Color.inl" #include "nvmath/Color.inl"
#include "nvmath/Vector.h" #include "nvmath/Vector.h"
#include "nvcore/Ptr.h" #include "nvcore/Ptr.h"
#include <string.h> // memcpy #include <string.h> // memcpy
using namespace nv; using namespace nv;
// Create normal map using the given kernels.
//
// Builds a 4-channel float image the size of `img`. Channel 3 receives a
// height value computed as dot(color, heightWeights) for every pixel; the
// kernels `kdu` and `kdv` are then applied to that channel to obtain the two
// slopes, and the normalized (du, dv, heightScale) vector is stored in
// channels 0..2 remapped as 0.5 * n + 0.5. The caller owns the returned image.
static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv)
{
    nvDebugCheck(kdu != NULL);
    nvDebugCheck(kdv != NULL);
    nvDebugCheck(img != NULL);

    const uint width = img->width();
    const uint height = img->height();

    AutoPtr<FloatImage> result(new FloatImage());
    result->allocate(4, width, height);

    // Compute height and store in alpha channel:
    float * heights = result->channel(3);
    const uint pixelCount = width * height;
    for (uint idx = 0; idx < pixelCount; idx++)
    {
        heights[idx] = dot(toVector4(img->pixel(idx)), heightWeights);
    }

    const float heightScale = 1.0f / 16.0f;  // @@ Use a user defined factor.

    for (uint row = 0; row < height; row++)
    {
        for (uint col = 0; col < width; col++)
        {
            // Filter the height channel (3) with each kernel.
            const float du = result->applyKernelXY(kdu, col, row, 0, 3, wm);
            const float dv = result->applyKernelXY(kdv, col, row, 0, 3, wm);

            const Vector3 normal = normalize(Vector3(du, dv, heightScale));

            // Remap each component from [-1, 1] to [0, 1].
            result->pixel(0, col, row, 0) = 0.5f * normal.x + 0.5f;
            result->pixel(1, col, row, 0) = 0.5f * normal.y + 0.5f;
            result->pixel(2, col, row, 0) = 0.5f * normal.z + 0.5f;
        }
    }

    return result.release();
}
// Create normal map using the given kernels.
//
// Float-image variant: the kernels are applied to channel 3 of `img`, and the
// normalized (du, dv, heightScale) vector is written unbiased to channels
// 0..2 of a newly allocated 4-channel image. Channel 3 of the input is copied
// to the output unchanged. The caller owns the returned image.
static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv)
{
    nvDebugCheck(kdu != NULL);
    nvDebugCheck(kdv != NULL);
    nvDebugCheck(img != NULL);

#pragma NV_MESSAGE("FIXME: Height scale parameter should go away. It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.")
    const float heightScale = 1.0f / 16.0f;

    const uint width = img->width();
    const uint height = img->height();

    AutoPtr<FloatImage> result(new FloatImage());
    result->allocate(4, width, height);

    for (uint row = 0; row < height; row++)
    {
        for (uint col = 0; col < width; col++)
        {
            const float du = img->applyKernelXY(kdu, col, row, 0, 3, wm);
            const float dv = img->applyKernelXY(kdv, col, row, 0, 3, wm);

            const Vector3 normal = normalize(Vector3(du, dv, heightScale));

            result->pixel(0, col, row, 0) = normal.x;
            result->pixel(1, col, row, 0) = normal.y;
            result->pixel(2, col, row, 0) = normal.z;
        }
    }

    // Copy alpha channel in a single block instead of pixel by pixel.
    const uint pixelCount = width * height;
    memcpy(result->channel(3), img->channel(3), pixelCount * sizeof(float));

    return result.release();
}
/// Create normal map using the given filter.
///
/// Picks the Sobel window size from `filter`, builds the normalized
/// horizontal kernel and its transpose, and runs the kernel-based normal map
/// generation on `img`. The kernels are locals, so they are released when the
/// function returns. The caller owns the returned image.
/// Returns NULL (after a debug check) when `filter` is not a known value.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/)
{
    nvDebugCheck(img != NULL);

    // Select the kernel window size.
    uint windowSize = 0;
    switch (filter)
    {
        case NormalMapFilter_Sobel3x3:
            windowSize = 3;
            break;
        case NormalMapFilter_Sobel5x5:
            windowSize = 5;
            break;
        case NormalMapFilter_Sobel7x7:
            windowSize = 7;
            break;
        case NormalMapFilter_Sobel9x9:
            windowSize = 9;
            break;
        default:
            // Unknown filter: fail cleanly instead of dereferencing a NULL kernel.
            nvDebugCheck(false);
            return NULL;
    }

    // Init the kernels.
    Kernel2 kdu(windowSize);
    kdu.initSobel();
    kdu.normalize();

    // The second kernel is the transpose of the first.
    Kernel2 kdv(kdu);
    kdv.transpose();

    return ::createNormalMap(img, wm, heightWeights, &kdu, &kdv);
}
/// Create normal map combining multiple sobel filters.
///
/// Builds a 9x9 kernel with initBlendedSobel(filterWeights), normalizes it,
/// derives the second kernel as its transpose and runs the kernel-based
/// normal map generation on `img`. The kernels are locals, so they are
/// released when the function returns. The caller owns the returned image.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights)
{
    nvDebugCheck(img != NULL);

    Kernel2 kdu(9);
    kdu.initBlendedSobel(filterWeights);
    kdu.normalize();

    Kernel2 kdv(kdu);
    kdv.transpose();

    return ::createNormalMap(img, wm, heightWeights, &kdu, &kdv);
}
/// Create normal map from a float image, combining multiple sobel filters.
///
/// Builds a 9x9 kernel with initBlendedSobel(filterWeights), normalizes it,
/// derives the second kernel as its transpose and runs the kernel-based
/// normal map generation on `img`. The kernels are locals, so they are
/// released when the function returns. The caller owns the returned image.
FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights)
{
    nvDebugCheck(img != NULL);

    Kernel2 kdu(9);
    kdu.initBlendedSobel(filterWeights);
    kdu.normalize();

    Kernel2 kdv(kdu);
    kdv.transpose();

    return ::createNormalMap(img, wm, &kdu, &kdv);
}
/// Normalize the given image in place.
///
/// Forwards to FloatImage::normalize with a base channel of 0.
/// NOTE(review): presumably this renormalizes the vector stored in the
/// channels starting at index 0 -- confirm against FloatImage::normalize.
void nv::normalizeNormalMap(FloatImage * img)
{
    nvDebugCheck(img != NULL);

    const uint baseChannel = 0;
    img->normalize(baseChannel);
}

@ -101,6 +101,48 @@ inline Stream & operator<< (Stream & s, TgaFile & tga)
return s; return s;
} }
// @@ Move to BMP file?
// Byte size of the serialized BmpFileHeader fields: 2 + 4 + 2 + 2 + 4.
const int BITMAPFILEHEADER_SIZE = 14;
// Byte size of the serialized BmpInfoHeader fields: 3 * 4 + 2 * 2 + 6 * 4.
const int BITMAPINFOHEADER_SIZE = 40;
// 16-bit file signature with 'B' in the low byte and 'M' in the high byte.
// NOTE(review): this yields the bytes "BM" on disk only if the stream writes
// 16-bit values in little-endian order -- confirm against Stream.
const int BM_TYPE = ((unsigned int)'M') << 8 | ((unsigned int)'B');
// BMP Header.
// First header of a BMP file. Fields are written one by one by the matching
// operator<<, in declaration order, so in-memory struct padding does not
// affect the file layout.
struct BmpFileHeader {
// File signature; saveBMP stores BM_TYPE here.
uint16 type;
// Total file size in bytes: both headers plus the pixel data.
uint32 size;
// Reserved fields; left at zero by saveBMP.
uint16 reserved1;
uint16 reserved2;
// Byte offset from the start of the file to the pixel data.
uint32 offBits;
};
// Second header of a BMP file, describing the bitmap itself. Fields are
// written one by one by the matching operator<<, in declaration order.
struct BmpInfoHeader {
// Size of this header in bytes; saveBMP stores BITMAPINFOHEADER_SIZE.
uint32 size;
// Image dimensions in pixels.
// NOTE(review): declared unsigned here; the BMP format presumably treats
// these as signed values (negative height = top-down rows) -- confirm
// before using this struct for loading.
uint32 width;
uint32 height;
// saveBMP always writes 1.
uint16 planes;
// Bits per pixel; saveBMP writes 24.
uint16 bitCount;
// Left at zero by saveBMP (presumably meaning uncompressed).
uint32 compression;
// Size of the pixel data in bytes.
uint32 sizeImage;
// Resolution fields; saveBMP writes 2000 to both.
uint32 xPelsPerMeter;
uint32 yPelsPerMeter;
// Palette-related fields; left at zero by saveBMP.
uint32 clrUsed;
uint32 clrImportant;
};
// Serializes the BMP file header field by field, in file order.
inline Stream & operator<< (Stream & s, BmpFileHeader & bmp) {
    s << bmp.type;
    s << bmp.size;
    s << bmp.reserved1;
    s << bmp.reserved2;
    s << bmp.offBits;
    return s;
}
// Serializes the BMP info header field by field, in file order.
inline Stream & operator<< (Stream & s, BmpInfoHeader & bmp) {
    // Header size and image geometry.
    s << bmp.size << bmp.width << bmp.height;
    // Pixel format.
    s << bmp.planes << bmp.bitCount << bmp.compression;
    // Data size and resolution.
    s << bmp.sizeImage << bmp.xPelsPerMeter << bmp.yPelsPerMeter;
    // Palette information.
    s << bmp.clrUsed << bmp.clrImportant;
    return s;
}
} // nv namespace } // nv namespace
#endif // NV_IMAGE_TGAFILE_H #endif // NV_IMAGE_TGAFILE_H

@ -7,7 +7,7 @@ SET(MATH_SRCS
Fitting.h Fitting.cpp Fitting.h Fitting.cpp
Gamma.h Gamma.cpp Gamma.h Gamma.cpp
Half.h Half.cpp Half.h Half.cpp
Matrix.h Matrix.h Matrix.inl Matrix.cpp
Plane.h Plane.inl Plane.cpp Plane.h Plane.inl Plane.cpp
SphericalHarmonic.h SphericalHarmonic.cpp SphericalHarmonic.h SphericalHarmonic.cpp
SimdVector.h SimdVector_SSE.h SimdVector_VE.h SimdVector.h SimdVector_SSE.h SimdVector_VE.h

@ -157,6 +157,12 @@ namespace nv
return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale); return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale);
} }
// Converts the r, g and b channels of a Color32 to a Vector3 by multiplying
// each channel by 1/255. The alpha channel is ignored.
inline Vector3 toVector3(Color32 c)
{
    const float scale = 1.0f / 255.0f;
    const float red   = c.r * scale;
    const float green = c.g * scale;
    const float blue  = c.b * scale;
    return Vector3(red, green, blue);
}
inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1) inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1)
{ {

@ -1,441 +1,487 @@
// This code is in the public domain -- castanyo@yahoo.es // This code is in the public domain -- castanyo@yahoo.es
#include "Matrix.inl" #include "Matrix.inl"
#include "Vector.inl" #include "Vector.inl"
#include "nvcore/Array.inl" #include "nvcore/Array.inl"
#include <float.h> #include <float.h>
#if !NV_CC_MSVC && !NV_OS_ORBIS #if !NV_CC_MSVC && !NV_OS_ORBIS
#include <alloca.h> #include <alloca.h>
#endif #endif
using namespace nv; using namespace nv;
// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise // Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise
// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above; // permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above;
// indx[1..n] is an output vector that records the row permutation effected by the partial // indx[1..n] is an output vector that records the row permutation effected by the partial
// pivoting; d is output as +1 or -1 depending on whether the number of row interchanges was even // pivoting; d is output as +1 or -1 depending on whether the number of row interchanges was even
// or odd, respectively. This routine is used in combination with lubksb to solve linear equations // or odd, respectively. This routine is used in combination with lubksb to solve linear equations
// or invert a matrix. // or invert a matrix.
static bool ludcmp(float **a, int n, int *indx, float *d) static bool ludcmp(float **a, int n, int *indx, float *d)
{ {
const float TINY = 1.0e-20f; const float TINY = 1.0e-20f;
float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row. float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row.
*d = 1.0; // No row interchanges yet. *d = 1.0; // No row interchanges yet.
for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information. for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
float big = 0.0; float big = 0.0;
for (int j = 0; j < n; j++) { for (int j = 0; j < n; j++) {
big = max(big, fabsf(a[i][j])); big = max(big, fabsf(a[i][j]));
} }
if (big == 0) { if (big == 0) {
return false; // Singular matrix return false; // Singular matrix
} }
// No nonzero largest element. // No nonzero largest element.
vv[i] = 1.0f / big; // Save the scaling. vv[i] = 1.0f / big; // Save the scaling.
} }
for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method. for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method.
for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j. for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j.
float sum = a[i][j]; float sum = a[i][j];
for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j]; for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j];
a[i][j] = sum; a[i][j] = sum;
} }
int imax = -1; int imax = -1;
float big = 0.0; // Initialize for the search for largest pivot element. float big = 0.0; // Initialize for the search for largest pivot element.
for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N
float sum = a[i][j]; // of equation (2.3.13). float sum = a[i][j]; // of equation (2.3.13).
for (int k = 0; k < j; k++) { for (int k = 0; k < j; k++) {
sum -= a[i][k]*a[k][j]; sum -= a[i][k]*a[k][j];
} }
a[i][j]=sum; a[i][j]=sum;
float dum = vv[i]*fabs(sum); float dum = vv[i]*fabs(sum);
if (dum >= big) { if (dum >= big) {
// Is the figure of merit for the pivot better than the best so far? // Is the figure of merit for the pivot better than the best so far?
big = dum; big = dum;
imax = i; imax = i;
} }
} }
nvDebugCheck(imax != -1); nvDebugCheck(imax != -1);
if (j != imax) { // Do we need to interchange rows? if (j != imax) { // Do we need to interchange rows?
for (int k = 0; k < n; k++) { // Yes, do so... for (int k = 0; k < n; k++) { // Yes, do so...
swap(a[imax][k], a[j][k]); swap(a[imax][k], a[j][k]);
} }
*d = -(*d); // ...and change the parity of d. *d = -(*d); // ...and change the parity of d.
vv[imax]=vv[j]; // Also interchange the scale factor. vv[imax]=vv[j]; // Also interchange the scale factor.
} }
indx[j]=imax; indx[j]=imax;
if (a[j][j] == 0.0) a[j][j] = TINY; if (a[j][j] == 0.0) a[j][j] = TINY;
// If the pivot element is zero the matrix is singular (at least to the precision of the // If the pivot element is zero the matrix is singular (at least to the precision of the
// algorithm). For some applications on singular matrices, it is desirable to substitute // algorithm). For some applications on singular matrices, it is desirable to substitute
// TINY for zero. // TINY for zero.
if (j != n-1) { // Now, finally, divide by the pivot element. if (j != n-1) { // Now, finally, divide by the pivot element.
float dum = 1.0f / a[j][j]; float dum = 1.0f / a[j][j];
for (int i = j+1; i < n; i++) a[i][j] *= dum; for (int i = j+1; i < n; i++) a[i][j] *= dum;
} }
} // Go back for the next column in the reduction. } // Go back for the next column in the reduction.
return true; return true;
} }
// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix // Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix
// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input // A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input
// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector // as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector
// B, and returns with the solution vector X. a, n, and indx are not modified by this routine // B, and returns with the solution vector X. a, n, and indx are not modified by this routine
// and can be left in place for successive calls with different right-hand sides b. This routine takes // and can be left in place for successive calls with different right-hand sides b. This routine takes
// into account the possibility that b will begin with many zero elements, so it is efficient for use // into account the possibility that b will begin with many zero elements, so it is efficient for use
// in matrix inversion. // in matrix inversion.
static void lubksb(float **a, int n, int *indx, float b[]) static void lubksb(float **a, int n, int *indx, float b[])
{ {
int ii = 0; int ii = 0;
for (int i=0; i<n; i++) { // When ii is set to a positive value, it will become for (int i=0; i<n; i++) { // When ii is set to a positive value, it will become
int ip = indx[i]; // the index of the first nonvanishing element of b. We now int ip = indx[i]; // the index of the first nonvanishing element of b. We now
float sum = b[ip]; // do the forward substitution, equation (2.3.6). The float sum = b[ip]; // do the forward substitution, equation (2.3.6). The
b[ip] = b[i]; // only new wrinkle is to unscramble the permutation as we go. b[ip] = b[i]; // only new wrinkle is to unscramble the permutation as we go.
if (ii != 0) { if (ii != 0) {
for (int j = ii-1; j < i; j++) sum -= a[i][j]*b[j]; for (int j = ii-1; j < i; j++) sum -= a[i][j]*b[j];
} }
else if (sum != 0.0f) { else if (sum != 0.0f) {
ii = i+1; // A nonzero element was encountered, so from now on we ii = i+1; // A nonzero element was encountered, so from now on we
} }
b[i] = sum; // will have to do the sums in the loop above. b[i] = sum; // will have to do the sums in the loop above.
} }
for (int i=n-1; i>=0; i--) { // Now we do the backsubstitution, equation (2.3.7). for (int i=n-1; i>=0; i--) { // Now we do the backsubstitution, equation (2.3.7).
float sum = b[i]; float sum = b[i];
for (int j = i+1; j < n; j++) { for (int j = i+1; j < n; j++) {
sum -= a[i][j]*b[j]; sum -= a[i][j]*b[j];
} }
b[i] = sum/a[i][i]; // Store a component of the solution vector X. b[i] = sum/a[i][i]; // Store a component of the solution vector X.
} // All done! } // All done!
} }
bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x) bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
{ {
nvDebugCheck(x != NULL); nvDebugCheck(x != NULL);
float m[4][4]; float m[4][4];
float *a[4] = {m[0], m[1], m[2], m[3]}; float *a[4] = {m[0], m[1], m[2], m[3]};
int idx[4]; int idx[4];
float d; float d;
for (int y = 0; y < 4; y++) { for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) { for (int x = 0; x < 4; x++) {
a[x][y] = A(x, y); a[x][y] = A(x, y);
} }
} }
// Create LU decomposition. // Create LU decomposition.
if (!ludcmp(a, 4, idx, &d)) { if (!ludcmp(a, 4, idx, &d)) {
// Singular matrix. // Singular matrix.
return false; return false;
} }
// Init solution. // Init solution.
*x = b; *x = b;
// Do back substitution. // Do back substitution.
lubksb(a, 4, idx, x->component); lubksb(a, 4, idx, x->component);
return true; return true;
} }
// @@ Not tested. // @@ Not tested.
Matrix nv::inverseLU(const Matrix & A) Matrix nv::inverseLU(const Matrix & A)
{ {
Vector4 Ai[4]; Vector4 Ai[4];
solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]); solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]); solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]); solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]); solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]); return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
} }
bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x) bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
{ {
nvDebugCheck(x != NULL); nvDebugCheck(x != NULL);
float m[3][3]; float m[3][3];
float *a[3] = {m[0], m[1], m[2]}; float *a[3] = {m[0], m[1], m[2]};
int idx[3]; int idx[3];
float d; float d;
for (int y = 0; y < 3; y++) { for (int y = 0; y < 3; y++) {
for (int x = 0; x < 3; x++) { for (int x = 0; x < 3; x++) {
a[x][y] = A(x, y); a[x][y] = A(x, y);
} }
} }
// Create LU decomposition. // Create LU decomposition.
if (!ludcmp(a, 3, idx, &d)) { if (!ludcmp(a, 3, idx, &d)) {
// Singular matrix. // Singular matrix.
return false; return false;
} }
// Init solution. // Init solution.
*x = b; *x = b;
// Do back substitution. // Do back substitution.
lubksb(a, 3, idx, x->component); lubksb(a, 3, idx, x->component);
return true; return true;
} }
bool nv::solveLU(const Matrix2 & A, const Vector2 & b, Vector2 * x)
bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x) {
{ nvDebugCheck(x != NULL);
nvDebugCheck(x != NULL);
float m[2][2];
*x = transform(inverseCramer(A), b); float *a[2] = {m[0], m[1]};
int idx[2];
return true; // @@ Return false if determinant(A) == 0 ! float d;
}
for (int y = 0; y < 2; y++) {
bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x) for (int x = 0; x < 2; x++) {
{ a[x][y] = A(x, y);
nvDebugCheck(x != NULL); }
}
const float det = A.determinant();
if (equal(det, 0.0f)) { // @@ Use input epsilon. // Create LU decomposition.
return false; if (!ludcmp(a, 2, idx, &d)) {
} // Singular matrix.
return false;
Matrix3 Ai = inverseCramer(A); }
*x = transform(Ai, b); // Init solution.
*x = b;
return true;
} // Do back substitution.
lubksb(a, 2, idx, x->component);
return true;
// Inverse using gaussian elimination. From Jon's code. }
Matrix nv::inverse(const Matrix & m) {
Matrix A = m; bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
Matrix B(identity); {
nvDebugCheck(x != NULL);
int i, j, k;
float max, t, det, pivot; *x = transform(inverseCramer(A), b);
det = 1.0; return true; // @@ Return false if determinant(A) == 0 !
for (i=0; i<4; i++) { /* eliminate in column i, below diag */ }
max = -1.;
for (k=i; k<4; k++) /* find pivot for column i */ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
if (fabs(A(k, i)) > max) { {
max = fabs(A(k, i)); nvDebugCheck(x != NULL);
j = k;
} const float det = A.determinant();
if (max<=0.) return B; /* if no nonzero pivot, PUNT */ if (equal(det, 0.0f)) { // @@ Use input epsilon.
if (j!=i) { /* swap rows i and j */ return false;
for (k=i; k<4; k++) }
swap(A(i, k), A(j, k));
for (k=0; k<4; k++) Matrix3 Ai = inverseCramer(A);
swap(B(i, k), B(j, k));
det = -det; *x = transform(Ai, b);
}
pivot = A(i, i); return true;
det *= pivot; }
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
A(i, k) /= pivot; bool nv::solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x)
for (k=0; k<4; k++) {
B(i, k) /= pivot; nvDebugCheck(x != NULL);
/* we know that A(i, i) will be set to 1, so don't bother to do it */
const float det = A.determinant();
for (j=i+1; j<4; j++) { /* eliminate in rows below i */ if (equal(det, 0.0f)) { // @@ Use input epsilon.
t = A(j, i); /* we're gonna zero this guy */ return false;
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ }
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<4; k++) Matrix2 Ai = inverseCramer(A);
B(j, k) -= B(i, k)*t;
} *x = transform(Ai, b);
}
return true;
/*---------- backward elimination ----------*/ }
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */ // Inverse using gaussian elimination. From Jon's code.
for (k=0; k<4; k++) /* subtract scaled row i from row j */ Matrix nv::inverse(const Matrix & m) {
B(j, k) -= B(i, k)*t;
} Matrix A = m;
} Matrix B(identity);
return B; int i, j, k;
} float max, t, det, pivot;
det = 1.0;
Matrix3 nv::inverse(const Matrix3 & m) { for (i=0; i<4; i++) { /* eliminate in column i, below diag */
max = -1.;
Matrix3 A = m; for (k=i; k<4; k++) /* find pivot for column i */
Matrix3 B(identity); if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
int i, j, k; j = k;
float max, t, det, pivot; }
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
det = 1.0; if (j!=i) { /* swap rows i and j */
for (i=0; i<3; i++) { /* eliminate in column i, below diag */ for (k=i; k<4; k++)
max = -1.; swap(A(i, k), A(j, k));
for (k=i; k<3; k++) /* find pivot for column i */ for (k=0; k<4; k++)
if (fabs(A(k, i)) > max) { swap(B(i, k), B(j, k));
max = fabs(A(k, i)); det = -det;
j = k; }
} pivot = A(i, i);
if (max<=0.) return B; /* if no nonzero pivot, PUNT */ det *= pivot;
if (j!=i) { /* swap rows i and j */ for (k=i+1; k<4; k++) /* only do elems to right of pivot */
for (k=i; k<3; k++) A(i, k) /= pivot;
swap(A(i, k), A(j, k)); for (k=0; k<4; k++)
for (k=0; k<3; k++) B(i, k) /= pivot;
swap(B(i, k), B(j, k)); /* we know that A(i, i) will be set to 1, so don't bother to do it */
det = -det;
} for (j=i+1; j<4; j++) { /* eliminate in rows below i */
pivot = A(i, i); t = A(j, i); /* we're gonna zero this guy */
det *= pivot; for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
for (k=i+1; k<3; k++) /* only do elems to right of pivot */ A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
A(i, k) /= pivot; for (k=0; k<4; k++)
for (k=0; k<3; k++) B(j, k) -= B(i, k)*t;
B(i, k) /= pivot; }
/* we know that A(i, i) will be set to 1, so don't bother to do it */ }
for (j=i+1; j<3; j++) { /* eliminate in rows below i */ /*---------- backward elimination ----------*/
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<3; k++) /* subtract scaled row i from row j */ for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ for (j=0; j<i; j++) { /* eliminate in rows above i */
for (k=0; k<3; k++) t = A(j, i); /* we're gonna zero this guy */
B(j, k) -= B(i, k)*t; for (k=0; k<4; k++) /* subtract scaled row i from row j */
} B(j, k) -= B(i, k)*t;
} }
}
/*---------- backward elimination ----------*/
return B;
for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */ }
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<3; k++) /* subtract scaled row i from row j */ Matrix3 nv::inverse(const Matrix3 & m) {
B(j, k) -= B(i, k)*t;
} Matrix3 A = m;
} Matrix3 B(identity);
return B; int i, j, k;
} float max, t, det, pivot;
det = 1.0;
for (i=0; i<3; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<3; k++) /* find pivot for column i */
#if 0 if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
// Copyright (C) 1999-2004 Michael Garland. j = k;
// }
// Permission is hereby granted, free of charge, to any person obtaining a if (max<=0.) return B; /* if no nonzero pivot, PUNT */
// copy of this software and associated documentation files (the if (j!=i) { /* swap rows i and j */
// "Software"), to deal in the Software without restriction, including for (k=i; k<3; k++)
// without limitation the rights to use, copy, modify, merge, publish, swap(A(i, k), A(j, k));
// distribute, and/or sell copies of the Software, and to permit persons for (k=0; k<3; k++)
// to whom the Software is furnished to do so, provided that the above swap(B(i, k), B(j, k));
// copyright notice(s) and this permission notice appear in all copies of det = -det;
// the Software and that both the above copyright notice(s) and this }
// permission notice appear in supporting documentation. pivot = A(i, i);
// det *= pivot;
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS for (k=i+1; k<3; k++) /* only do elems to right of pivot */
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF A(i, k) /= pivot;
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT for (k=0; k<3; k++)
// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR B(i, k) /= pivot;
// HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL /* we know that A(i, i) will be set to 1, so don't bother to do it */
// INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
// FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, for (j=i+1; j<3; j++) { /* eliminate in rows below i */
// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION t = A(j, i); /* we're gonna zero this guy */
// WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. for (k=i+1; k<3; k++) /* subtract scaled row i from row j */
// A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
// Except as contained in this notice, the name of a copyright holder for (k=0; k<3; k++)
// shall not be used in advertising or otherwise to promote the sale, use B(j, k) -= B(i, k)*t;
// or other dealings in this Software without prior written authorization }
// of the copyright holder. }
/*---------- backward elimination ----------*/
// Matrix inversion code for 4x4 matrices using Gaussian elimination
// with partial pivoting. This is a specialized version of a for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */
// procedure originally due to Paul Heckbert <ph@cs.cmu.edu>. for (j=0; j<i; j++) { /* eliminate in rows above i */
// t = A(j, i); /* we're gonna zero this guy */
// Returns determinant of A, and B=inverse(A) for (k=0; k<3; k++) /* subtract scaled row i from row j */
// If matrix A is singular, returns 0 and leaves trash in B. B(j, k) -= B(i, k)*t;
// }
#define SWAP(a, b, t) {t = a; a = b; b = t;} }
double invert(Mat4& B, const Mat4& m)
{ return B;
Mat4 A = m; }
int i, j, k;
double max, t, det, pivot;
/*---------- forward elimination ----------*/
for (i=0; i<4; i++) /* put identity matrix in B */ #if 0
for (j=0; j<4; j++)
B(i, j) = (double)(i==j); // Copyright (C) 1999-2004 Michael Garland.
//
det = 1.0; // Permission is hereby granted, free of charge, to any person obtaining a
for (i=0; i<4; i++) { /* eliminate in column i, below diag */ // copy of this software and associated documentation files (the
max = -1.; // "Software"), to deal in the Software without restriction, including
for (k=i; k<4; k++) /* find pivot for column i */ // without limitation the rights to use, copy, modify, merge, publish,
if (fabs(A(k, i)) > max) { // distribute, and/or sell copies of the Software, and to permit persons
max = fabs(A(k, i)); // to whom the Software is furnished to do so, provided that the above
j = k; // copyright notice(s) and this permission notice appear in all copies of
} // the Software and that both the above copyright notice(s) and this
if (max<=0.) return 0.; /* if no nonzero pivot, PUNT */ // permission notice appear in supporting documentation.
if (j!=i) { /* swap rows i and j */ //
for (k=i; k<4; k++) // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
SWAP(A(i, k), A(j, k), t); // OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
for (k=0; k<4; k++) // MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
SWAP(B(i, k), B(j, k), t); // OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
det = -det; // HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
} // INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
pivot = A(i, i); // FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
det *= pivot; // NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
for (k=i+1; k<4; k++) /* only do elems to right of pivot */ // WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
A(i, k) /= pivot; //
for (k=0; k<4; k++) // Except as contained in this notice, the name of a copyright holder
B(i, k) /= pivot; // shall not be used in advertising or otherwise to promote the sale, use
/* we know that A(i, i) will be set to 1, so don't bother to do it */ // or other dealings in this Software without prior written authorization
// of the copyright holder.
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ // Matrix inversion code for 4x4 matrices using Gaussian elimination
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ // with partial pivoting. This is a specialized version of a
for (k=0; k<4; k++) // procedure originally due to Paul Heckbert <ph@cs.cmu.edu>.
B(j, k) -= B(i, k)*t; //
} // Returns determinant of A, and B=inverse(A)
} // If matrix A is singular, returns 0 and leaves trash in B.
//
/*---------- backward elimination ----------*/ #define SWAP(a, b, t) {t = a; a = b; b = t;}
double invert(Mat4& B, const Mat4& m)
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ {
for (j=0; j<i; j++) { /* eliminate in rows above i */ Mat4 A = m;
t = A(j, i); /* we're gonna zero this guy */ int i, j, k;
for (k=0; k<4; k++) /* subtract scaled row i from row j */ double max, t, det, pivot;
B(j, k) -= B(i, k)*t;
} /*---------- forward elimination ----------*/
}
for (i=0; i<4; i++) /* put identity matrix in B */
return det; for (j=0; j<4; j++)
} B(i, j) = (double)(i==j);
#endif // 0 det = 1.0;
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<4; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return 0.; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<4; k++)
SWAP(A(i, k), A(j, k), t);
for (k=0; k<4; k++)
SWAP(B(i, k), B(j, k), t);
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<4; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<4; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<4; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return det;
}
#endif // 0

@ -14,6 +14,46 @@ namespace nv
{ {
enum identity_t { identity }; enum identity_t { identity };
// 2x2 matrix.
// Holds four floats in a flat array (m_data). The inline accessors address
// element (row, col) as m_data[col * 2 + row].
class NVMATH_CLASS Matrix2
{
public:
// Default constructor; see the inline definition for initialization behavior.
Matrix2();
// Sets every element to f.
explicit Matrix2(float f);
// Builds the identity matrix.
explicit Matrix2(identity_t);
Matrix2(const Matrix2 & m);
// v0 fills m_data[0..1], v1 fills m_data[2..3].
Matrix2(Vector2::Arg v0, Vector2::Arg v1);
// a, b, c, d are stored in m_data[0..3] in that order.
Matrix2(float a, float b, float c, float d);
// Raw access into the flat storage; idx must be < 4.
float data(uint idx) const;
float & data(uint idx);
// Element access by (row, col); both indices must be < 2.
float get(uint row, uint col) const;
float operator()(uint row, uint col) const;
float & operator()(uint row, uint col);
// Copies of the i-th row / column as a Vector2.
Vector2 row(uint i) const;
Vector2 column(uint i) const;
// Element-wise compound arithmetic.
void operator*=(float s);
void operator/=(float s);
void operator+=(const Matrix2 & m);
void operator-=(const Matrix2 & m);
// Uniform scale of all elements, or per-axis scale (s.x on m_data[0..1],
// s.y on m_data[2..3]).
void scale(float s);
void scale(Vector2::Arg s);
float determinant() const;
private:
// Flat element storage, four floats.
float m_data[4];
};
// Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x);
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x);
// 3x3 matrix. // 3x3 matrix.
class NVMATH_CLASS Matrix3 class NVMATH_CLASS Matrix3
{ {
@ -52,6 +92,8 @@ namespace nv
// Solve equation system using Cramer's inverse. // Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
extern Matrix3 inverse(const Matrix3 & m);
// 4x4 matrix. // 4x4 matrix.
class NVMATH_CLASS Matrix class NVMATH_CLASS Matrix
@ -106,7 +148,6 @@ namespace nv
// Compute inverse using Gaussian elimination and partial pivoting. // Compute inverse using Gaussian elimination and partial pivoting.
extern Matrix inverse(const Matrix & m); extern Matrix inverse(const Matrix & m);
extern Matrix3 inverse(const Matrix3 & m);
} // nv namespace } // nv namespace

@ -8,6 +8,199 @@
namespace nv namespace nv
{ {
// Default constructor: the body is empty, so the elements are not assigned here.
inline Matrix2::Matrix2() {}
// Fill constructor: every one of the four elements is set to f.
inline Matrix2::Matrix2(float f)
{
    m_data[0] = f;
    m_data[1] = f;
    m_data[2] = f;
    m_data[3] = f;
}
// Identity constructor: ones on the diagonal, zeros elsewhere.
inline Matrix2::Matrix2(identity_t)
{
    m_data[0] = 1.0f;   // (0,0)
    m_data[1] = 0.0f;   // (1,0)
    m_data[2] = 0.0f;   // (0,1)
    m_data[3] = 1.0f;   // (1,1)
}
// Copy constructor: duplicates the four elements of m.
inline Matrix2::Matrix2(const Matrix2 & m)
{
    const float * src = m.m_data;
    m_data[0] = src[0];
    m_data[1] = src[1];
    m_data[2] = src[2];
    m_data[3] = src[3];
}
// Builds the matrix from two vectors: v0 fills storage slots 0-1 and v1 fills
// slots 2-3 (i.e. the two columns, given the col * 2 + row element addressing).
inline Matrix2::Matrix2(Vector2::Arg v0, Vector2::Arg v1)
{
    m_data[0] = v0.x;
    m_data[1] = v0.y;

    m_data[2] = v1.x;
    m_data[3] = v1.y;
}
// Builds the matrix from four scalars, stored in storage order:
// a -> m_data[0], b -> m_data[1], c -> m_data[2], d -> m_data[3].
inline Matrix2::Matrix2(float a, float b, float c, float d)
{
    m_data[0] = a;
    m_data[1] = b;
    m_data[2] = c;
    m_data[3] = d;
}
// Read-only access to the flat storage. idx must be less than 4 (debug-checked).
inline float Matrix2::data(uint idx) const
{
    nvDebugCheck(idx < 4);
    const float value = m_data[idx];
    return value;
}
// Mutable access to the flat storage. idx must be less than 4 (debug-checked).
inline float & Matrix2::data(uint idx)
{
    nvDebugCheck(idx < 4);
    float & element = m_data[idx];
    return element;
}
// Returns element (row, col). Both indices must be less than 2 (debug-checked).
inline float Matrix2::get(uint row, uint col) const
{
    nvDebugCheck(row < 2 && col < 2);
    const uint slot = col * 2 + row;
    return m_data[slot];
}
// Read-only element access by (row, col). Both indices must be less than 2
// (debug-checked).
inline float Matrix2::operator()(uint row, uint col) const
{
    nvDebugCheck(row < 2 && col < 2);
    const uint slot = col * 2 + row;
    return m_data[slot];
}
// Mutable element access by (row, col). Both indices must be less than 2
// (debug-checked).
inline float & Matrix2::operator()(uint row, uint col)
{
    nvDebugCheck(row < 2 && col < 2);
    const uint slot = col * 2 + row;
    return m_data[slot];
}
// Returns a copy of row i as a Vector2. i must be less than 2 (debug-checked).
inline Vector2 Matrix2::row(uint i) const
{
    nvDebugCheck(i < 2);
    const float first  = get(i, 0);
    const float second = get(i, 1);
    return Vector2(first, second);
}
// Returns a copy of column i as a Vector2. i must be less than 2 (debug-checked).
inline Vector2 Matrix2::column(uint i) const
{
    nvDebugCheck(i < 2);
    const float top    = get(0, i);
    const float bottom = get(1, i);
    return Vector2(top, bottom);
}
// Multiplies every element by s in place.
inline void Matrix2::operator*=(float s)
{
    m_data[0] *= s;
    m_data[1] *= s;
    m_data[2] *= s;
    m_data[3] *= s;
}
// Divides every element by s in place. The reciprocal of s is computed once
// and each element is multiplied by it; s is not checked against zero.
inline void Matrix2::operator/=(float s)
{
    const float reciprocal = 1.0f / s;
    m_data[0] *= reciprocal;
    m_data[1] *= reciprocal;
    m_data[2] *= reciprocal;
    m_data[3] *= reciprocal;
}
// Adds m to this matrix element by element, in place.
inline void Matrix2::operator+=(const Matrix2 & m)
{
    const float * rhs = m.m_data;
    for (int k = 0; k != 4; ++k) {
        m_data[k] += rhs[k];
    }
}
// Subtracts m from this matrix element by element, in place.
inline void Matrix2::operator-=(const Matrix2 & m)
{
    const float * rhs = m.m_data;
    for (int k = 0; k != 4; ++k) {
        m_data[k] -= rhs[k];
    }
}
// Element-wise sum of two matrices, returned as a new matrix.
inline Matrix2 operator+(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 sum(a);
    sum += b;
    return sum;
}
// Element-wise difference a - b, returned as a new matrix.
inline Matrix2 operator-(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 difference(a);
    difference -= b;
    return difference;
}
// Matrix times scalar: returns a copy of a with every element multiplied by s.
inline Matrix2 operator*(const Matrix2 & a, float s)
{
    Matrix2 scaled(a);
    scaled *= s;
    return scaled;
}
// Scalar times matrix: returns a copy of a with every element multiplied by s.
inline Matrix2 operator*(float s, const Matrix2 & a)
{
    Matrix2 scaled(a);
    scaled *= s;
    return scaled;
}
// Matrix divided by scalar: returns a copy of a with operator/= applied, so the
// division is carried out as a multiplication by the reciprocal of s.
inline Matrix2 operator/(const Matrix2 & a, float s)
{
    Matrix2 quotient(a);
    quotient /= s;
    return quotient;
}
// Matrix product a * b: element (r, c) of the result is the dot product of
// row r of a with column c of b.
inline Matrix2 mul(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 product;
    for (int r = 0; r < 2; r++) {
        // Row r of the left operand, read once and reused for both columns.
        const float left0 = a(r,0);
        const float left1 = a(r,1);
        for (int c = 0; c < 2; c++) {
            product(r, c) = left0 * b(0,c) + left1 * b(1,c);
        }
    }
    return product;
}
// Matrix product; operator form of mul(a, b).
inline Matrix2 operator*(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 product = mul(a, b);
    return product;
}
// Transform the given 2D vector with the given matrix (computes m * p, treating
// p as a column vector).
inline Vector2 transform(const Matrix2 & m, const Vector2 & p)
{
    const float x = p.x * m(0,0) + p.y * m(0,1);
    const float y = p.x * m(1,0) + p.y * m(1,1);
    return Vector2(x, y);
}
// Uniform scale: multiplies every element by s in place.
inline void Matrix2::scale(float s)
{
    m_data[0] *= s;
    m_data[1] *= s;
    m_data[2] *= s;
    m_data[3] *= s;
}
// Non-uniform scale: storage slots 0-1 are multiplied by s.x and slots 2-3 by
// s.y (i.e. column 0 and column 1, given the col * 2 + row element addressing).
inline void Matrix2::scale(Vector2::Arg s)
{
    m_data[0] *= s.x;
    m_data[1] *= s.x;

    m_data[2] *= s.y;
    m_data[3] *= s.y;
}
// Determinant of the 2x2 matrix: m00 * m11 - m01 * m10.
inline float Matrix2::determinant() const
{
    const float m00 = get(0,0);
    const float m11 = get(1,1);
    const float m01 = get(0,1);
    const float m10 = get(1,0);
    return m00 * m11 - m01 * m10;
}
// Inverse using Cramer's rule.
// Returns adj(m) / det(m). When the determinant is exactly zero the matrix is
// not invertible and an all-zero matrix is returned instead.
//
// The previous implementation returned m * (1/det), which is the input matrix
// scaled by the reciprocal determinant rather than its inverse; the adjugate
// (diagonal swapped, off-diagonal negated) has to be used.
inline Matrix2 inverseCramer(const Matrix2 & m)
{
    const float det = m.determinant();
    if (equal(det, 0.0f, 0.0f)) {
        return Matrix2(0);
    }

    const float invDet = 1.0f / det;

    // Matrix2(a, b, c, d) stores a -> (0,0), b -> (1,0), c -> (0,1), d -> (1,1).
    return Matrix2( m(1,1) * invDet,    // (0,0)
                   -m(1,0) * invDet,    // (1,0)
                   -m(0,1) * invDet,    // (0,1)
                    m(0,0) * invDet);   // (1,1)
}
inline Matrix3::Matrix3() {} inline Matrix3::Matrix3() {}
inline Matrix3::Matrix3(float f) inline Matrix3::Matrix3(float f)
@ -16,7 +209,7 @@ namespace nv
m_data[i] = f; m_data[i] = f;
} }
} }
inline Matrix3::Matrix3(identity_t) inline Matrix3::Matrix3(identity_t)
{ {
for(int i = 0; i < 3; i++) { for(int i = 0; i < 3; i++) {
@ -794,7 +987,7 @@ v1 = FXVector3.Cross(v3, v2);
v1.Normalize(); v1.Normalize();
Matrix R = Matrix::Identity; Matrix R = Matrix::Identity;
R[0, 0] = v3.X; // Not sure this is in the correct order... R[0, 0] = v3.X; // Not sure this is in the correct order...
R[1, 0] = v3.Y; R[1, 0] = v3.Y;
R[2, 0] = v3.Z; R[2, 0] = v3.Z;
R[0, 1] = v1.X; R[0, 1] = v1.X;

@ -7,10 +7,6 @@
#include "nvmath.h" #include "nvmath.h"
#include "Vector.h" #include "Vector.h"
#if NV_USE_ALTIVEC
#undef vector
#endif
namespace nv namespace nv
{ {
class Matrix; class Matrix;
@ -29,6 +25,7 @@ namespace nv
Vector3 vector() const; Vector3 vector() const;
float offset() const; float offset() const;
Vector3 normal() const;
void operator*=(float s); void operator*=(float s);

@ -24,6 +24,7 @@ namespace nv
inline Vector3 Plane::vector() const { return v.xyz(); } inline Vector3 Plane::vector() const { return v.xyz(); }
inline float Plane::offset() const { return v.w; } inline float Plane::offset() const { return v.w; }
// Returns the plane's vector part (v.xyz()) passed through normalize() with an
// epsilon of zero.
inline Vector3 Plane::normal() const
{
    return normalize(v.xyz(), 0.0f);
}
// Normalize plane. // Normalize plane.
inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON) inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)

File diff suppressed because it is too large Load Diff

@ -194,15 +194,20 @@ namespace nv
#endif #endif
} }
inline uint log2(uint i) inline uint log2(uint32 i)
{ {
uint value = 0; uint32 value = 0;
while( i >>= 1 ) { while( i >>= 1 ) value++;
value++;
}
return value; return value;
} }
// Integer base-2 logarithm of a 64-bit value, rounded down: counts how many
// times i can be shifted right by one before it reaches zero. Returns 0 for
// inputs 0 and 1.
inline uint log2(uint64 i)
{
    uint64 value = 0;
    for (i >>= 1; i != 0; i >>= 1) {
        value++;
    }
    return U32(value);
}
inline float lerp(float f0, float f1, float t) inline float lerp(float f0, float f1, float t)
{ {
const float s = 1.0f - t; const float s = 1.0f - t;

@ -106,6 +106,11 @@ namespace nv {
#error "Atomics not implemented." #error "Atomics not implemented."
#endif #endif
} }
inline void storeRelease(volatile float * ptr, float value)
{
storeRelease((uint32 *)ptr, *(uint32 *)&value);
}
template <typename T> template <typename T>

@ -17,7 +17,7 @@ struct Event::Private {
}; };
Event::Event() : m(new Private) { Event::Event() : m(new Private) {
m->handle = CreateEvent(NULL, FALSE, FALSE, NULL); m->handle = CreateEvent(/*lpEventAttributes=*/NULL, /*bManualReset=*/FALSE, /*bInitialState=*/FALSE, /*lpName=*/NULL);
} }
Event::~Event() { Event::~Event() {

@ -13,7 +13,9 @@
#endif // NV_OS #endif // NV_OS
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h> #include <telemetry.h>
extern HTELEMETRY tmContext; extern HTELEMETRY tmContext;
#endif #endif
@ -45,14 +47,19 @@ Mutex::~Mutex ()
void Mutex::lock() void Mutex::lock()
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
TmU64 matcher; TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
#endif #endif
EnterCriticalSection(&m->mutex); EnterCriticalSection(&m->mutex);
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS);
tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired");
#endif #endif
@ -60,7 +67,18 @@ void Mutex::lock()
bool Mutex::tryLock() bool Mutex::tryLock()
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
if (TryEnterCriticalSection(&m->mutex) != 0) {
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
return true;
}
else {
tmEndWaitForLock(0);
return false;
}
#elif NV_USE_TELEMETRY
TmU64 matcher; TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
if (TryEnterCriticalSection(&m->mutex) != 0) { if (TryEnterCriticalSection(&m->mutex) != 0) {
@ -79,7 +97,9 @@ bool Mutex::tryLock()
void Mutex::unlock() void Mutex::unlock()
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmReleasedLock(0, this);
#elif NV_USE_TELEMETRY
tmSetLockState(tmContext, this, TMLS_RELEASED, "released"); tmSetLockState(tmContext, this, TMLS_RELEASED, "released");
#endif #endif
@ -90,13 +110,17 @@ void Mutex::unlock()
struct Mutex::Private { struct Mutex::Private {
pthread_mutex_t mutex; pthread_mutex_t mutex;
pthread_mutexattr_t attr;
const char * name; const char * name;
}; };
Mutex::Mutex (const char * name) : m(new Private) Mutex::Mutex (const char * name) : m(new Private)
{ {
int result = pthread_mutex_init(&m->mutex, NULL); pthread_mutexattr_init(&m->attr);
pthread_mutexattr_settype(&m->attr, PTHREAD_MUTEX_RECURSIVE);
int result = pthread_mutex_init(&m->mutex, &m->attr);
//m->mutex = PTHREAD_MUTEX_INITIALIZER;
m->name = name; m->name = name;
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
} }
@ -105,6 +129,8 @@ Mutex::~Mutex ()
{ {
int result = pthread_mutex_destroy(&m->mutex); int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
result = pthread_mutexattr_destroy(&m->attr);
nvDebugCheck(result == 0);
} }
void Mutex::lock() void Mutex::lock()

@ -9,7 +9,11 @@
#include <unistd.h> // usleep #include <unistd.h> // usleep
#endif #endif
#if NV_USE_TELEMETRY #include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h> #include <telemetry.h>
extern HTELEMETRY tmContext; extern HTELEMETRY tmContext;
#endif #endif
@ -118,16 +122,12 @@ void Thread::start(ThreadFunc * func, void * arg)
nvDebugCheck(p->thread != NULL); nvDebugCheck(p->thread != NULL);
if (p->name != NULL) { if (p->name != NULL) {
setThreadName(threadId, p->name); setThreadName(threadId, p->name);
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmThreadName(0, threadId, p->name);
#elif NV_USE_TELEMETRY
tmThreadName(tmContext, threadId, p->name); tmThreadName(tmContext, threadId, p->name);
#endif #endif
} }
#elif NV_OS_ORBIS
int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? p->name : "nv::Thread");
nvDebugCheck(ret == 0);
// use any non-system core
scePthreadSetaffinity(p->thread, 0x3F);
scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2);
#elif NV_OS_USE_PTHREAD #elif NV_OS_USE_PTHREAD
int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr()); int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr());
nvDebugCheck(result == 0); nvDebugCheck(result == 0);

@ -8,7 +8,9 @@
#include "nvcore/Utils.h" #include "nvcore/Utils.h"
#include "nvcore/StrLib.h" #include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h> #include <telemetry.h>
extern HTELEMETRY tmContext; extern HTELEMETRY tmContext;
#endif #endif
@ -84,7 +86,9 @@ AutoPtr<ThreadPool> s_pool;
} }
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmZone(0, TMZF_NONE, "worker");
#elif NV_USE_TELEMETRY
tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker"); tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker");
#endif #endif
func(s_pool->arg, s_pool->useCallingThread + i); func(s_pool->arg, s_pool->useCallingThread + i);
@ -116,11 +120,11 @@ ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffi
lockThreadToProcessor(0); // Calling thread always locked to processor 0. lockThreadToProcessor(0); // Calling thread always locked to processor 0.
} }
StringBuilder name;
for (uint i = 0; i < threadCount; i++) { for (uint i = 0; i < threadCount; i++) {
StringBuilder name;
name.format("worker %d", i); name.format("worker %d", i);
workers[i].setName(name.release()); // @Leak workers[i].setName(name.release()); // @Leak
workers[i].start(workerFunc, (void *)i); workers[i].start(workerFunc, (void *)(uintptr_t)i);
} }
allIdle = true; allIdle = true;
@ -141,9 +145,6 @@ ThreadPool::~ThreadPool()
void ThreadPool::run(ThreadTask * func, void * arg) void ThreadPool::run(ThreadTask * func, void * arg)
{ {
// Wait until threads are idle.
wait();
start(func, arg); start(func, arg);
if (useCallingThread) { if (useCallingThread) {

@ -85,7 +85,9 @@ uint nv::processorCount() {
return count; return count;
#elif NV_OS_ORBIS #elif NV_OS_ORBIS
return 6; return 6;
#elif NV_OS_DURANGO
return 6;
#elif NV_OS_XBOX #elif NV_OS_XBOX
return 3; // or 6? return 3; // or 6?
#elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX #elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX

@ -25,6 +25,7 @@
#include "BlockCompressor.h" #include "BlockCompressor.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "TaskDispatcher.h" #include "TaskDispatcher.h"
#include "CompressionOptions.h"
#include "nvimage/Image.h" #include "nvimage/Image.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
@ -33,6 +34,7 @@
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include "nvcore/Memory.h" #include "nvcore/Memory.h"
#include "nvcore/Array.inl"
#include <new> // placement new #include <new> // placement new
@ -40,85 +42,13 @@
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
/*
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false;
#else
bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{
nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs);
}
}
}
}
#if defined(HAVE_OPENMP)
else
{
const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < int(bw*bh); i++)
{
const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
}
#endif
}
*/
struct CompressorContext struct CompressorContext
{ {
nvtt::AlphaMode alphaMode; AlphaMode alphaMode;
uint w, h, d; uint w, h, d;
const float * data; const float * data;
const nvtt::CompressionOptions::Private * compressionOptions; const CompressionOptions::Private * compressionOptions;
uint bw, bh, bs; uint bw, bh, bs;
uint8 * mem; uint8 * mem;
@ -144,7 +74,7 @@ void ColorBlockCompressorTask(void * data, int i)
} }
} }
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(d == 1); nvDebugCheck(d == 1);
@ -182,66 +112,6 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
delete [] context.mem; delete [] context.mem;
} }
#if 0
// Each task compresses one block.
void ColorSetCompressorTask(void * data, int i)
{
CompressorContext * d = (CompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
//for (uint x = 0; x < d->bw; x++)
{
ColorSet set;
set.setColors(d->data, d->w, d->h, x * 4, y * 4);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
}
}
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
CompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
context.compressor = this;
SequentialTaskDispatcher sequential;
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
#if _DEBUG
dispatcher = &sequential;
#endif
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
}
#endif // 0
// Each task compresses one block. // Each task compresses one block.
void FloatColorCompressorTask(void * data, int i) void FloatColorCompressorTask(void * data, int i)
{ {
@ -262,8 +132,8 @@ void FloatColorCompressorTask(void * data, int i)
Vector4 colors[16]; Vector4 colors[16];
float weights[16]; float weights[16];
const uint block_w = min(d->w - block_x * 4U, 4U); const uint block_w = min(d->w - block_x * 4, 4U);
const uint block_h = min(d->h - block_y * 4U, 4U); const uint block_h = min(d->h - block_y * 4, 4U);
uint x, y; uint x, y;
for (y = 0; y < block_h; y++) { for (y = 0; y < block_h; y++) {
@ -274,7 +144,7 @@ void FloatColorCompressorTask(void * data, int i)
colors[dst_idx].y = g[src_idx]; colors[dst_idx].y = g[src_idx];
colors[dst_idx].z = b[src_idx]; colors[dst_idx].z = b[src_idx];
colors[dst_idx].w = a[src_idx]; colors[dst_idx].w = a[src_idx];
weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f; weights[dst_idx] = (d->alphaMode == AlphaMode_Transparency) ? saturate(a[src_idx]) : 1.0f;
} }
for (; x < 4; x++) { for (; x < 4; x++) {
uint dst_idx = 4 * y + x; uint dst_idx = 4 * y + x;
@ -289,14 +159,14 @@ void FloatColorCompressorTask(void * data, int i)
weights[dst_idx] = 0.0f; weights[dst_idx] = 0.0f;
} }
} }
// Compress block. // Compress block.
uint8 * output = d->mem + (block_y * d->bw + block_x) * d->bs; uint8 * output = d->mem + (block_y * d->bw + block_x) * d->bs;
((FloatColorCompressor *)d->compressor)->compressBlock(colors, weights, *d->compressionOptions, output); ((FloatColorCompressor *)d->compressor)->compressBlock(colors, weights, *d->compressionOptions, output);
} }
void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures. nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures.
@ -308,7 +178,7 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
context.data = data; context.data = data;
context.compressionOptions = &compressionOptions; context.compressionOptions = &compressionOptions;
context.bs = blockSize(); context.bs = blockSize(compressionOptions);
context.bw = (w + 3) / 4; context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4; context.bh = (h + 3) / 4;
@ -333,3 +203,466 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
delete [] context.mem; delete [] context.mem;
} }
// BC1
#include "CompressorDXT1.h"
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_dxt1_fast. Only the xyz part of compressionOptions.colorWeight is
// passed on; `output` is interpreted as a BlockDXT1.
void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT1 * block = (BlockDXT1 *)output;
    compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), block);
}
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_dxt1 with three-color mode enabled. Only the xyz part of
// compressionOptions.colorWeight is passed on; `output` is interpreted as a BlockDXT1.
void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    const bool three_color_mode = true;
    BlockDXT1 * block = (BlockDXT1 *)output;
    compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), three_color_mode, block);
}
// @@ BC1a
// @@ BC2
// @@ BC3
// BC3_RGBM
#include "CompressorDXT5_RGBM.h"
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_dxt5_rgbm, passing compressionOptions.rgbmThreshold along.
// `output` is interpreted as a BlockDXT5.
void CompressorBC3_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT5 * block = (BlockDXT5 *)output;
    compress_dxt5_rgbm(colors, weights, compressionOptions.rgbmThreshold, block);
}
// ETC
#include "CompressorETC.h"
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_etc1. Only the xyz part of compressionOptions.colorWeight is passed on;
// `output` is handed through unchanged.
void CompressorETC1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc1(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_eac with a fixed search radius of 1 and 11-bit mode enabled.
// compressionOptions is not consulted here.
// NOTE(review): input_channel is hard-coded to 1; confirm this is the intended
// source channel for the single-channel format.
void CompressorETC2_R::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac(colors, weights, /*input_channel=*/1, /*search_radius=*/1, /*use_11bit_mode=*/true, output);
}
// Two-channel EAC encoding is not implemented yet (the compress_eac_rg call is
// still disabled). Instead of leaving the destination block untouched, which
// would pass whatever bytes the caller's buffer happened to contain on to the
// output, write an all-zero block of blockSize() bytes so the result is
// deterministic.
void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    uint8 * bytes = (uint8 *)output;
    const uint size = blockSize(compressionOptions);
    for (uint i = 0; i < size; i++) {
        bytes[i] = 0;
    }

    //compress_eac_rg(colors, weights, 1, 2, output);
}
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_etc2. Only the xyz part of compressionOptions.colorWeight is passed on;
// `output` is handed through unchanged.
void CompressorETC2_RGB::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
compress_etc2(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_etc2_eac. Only the xyz part of compressionOptions.colorWeight is
// passed on; `output` is handed through unchanged.
void CompressorETC2_RGBA::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
// @@ Change radius based on quality.
compress_etc2_eac(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
/*void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac_rg(colors, weights, compressionOptions.colorWeight.xyz(), output);
}*/
// Encodes one block of 16 colors (with per-texel weights) by forwarding to
// compress_etc2_rgbm, passing compressionOptions.rgbmThreshold along.
// `output` is handed through unchanged.
void CompressorETC2_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc2_rgbm(colors, weights, compressionOptions.rgbmThreshold, output);
}
// External compressors.
#if defined(HAVE_ATITC)
// Local declarations of Windows-style type names, made just before
// atitc/ATI_Compress.h is included.
// NOTE(review): presumably the ATI header relies on these names being declared;
// confirm. _W64 is not defined in this file and must come from the compiler or
// an earlier header.
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
// Compresses a single 2D image to DXT1 with the ATI_Compress library and passes
// the encoded data to outputOptions.outputHandler (if one is set).
//
//  inputFormat  InputFormat_BGRA_8UB is described to the library as ARGB_8888
//               with a pitch of w * 4 bytes; any other value is described as
//               ARGB_32F with a pitch of w * 16 bytes.
//  w, h         Image dimensions in pixels. d must be 1.
//  data         Source pixels; read by the library, not owned or freed here.
//
// alphaMode and compressionOptions are not used. Nothing is written when the
// destination buffer cannot be allocated or the conversion reports an error.
void AtiCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    // Init source texture
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = w;
    srcTexture.dwHeight = h;
    if (inputFormat == InputFormat_BGRA_8UB)
    {
        srcTexture.dwPitch = w * 4;
        srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
    }
    else
    {
        // @@ Floating point input is not swizzled.
        srcTexture.dwPitch = w * 16;
        srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
    }
    // The size query reads the fields set above, so it has to come last.
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) data;

    // Init dest texture
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = w;
    destTexture.dwHeight = h;
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT1;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
    if (destTexture.pData == NULL) {
        // No destination buffer: there is nothing to compress into.
        return;
    }

    ATI_TC_CompressOptions options;
    options.dwSize = sizeof(options);
    options.bUseChannelWeighting = false;
    options.bUseAdaptiveWeighting = false;
    options.bDXT1UseAlpha = false;
    options.nCompressionSpeed = ATI_TC_Speed_Normal;
    options.bDisableMultiThreading = false;
    //options.bDisableMultiThreading = true;

    // Compress
    const ATI_TC_ERROR result = ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);

    // Only emit data the library reports as successfully converted; on failure
    // the destination buffer contents are not meaningful.
    if (result == ATI_TC_OK && outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    mem::free(destTexture.pData);
}
// Compresses a single 2D image to DXT5 with the ATI_Compress library (using the
// library's default options) and passes the encoded data to
// outputOptions.outputHandler (if one is set).
//
//  inputFormat  InputFormat_BGRA_8UB is described to the library as ARGB_8888
//               with a pitch of w * 4 bytes; any other value is described as
//               ARGB_32F with a pitch of w * 16 bytes.
//  w, h         Image dimensions in pixels. d must be 1.
//  data         Source pixels; read by the library, not owned or freed here.
//
// alphaMode and compressionOptions are not used. Nothing is written when the
// destination buffer cannot be allocated or the conversion reports an error.
void AtiCompressorDXT5::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    // Init source texture
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = w;
    srcTexture.dwHeight = h;
    if (inputFormat == InputFormat_BGRA_8UB)
    {
        srcTexture.dwPitch = w * 4;
        srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
    }
    else
    {
        srcTexture.dwPitch = w * 16;
        srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
    }
    // The size query reads the fields set above, so it has to come last.
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) data;

    // Init dest texture
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = w;
    destTexture.dwHeight = h;
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT5;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
    if (destTexture.pData == NULL) {
        // No destination buffer: there is nothing to compress into.
        return;
    }

    // Compress
    const ATI_TC_ERROR result = ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);

    // Only emit data the library reports as successfully converted; on failure
    // the destination buffer contents are not meaningful.
    if (result == ATI_TC_OK && outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
// Placeholder for a squish-based DXT1 compressor. The only live statements are
// the two debug checks (the second one is unconditional) and a build-time TODO
// message; the reference implementation below is commented out, so nothing is
// written to the output handler.
void SquishCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
// Compresses a single 2D image to DXT1 by loading it into a DXT1 system-memory
// texture through D3DX on a reference device, then passes the locked surface
// contents to outputOptions.outputHandler (if one is set).
//
//  inputFormat  InputFormat_BGRA_8UB is loaded as D3DFMT_A8R8G8B8 with a pitch
//               of w * 4 bytes; any other value is loaded as
//               D3DFMT_A32B32G32R32F with a pitch of w * 16 bytes.
//  w, h         Image dimensions in pixels. d must be 1.
//  data         Source pixels; not owned or freed here.
//
// alphaMode and compressionOptions are not used. Every Direct3D object that
// was successfully created is released before returning, including the
// texture, and a failed creation step skips the remaining work instead of
// dereferencing a null interface pointer.
void D3DXCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL) {
        return;
    }

    D3DPRESENT_PARAMETERS presentParams;
    ZeroMemory(&presentParams, sizeof(presentParams));
    presentParams.Windowed = TRUE;
    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
    presentParams.BackBufferWidth = 8;
    presentParams.BackBufferHeight = 8;
    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;

    IDirect3DDevice9 * device = NULL;
    IDirect3DTexture9 * texture = NULL;
    IDirect3DSurface9 * surface = NULL;

    HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);

    if (SUCCEEDED(err)) {
        err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
    }

    if (SUCCEEDED(err)) {
        err = texture->GetSurfaceLevel(0, &surface);
    }

    if (SUCCEEDED(err)) {
        RECT srcRect;
        srcRect.left = 0;
        srcRect.top = 0;
        srcRect.bottom = h;
        srcRect.right = w;

        if (inputFormat == InputFormat_BGRA_8UB)
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
        else
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
        }

        if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
        {
            D3DLOCKED_RECT locked;
            ZeroMemory(&locked, sizeof(locked));

            err = surface->LockRect(&locked, NULL, D3DLOCK_READONLY);
            if (SUCCEEDED(err)) {
                if (outputOptions.outputHandler != NULL) {
                    // One row of 4x4 blocks per pitch.
                    int size = locked.Pitch * ((h + 3) / 4);
                    outputOptions.outputHandler->writeData(locked.pBits, size);
                }
                surface->UnlockRect();
            }
        }
    }

    // Release in reverse order of acquisition.
    if (surface != NULL) surface->Release();
    if (texture != NULL) texture->Release();
    if (device != NULL) device->Release();
    d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
// Encodes one color block with stb_compress_dxt_block. The block is swizzled in
// place first (R and B swapped), so `rgba` is modified by this call.
// alphaMode and compressionOptions are not used.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
    // Swap R and B
    rgba.swizzle(2, 1, 0, 3);

    unsigned char * dst = (unsigned char *)output;
    unsigned char * src = (unsigned char *)rgba.colors();
    stb_compress_dxt_block(dst, src, 0, 0);
}
#endif // defined(HAVE_STB)
#if defined(HAVE_ETCLIB)
#include "Etc.h"
// Compresses an image with Etc::Encode and passes the encoded bytes to
// outputOptions.outputHandler (if one is set).
//
// compressionOptions.format selects the Etc::Image::Format; an unsupported
// format trips nvCheck(false) and returns without output. The source data is
// read as four consecutive planes of w * h floats each and interleaved into a
// temporary buffer (four floats per pixel) before encoding. alphaMode, d and
// dispatcher are not used.
// NOTE(review): out_data is not released here; confirm who owns the buffer
// returned by Etc::Encode.
void EtcLibCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    //nvCheck(d == 1); // Encode one layer at a time?

    // Map the requested format to the encoder's format (linear variants; the
    // sRGB variants SRGB8 / SRGBA8 / SRGB8A1 are not selected).
    Etc::Image::Format format;
    switch (compressionOptions.format) {
        case Format_ETC1:
            format = Etc::Image::Format::ETC1;
            break;
        case Format_ETC2_R:
            format = Etc::Image::Format::R11;
            break;
        case Format_ETC2_RG:
            format = Etc::Image::Format::RG11;
            break;
        case Format_ETC2_RGB:
            format = Etc::Image::Format::RGB8;
            break;
        case Format_ETC2_RGBA:
            format = Etc::Image::Format::RGBA8;
            break;
        case Format_ETC2_RGB_A1:
            format = Etc::Image::Format::RGB8A1;
            break;
        default:
            nvCheck(false);
            return;
    }

    // @@ Use normal compression metric for normals?
    Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBA;

    // @@ Adjust based on quality.
    int effort = ETCCOMP_DEFAULT_EFFORT_LEVEL;

    // @@ What are the defaults?
    uint jobs = 4;
    uint max_jobs = 4;

    // Results filled in by the encoder.
    uint8 * out_data = NULL;
    uint out_size = 0;
    uint out_width = 0;
    uint out_height = 0;
    int out_time = 0;

    // Swizzle color data: planar input -> interleaved, four floats per pixel.
    const uint count = w * h;
    nv::Array<float> tmp;
    tmp.resize(4 * count);
    for (uint pixel = 0; pixel < count; pixel++) {
        for (uint channel = 0; channel < 4; channel++) {
            tmp[4 * pixel + channel] = data[count * channel + pixel];
        }
    }

    Etc::Encode(tmp.buffer(), w, h, format, error_metric, effort, jobs, max_jobs, &out_data, &out_size, &out_width, &out_height, &out_time);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(out_data, I32(out_size));
    }
}
#endif
#if defined(HAVE_RGETC)
#include "rg_etc1.h"
NV_AT_STARTUP(rg_etc1::pack_etc1_block_init());
// Encodes one color block with rg_etc1::pack_etc1_block. The pack quality is
// derived from compressionOptions.quality (fastest -> low, production and
// highest -> high, everything else -> medium). The block is swizzled in place
// (components 0 and 2 swapped) before packing, so `rgba` is modified.
// alphaMode is not used.
void RgEtcCompressor::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
    rg_etc1::etc1_pack_params pack_params;

    switch (compressionOptions.quality) {
        case Quality_Fastest:
            pack_params.m_quality = rg_etc1::cLowQuality;
            break;
        case Quality_Production:
        case Quality_Highest:
            pack_params.m_quality = rg_etc1::cHighQuality;
            break;
        case Quality_Normal:
        default:
            pack_params.m_quality = rg_etc1::cMediumQuality;
            break;
    }

    rgba.swizzle(2, 1, 0, 3);

    rg_etc1::pack_etc1_block(output, (uint *)rgba.colors(), pack_params);

    //Vector4 result[16];
    //nv::decompress_etc(output, result);
}
#endif
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTextureUtilities.h> // for CPVRTexture, CPVRTextureHeader, PixelType, Transcode
#include "nvmath/Color.inl"
// Compresses an image to PVRTC with PVRTexTool and passes the transcoded data
// to outputOptions.outputHandler (if one is set and transcoding succeeded).
//
// The source is read as consecutive planes of w * h * d floats each; only the
// first three planes are used and packed into an 8-bit-per-channel RGB buffer.
// compressionOptions.format selects the destination pixel type (2bpp RGB is
// the fallback). alphaMode and dispatcher are not used.
//
// Each component is clamped to [0, 1] before being scaled to a byte: converting
// an out-of-range float to uint8 is undefined, and unclamped floating point
// input can easily fall outside that range.
void CompressorPVR::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    EPVRTColourSpace color_space = ePVRTCSpacelRGB;

    //pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('b','g','r','a',8,8,8,8);
    pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('r','g','b',0,8,8,8,0);

    pvrtexture::CPVRTextureHeader header(src_pixel_type.PixelTypeID, w, h, d, 1/*num mips*/, 1/*num array*/, 1/*num faces*/, color_space, ePVRTVarTypeUnsignedByteNorm);

    /*
    uint count = w * h * d;
    Array<Color32> tmp;
    tmp.resize(count);
    for (uint i = 0; i < count; i++) {
        tmp[i] = toColor32(Vector4(data[0*count + i], data[1*count + i], data[2*count + i], data[3*count + i]));
    }
    */

    // Planar float RGB -> interleaved 8-bit RGB (truncating, as before).
    uint count = w * h * d;
    Array<uint8> tmp;
    tmp.resize(3 * count);
    for (uint i = 0; i < count; i++) {
        tmp[3*i+0] = uint8(saturate(data[0*count + i]) * 255.0f);
        tmp[3*i+1] = uint8(saturate(data[1*count + i]) * 255.0f);
        tmp[3*i+2] = uint8(saturate(data[2*count + i]) * 255.0f);
    }

    pvrtexture::CPVRTexture texture(header, tmp.buffer());

    pvrtexture::PixelType dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
    if (compressionOptions.format == Format_PVR_2BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
    else if (compressionOptions.format == Format_PVR_4BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGB);
    else if (compressionOptions.format == Format_PVR_2BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGBA);
    else if (compressionOptions.format == Format_PVR_4BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGBA);

    bool success = pvrtexture::Transcode(texture, dst_pixel_type, ePVRTVarTypeUnsignedByteNorm, color_space, pvrtexture::ePVRTCNormal, false);

    if (success) {
        // Size of the transcoded data: blocks are 8x4 texels at 2 bpp and 4x4
        // texels at 4 bpp, with at least 2 blocks along each axis.
        uint size = 0;
        if (compressionOptions.format == Format_PVR_2BPP_RGB || compressionOptions.format == Format_PVR_2BPP_RGBA) {
            // 2 bpp
            const uint bpp = 2u;
            const uint block_size = 8u * 4u;
            const uint size_factor = (block_size * bpp) >> 3u;
            const uint block_width = nv::max((w >> 3u), 2u);
            const uint block_height = nv::max((h >> 2u), 2u);
            size = d * block_width * block_height * size_factor;
        }
        else {
            // 4 bpp
            const uint bpp = 4u;
            const uint block_size = 4u * 4u;
            const uint size_factor = (block_size * bpp) >> 3u;
            const uint block_width = max((w >> 2u), 2u);
            const uint block_height = max((h >> 2u), 2u);
            size = d * block_width * block_height * size_factor;
        }

        if (outputOptions.outputHandler != NULL) {
            outputOptions.outputHandler->writeData(texture.getDataPtr(), I32(size));
        }
    }
}
#endif

@ -27,7 +27,6 @@
#include "Compressor.h" #include "Compressor.h"
namespace nv namespace nv
{ {
struct ColorBlock; struct ColorBlock;
@ -45,9 +44,149 @@ namespace nv
{ {
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const = 0; virtual uint blockSize(const nvtt::CompressionOptions::Private & compressionOptions) const = 0;
};
// BC1
// FloatColorCompressor that reports 8 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct FastCompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// FloatColorCompressor that reports 8 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct CompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// BC3
// FloatColorCompressor that reports 16 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct CompressorBC3_RGBM : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
// ETC
// FloatColorCompressor that reports 8 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct CompressorETC1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// FloatColorCompressor that reports 8 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct CompressorETC2_R : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
// FloatColorCompressor that reports 16 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct CompressorETC2_RG : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
// FloatColorCompressor that reports 8 bytes per encoded block.
// compressBlock() is only declared here; its definition lives out of line.
struct CompressorETC2_RGB : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
// ETC2 RGBA block compressor.
struct CompressorETC2_RGBA : FloatColorCompressor
{
    // Per-block entry point: 16 colors, 16 weights, the options and the output pointer.
    virtual void compressBlock(
        Vector4 colors[16],
        float weights[16],
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 16, independent of the options passed in.
    virtual uint blockSize(const nvtt::CompressionOptions::Private & /*compressionOptions*/) const
    {
        return 16;
    }
};
// ETC2 RGBM block compressor.
struct CompressorETC2_RGBM : FloatColorCompressor
{
    // Per-block entry point: 16 colors, 16 weights, the options and the output pointer.
    virtual void compressBlock(
        Vector4 colors[16],
        float weights[16],
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 16, independent of the options passed in.
    virtual uint blockSize(const nvtt::CompressionOptions::Private & /*compressionOptions*/) const
    {
        return 16;
    }
};
// External compressors.
#if defined(HAVE_ATITC)
// DXT1 compressor backed by the ATI compression library (only built with HAVE_ATITC).
// NOTE(review): EtcLibCompressor and CompressorPVR in this file declare compress() with a
// different parameter list; confirm which one matches CompressorInterface::compress.
struct AtiCompressorDXT1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
// DXT5 compressor backed by the ATI compression library (only built with HAVE_ATITC).
struct AtiCompressorDXT5 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
// DXT1 compressor backed by squish (only built with HAVE_SQUISH).
struct SquishCompressorDXT1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
// DXT1 compressor backed by D3DX (only built with HAVE_D3DX).
struct D3DXCompressorDXT1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
// DXT1 compressor backed by stb_dxt (only built with HAVE_STB).
struct StbCompressorDXT1 : ColorBlockCompressor
{
    // Per-block entry point working on an 8-bit ColorBlock.
    virtual void compressBlock(
        ColorBlock & rgba,
        nvtt::AlphaMode alphaMode,
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 8.
    virtual uint blockSize() const
    {
        return 8;
    }
};
#endif
#if NV_USE_CRUNCH
// ETC1 compressor backed by crunch (only built when NV_USE_CRUNCH is non-zero).
struct CrunchCompressorETC1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if NV_USE_INTEL_ISPC_TC
// BC1 compressor backed by the Intel ISPC texture compressor (NV_USE_INTEL_ISPC_TC).
struct IspcCompressorBC1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
// BC3 compressor backed by the Intel ISPC texture compressor (NV_USE_INTEL_ISPC_TC).
struct IspcCompressorBC3 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
// BC7 compressor backed by the Intel ISPC texture compressor (NV_USE_INTEL_ISPC_TC).
struct IspcCompressorBC7 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
// ETC1 compressor backed by the Intel ISPC texture compressor (NV_USE_INTEL_ISPC_TC).
struct IspcCompressorETC1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_ETCLIB)
// Compressor backed by EtcLib (only built with HAVE_ETCLIB).
struct EtcLibCompressor : CompressorInterface
{
    // Whole-image entry point taking float data and a task dispatcher.
    virtual void compress(
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        const float * data,
        nvtt::TaskDispatcher * dispatcher,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_RGETC)
// ETC compressor backed by rg_etc (only built with HAVE_RGETC).
struct RgEtcCompressor : ColorBlockCompressor
{
    // Per-block entry point working on an 8-bit ColorBlock.
    virtual void compressBlock(
        ColorBlock & rgba,
        nvtt::AlphaMode alphaMode,
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 8.
    virtual uint blockSize() const
    {
        return 8;
    }
};
#endif
#if defined(HAVE_PVRTEXTOOL)
// PVR compressor, only built with HAVE_PVRTEXTOOL.
// Declares a whole-image compress() taking float data and a task dispatcher.
struct CompressorPVR : public CompressorInterface
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
}; };
#endif
} // nv namespace } // nv namespace

@ -13,6 +13,7 @@ SET(NVTT_SRCS
CompressorDX11.h CompressorDX11.cpp CompressorDX11.h CompressorDX11.cpp
CompressorDXT1.h CompressorDXT1.cpp CompressorDXT1.h CompressorDXT1.cpp
CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp
CompressorETC.h CompressorETC.cpp
CompressorRGB.h CompressorRGB.cpp CompressorRGB.h CompressorRGB.cpp
Context.h Context.cpp Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp QuickCompressDXT.h QuickCompressDXT.cpp
@ -38,6 +39,7 @@ IF (CUDA_FOUND)
ENDIF (CUDA_FOUND) ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104)
ADD_DEFINITIONS(-DNVTT_EXPORTS) ADD_DEFINITIONS(-DNVTT_EXPORTS)
@ -47,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath) TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath rg_etc1)
INSTALL(TARGETS nvtt INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin RUNTIME DESTINATION bin

@ -38,79 +38,6 @@ ClusterFit::ClusterFit()
{ {
} }
#if 0 // @@ Deprecate. Do not use color set directly.
// Deprecated ColorSet overload (compiled out by the surrounding #if 0).
// Resets the best error, orders the set's colors along their principal axis and
// caches the weighted colors together with their running sums.
void ClusterFit::setColorSet(const ColorSet * set)
{
// initialise the best error
#if NVTT_USE_SIMD
m_besterror = SimdVector( FLT_MAX );
Vector3 metric = m_metric.toVector3();
#else
m_besterror = FLT_MAX;
Vector3 metric = m_metric;
#endif
// cache some values
m_count = set->colorCount;
// Local copy of the rgb part of each color.
// NOTE(review): fixed 16-entry buffers; assumes set->colorCount <= 16 - confirm.
Vector3 values[16];
for (uint i = 0; i < m_count; i++)
{
values[i] = set->colors[i].xyz();
}
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric);
// build the list of values
// dps[i] is the projection of color i onto the principal axis; order[i] tracks
// the original index through the sort below.
int order[16];
float dps[16];
for (uint i = 0; i < m_count; ++i)
{
dps[i] = dot(values[i], principal);
order[i] = i;
}
// stable sort
// Insertion sort by ascending projection; only strictly smaller keys move, so
// equal keys keep their relative order.
for (uint i = 0; i < m_count; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
// weight all the points
#if NVTT_USE_SIMD
m_xxsum = SimdVector( 0.0f );
m_xsum = SimdVector( 0.0f );
#else
m_xxsum = Vector3(0.0f);
m_xsum = Vector3(0.0f);
m_wsum = 0.0f;
#endif
for (uint i = 0; i < m_count; ++i)
{
// Visit the colors in sorted order.
int p = order[i];
#if NVTT_USE_SIMD
// The fourth component is set to 1 before scaling, so after multiplying by the
// weight it holds the weight itself.
NV_ALIGN_16 Vector4 tmp(values[p], 1);
m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]);
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
#else
// Scalar path keeps the weights and their sum in separate members.
m_weighted[i] = values[p] * set->weights[p];
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
m_weights[i] = set->weights[p];
m_wsum += m_weights[i];
#endif
}
}
#endif // 0
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{ {
// initialise the best error // initialise the best error
@ -412,13 +339,13 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
#else #else
inline Vector3 round565(const Vector3 & v) { inline Vector3 round565(const Vector3 & v) {
uint r = ftoi_trunc(v.x * 31.0f); uint r = ftoi_trunc(v.x * 31.0f);
float r0 = float(((r+0) << 3) | ((r+0) >> 2)); float r0 = float(((r+0) << 3) | ((r+0) >> 2));
float r1 = float(((r+1) << 3) | ((r+1) >> 2)); float r1 = float(((r+1) << 3) | ((r+1) >> 2));
if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U); if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U);
r = (r << 3) | (r >> 2); r = (r << 3) | (r >> 2);
uint g = ftoi_trunc(v.y * 63.0f); uint g = ftoi_trunc(v.y * 63.0f);
float g0 = float(((g+0) << 2) | ((g+0) >> 4)); float g0 = float(((g+0) << 2) | ((g+0) >> 4));
float g1 = float(((g+1) << 2) | ((g+1) >> 4)); float g1 = float(((g+1) << 2) | ((g+1) >> 4));
if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U); if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U);
@ -428,8 +355,8 @@ inline Vector3 round565(const Vector3 & v) {
float b0 = float(((b+0) << 3) | ((b+0) >> 2)); float b0 = float(((b+0) << 3) | ((b+0) >> 2));
float b1 = float(((b+1) << 3) | ((b+1) >> 2)); float b1 = float(((b+1) << 3) | ((b+1) >> 2));
if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U); if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U);
b = (b << 3) | (b >> 2); b = (b << 3) | (b >> 2);
return Vector3(float(r)/255, float(g)/255, float(b)/255); return Vector3(float(r)/255, float(g)/255, float(b)/255);
} }

@ -50,7 +50,8 @@ void CompressionOptions::reset()
m.format = Format_DXT1; m.format = Format_DXT1;
m.quality = Quality_Normal; m.quality = Quality_Normal;
m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f); m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f);
m.rgbmThreshold = 0.15f;
m.bitcount = 32; m.bitcount = 32;
m.bmask = 0x000000FF; m.bmask = 0x000000FF;
m.gmask = 0x0000FF00; m.gmask = 0x0000FF00;
@ -102,6 +103,11 @@ void CompressionOptions::setColorWeights(float red, float green, float blue, flo
m.colorWeight.set(red, green, blue, alpha); m.colorWeight.set(red, green, blue, alpha);
} }
/// Set the RGBM threshold.
/// Stores @a min_m in the private options; reset() initialises it to 0.15.
void CompressionOptions::setRGBMThreshold(float min_m)
{
    float & threshold = m.rgbmThreshold;
    threshold = min_m;
}
/// Set color mask to describe the RGB/RGBA format. /// Set color mask to describe the RGB/RGBA format.
void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask) void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask)
@ -162,7 +168,7 @@ void CompressionOptions::setPixelType(PixelType pixelType)
/// Set pitch alignment in bytes. /// Set pitch alignment in bytes.
void CompressionOptions::setPitchAlignment(int pitchAlignment) void CompressionOptions::setPitchAlignment(int pitchAlignment)
{ {
nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(pitchAlignment)); nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(U32(pitchAlignment)));
m.pitchAlignment = pitchAlignment; m.pitchAlignment = pitchAlignment;
} }
@ -194,6 +200,10 @@ void CompressionOptions::setTargetDecoder(Decoder decoder)
} }
/// Get the currently selected output format.
Format CompressionOptions::format() const
{
    const Format selected = m.format;
    return selected;
}
// Translate to and from D3D formats. // Translate to and from D3D formats.
unsigned int CompressionOptions::d3d9Format() const unsigned int CompressionOptions::d3d9Format() const
@ -246,10 +256,20 @@ unsigned int CompressionOptions::d3d9Format() const
FOURCC_ATI2, // Format_BC5 FOURCC_ATI2, // Format_BC5
FOURCC_DXT1, // Format_DXT1n FOURCC_DXT1, // Format_DXT1n
0, // Format_CTX1 0, // Format_CTX1
MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6 FOURCC_BC6H, // Format_BC6
MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7 FOURCC_BC7L, // Format_BC7
//FOURCC_ATI2, // Format_BC5_Luma FOURCC_DXT5, // Format_BC3_RGBM
FOURCC_DXT5, // Format_BC3_RGBM NV_MAKEFOURCC('E', 'T', 'C', '1'), // Format_ETC1
0, // Format_ETC2_R
0, // Format_ETC2_RG
NV_MAKEFOURCC('E', 'T', 'C', '2'), // Format_ETC2_RGB
0, // Format_ETC2_RGBA
0, // Format_ETC2_RGB_A1
0, // Format_ETC2_RGBM
FOURCC_PVR0,
FOURCC_PVR1,
FOURCC_PVR2,
FOURCC_PVR3,
}; };
NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count); NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count);
@ -258,12 +278,80 @@ unsigned int CompressionOptions::d3d9Format() const
} }
} }
/* unsigned int CompressionOptions::dxgiFormat() const // @@ Add srgb flag.
bool CompressionOptions::setDirect3D9Format(unsigned int format)
{ {
if (m.format == Format_RGB) {
if (m.pixelType == PixelType_UnsignedNorm) {
uint bitcount = m.bitcount;
uint rmask = m.rmask;
uint gmask = m.gmask;
uint bmask = m.bmask;
uint amask = m.amask;
if (bitcount == 0) {
bitcount = m.rsize + m.gsize + m.bsize + m.asize;
rmask = ((1 << m.rsize) - 1) << (m.asize + m.bsize + m.gsize);
gmask = ((1 << m.gsize) - 1) << (m.asize + m.bsize);
bmask = ((1 << m.bsize) - 1) << m.asize;
amask = ((1 << m.asize) - 1) << 0;
}
if (bitcount <= 32) {
return nv::findDXGIFormat(bitcount, rmask, gmask, bmask, amask);
}
else {
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_UNORM;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_UNORM;
}
}
else if (m.pixelType == PixelType_Float) {
if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16_FLOAT;
if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32_FLOAT;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_FLOAT;
if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32G32_FLOAT;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_FLOAT;
if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return DXGI_FORMAT_R32G32B32A32_FLOAT;
}
return 0;
}
else {
// DXGI format for every nvtt::Format, indexed by the enum value. 0 means there
// is no DXGI equivalent. Entries must stay in enum order.
uint dxgi_formats[] = {
    0,                      // Format_RGB,
    DXGI_FORMAT_BC1_UNORM,  // Format_DXT1
    DXGI_FORMAT_BC1_UNORM,  // Format_DXT1a
    DXGI_FORMAT_BC2_UNORM,  // Format_DXT3
    DXGI_FORMAT_BC3_UNORM,  // Format_DXT5
    DXGI_FORMAT_BC3_UNORM,  // Format_DXT5n
    DXGI_FORMAT_BC4_UNORM,  // Format_BC4
    DXGI_FORMAT_BC5_UNORM,  // Format_BC5
    DXGI_FORMAT_BC1_UNORM,  // Format_DXT1n
    0,                      // Format_CTX1
    DXGI_FORMAT_BC6H_UF16,  // Format_BC6
    DXGI_FORMAT_BC7_UNORM,  // Format_BC7
    // RGBM data is stored in DXT5 blocks (the D3D9 table uses FOURCC_DXT5), so
    // the matching DXGI format is BC3, not BC5.
    DXGI_FORMAT_BC3_UNORM,  // Format_BC3_RGBM
    0,                      // Format_ETC1
    0,                      // Format_ETC2_R
    0,                      // Format_ETC2_RG
    0,                      // Format_ETC2_RGB
    0,                      // Format_ETC2_RGBA
    0,                      // Format_ETC2_RGB_A1
    0,                      // Format_ETC2_RGBM
    0,                      // Format_PVR_2BPP_RGB
    0,                      // Format_PVR_4BPP_RGB
    0,                      // Format_PVR_2BPP_RGBA
    0,                      // Format_PVR_4BPP_RGBA
};
NV_COMPILER_CHECK(NV_ARRAY_SIZE(dxgi_formats) == Format_Count);
return dxgi_formats[m.format];
}
} }
unsigned int CompressionOptions::dxgiFormat() const /*
bool CompressionOptions::setDirect3D9Format(unsigned int format)
{ {
} }

@ -39,7 +39,8 @@ namespace nvtt
Quality quality; Quality quality;
nv::Vector4 colorWeight; nv::Vector4 colorWeight;
float rgbmThreshold;
// Pixel format description. // Pixel format description.
uint bitcount; uint bitcount;
uint rmask; uint rmask;

@ -30,6 +30,7 @@
namespace nv namespace nv
{ {
struct CompressorInterface struct CompressorInterface
{ {
virtual ~CompressorInterface() {} virtual ~CompressorInterface() {}

@ -39,7 +39,7 @@ using namespace nv;
using namespace nvtt; using namespace nvtt;
void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC6::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
// !!!UNDONE: support channel weights // !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
@ -77,7 +77,7 @@ void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[
ZOH::compress(zohTile, (char *)output); ZOH::compress(zohTile, (char *)output);
} }
void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC7::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
// !!!UNDONE: support channel weights // !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)

@ -30,14 +30,14 @@ namespace nv
{ {
struct CompressorBC6 : public FloatColorCompressor struct CompressorBC6 : public FloatColorCompressor
{ {
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; } virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
}; };
struct CompressorBC7 : public FloatColorCompressor struct CompressorBC7 : public FloatColorCompressor
{ {
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; } virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
}; };
} // nv namespace } // nv namespace

@ -28,7 +28,7 @@
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "ClusterFit.h" #include "ClusterFit.h"
#include "CompressorDXT1.h" //#include "CompressorDXT1.h"
#include "CompressorDXT5_RGBM.h" #include "CompressorDXT5_RGBM.h"
// squish // squish
@ -48,45 +48,11 @@
#include <new> // placement new #include <new> // placement new
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
#endif
// d3dx
#if defined(HAVE_D3DX)
#include <d3dx9.h>
#endif
// stb
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
#endif
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
// Compresses one color block with the quick DXT1 path.
// alphaMode and compressionOptions are not used here.
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Construct the DXT1 block directly in the caller's buffer, then fill it.
    BlockDXT1 * const dst = new(output) BlockDXT1;

    QuickCompress::compressDXT1(rgba, dst);
}
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{ {
@ -115,39 +81,13 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
} }
#if 1
// Compresses 16 weighted float colors into a DXT1 block, using the rgb part of
// the color weights from the options and allowing the three color mode.
void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    const bool three_color_mode = true;
    BlockDXT1 * const dst = (BlockDXT1 *)output;

    compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), three_color_mode, dst);
}
#else
// Squish based DXT1 path (alternative branch of the preprocessor switch).
// Single color blocks use the optimal single-color encoder; everything else
// goes through the weighted cluster fit.
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    nvsquish::WeightedClusterFit clusterFit;
    clusterFit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

    if (rgba.isSingleColor())
    {
        // Construct the block in place and encode the single color.
        BlockDXT1 * const dst = new(output) BlockDXT1;
        OptimalCompress::compressDXT1(rgba.color(0), dst);
        return;
    }

    nvsquish::ColourSet colourSet((uint8 *)rgba.colors(), 0);
    clusterFit.SetColourSet(&colourSet, nvsquish::kDxt1);
    clusterFit.Compress(output);
}
#endif
void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{ {
uint alphaMask = 0; uint alphaMask = 0;
for (uint i = 0; i < 16; i++) for (uint i = 0; i < 16; i++)
{ {
if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color. if (rgba.color(i).a == 0) alphaMask |= (3U << (i * 2U)); // Set two bits for each color.
} }
const bool isSingleColor = rgba.isSingleColor(); const bool isSingleColor = rgba.isSingleColor();
@ -284,216 +224,6 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode
} }
// Compresses 16 weighted float colors into a DXT5 block using RGBM encoding.
// The minimum multiplier comes from the compression options
// (CompressionOptions::setRGBMThreshold) instead of a hard-coded constant.
void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    const float min_m = compressionOptions.rgbmThreshold;
    compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output);
}
#if defined(HAVE_ATITC)
// Compresses a whole 2D image to DXT1 through ATI_Compress and hands the result
// to the output handler, if there is one. alphaMode and compressionOptions are
// not used here.
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
// Only single-slice images are expected.
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
// 4 bytes per pixel.
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// @@ Floating point input is not swizzled.
// 16 bytes per pixel.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
// The size query reads the fields set above, so it has to come after them.
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
// NOTE(review): the allocation result is not checked before use.
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
ATI_TC_CompressOptions options;
options.dwSize = sizeof(options);
options.bUseChannelWeighting = false;
options.bUseAdaptiveWeighting = false;
options.bDXT1UseAlpha = false;
options.nCompressionSpeed = ATI_TC_Speed_Normal;
options.bDisableMultiThreading = false;
//options.bDisableMultiThreading = true;
// Compress
// NOTE(review): the return value of ATI_TC_ConvertTexture is ignored.
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
// Compresses a whole 2D image to DXT5 through ATI_Compress, using the library's
// default options (NULL), and hands the result to the output handler, if there
// is one. alphaMode and compressionOptions are not used here.
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
// Only single-slice images are expected.
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
// 4 bytes per pixel.
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// 16 bytes per pixel.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
// The size query reads the fields set above, so it has to come after them.
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT5;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
// NOTE(review): the allocation result is not checked before use.
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
// NOTE(review): the return value of ATI_TC_ConvertTexture is ignored.
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
// Placeholder for a squish based DXT1 path. It currently does no compression:
// the body only asserts, and the previous implementation is kept commented out
// until the input conversion mentioned in the TODO is written.
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Always fails: flags any call to this unimplemented path in builds where the check is active.
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
// Compresses a whole 2D image to DXT1 by loading it into a D3DFMT_DXT1 system
// memory texture through D3DX and writing the locked surface contents to the
// output handler, if there is one.
//
// Every Direct3D call is checked before its result is used, and all COM objects
// that were created (including the texture) are released on every path.
// alphaMode and compressionOptions are not used here.
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL) {
        // No Direct3D runtime available; nothing to compress with.
        return;
    }

    D3DPRESENT_PARAMETERS presentParams;
    ZeroMemory(&presentParams, sizeof(presentParams));
    presentParams.Windowed = TRUE;
    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
    presentParams.BackBufferWidth = 8;
    presentParams.BackBufferHeight = 8;
    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;

    IDirect3DDevice9 * device = NULL;
    IDirect3DTexture9 * texture = NULL;
    IDirect3DSurface9 * surface = NULL;

    HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);

    if (SUCCEEDED(err)) {
        err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
    }

    if (SUCCEEDED(err)) {
        err = texture->GetSurfaceLevel(0, &surface);
    }

    if (SUCCEEDED(err)) {
        RECT srcRect;
        srcRect.left = 0;
        srcRect.top = 0;
        srcRect.bottom = h;
        srcRect.right = w;

        if (inputFormat == nvtt::InputFormat_BGRA_8UB)
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
        else
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
    }

    if (SUCCEEDED(err)) {
        D3DLOCKED_RECT lockedRect;
        ZeroMemory(&lockedRect, sizeof(lockedRect));

        err = surface->LockRect(&lockedRect, NULL, D3DLOCK_READONLY);
        if (SUCCEEDED(err)) {
            if (outputOptions.outputHandler != NULL) {
                // One row of 4x4 blocks per pitch.
                int size = lockedRect.Pitch * ((h + 3) / 4);
                outputOptions.outputHandler->writeData(lockedRect.pBits, size);
            }
            surface->UnlockRect();
        }
    }

    // Release in reverse order of creation; skip anything that was never created.
    if (surface != NULL) surface->Release();
    if (texture != NULL) texture->Release();
    if (device != NULL) device->Release();
    d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
// Compresses one color block to DXT1 with stb_dxt.
// The input block is modified: its red and blue channels are swapped first.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    // Swap R and B
    rgba.swizzle(2, 1, 0, 3);

    unsigned char * const dst = (unsigned char *)output;
    unsigned char * const src = (unsigned char *)rgba.colors();

    stb_compress_dxt_block(dst, src, 0, 0);
}
#endif // defined(HAVE_STB)

@ -32,12 +32,6 @@ namespace nv
struct ColorBlock; struct ColorBlock;
// Fast CPU compressors. // Fast CPU compressors.
// Fast DXT1 compressor operating on 8-bit color blocks.
struct FastCompressorDXT1 : ColorBlockCompressor
{
    // Per-block entry point working on an 8-bit ColorBlock.
    virtual void compressBlock(
        ColorBlock & rgba,
        nvtt::AlphaMode alphaMode,
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 8.
    virtual uint blockSize() const
    {
        return 8;
    }
};
struct FastCompressorDXT1a : public ColorBlockCompressor struct FastCompressorDXT1a : public ColorBlockCompressor
{ {
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
@ -64,19 +58,6 @@ namespace nv
// Normal CPU compressors. // Normal CPU compressors.
#if 1
// DXT1 compressor operating on floating point colors (active preprocessor branch).
struct CompressorDXT1 : FloatColorCompressor
{
    // Per-block entry point: 16 colors, 16 weights, the options and the output pointer.
    virtual void compressBlock(
        const Vector4 colors[16],
        const float weights[16],
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 8.
    virtual uint blockSize() const
    {
        return 8;
    }
};
#else
// DXT1 compressor operating on 8-bit color blocks (alternative preprocessor branch).
struct CompressorDXT1 : ColorBlockCompressor
{
    // Per-block entry point working on an 8-bit ColorBlock.
    virtual void compressBlock(
        ColorBlock & rgba,
        nvtt::AlphaMode alphaMode,
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 8.
    virtual uint blockSize() const
    {
        return 8;
    }
};
#endif
struct CompressorDXT1a : public ColorBlockCompressor struct CompressorDXT1a : public ColorBlockCompressor
{ {
@ -108,47 +89,9 @@ namespace nv
virtual uint blockSize() const { return 16; } virtual uint blockSize() const { return 16; }
}; };
// RGBM compressor that emits BC3 blocks.
struct CompressorBC3_RGBM : FloatColorCompressor
{
    // Per-block entry point: 16 colors, 16 weights, the options and the output pointer.
    virtual void compressBlock(
        const Vector4 colors[16],
        const float weights[16],
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 16.
    virtual uint blockSize() const
    {
        return 16;
    }
};
// External compressors.
#if defined(HAVE_ATITC)
// DXT1 compressor backed by the ATI compression library (only built with HAVE_ATITC).
struct AtiCompressorDXT1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
// DXT5 compressor backed by the ATI compression library (only built with HAVE_ATITC).
struct AtiCompressorDXT5 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
// DXT1 compressor backed by squish (only built with HAVE_SQUISH).
struct SquishCompressorDXT1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
// DXT1 compressor backed by D3DX (only built with HAVE_D3DX).
struct D3DXCompressorDXT1 : CompressorInterface
{
    // Whole-image entry point.
    virtual void compress(
        nvtt::InputFormat inputFormat,
        nvtt::AlphaMode alphaMode,
        uint w, uint h, uint d,
        void * data,
        const nvtt::CompressionOptions::Private & compressionOptions,
        const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
// DXT1 compressor backed by stb_dxt (only built with HAVE_STB).
struct StbCompressorDXT1 : ColorBlockCompressor
{
    // Per-block entry point working on an 8-bit ColorBlock.
    virtual void compressBlock(
        ColorBlock & rgba,
        nvtt::AlphaMode alphaMode,
        const nvtt::CompressionOptions::Private & compressionOptions,
        void * output);

    // Reports a block size of 8.
    virtual uint blockSize() const
    {
        return 8;
    }
};
#endif
} // nv namespace } // nv namespace

@ -218,13 +218,13 @@ static int evaluate_mse(const Color32 & p, const Color32 & c) {
return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b)); return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b));
} }
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) { /*static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) {
float e0 = evaluate_mse(palette[0], c, w); float e0 = evaluate_mse(palette[0], c, w);
float e1 = evaluate_mse(palette[1], c, w); float e1 = evaluate_mse(palette[1], c, w);
float e2 = evaluate_mse(palette[2], c, w); float e2 = evaluate_mse(palette[2], c, w);
float e3 = evaluate_mse(palette[3], c, w); float e3 = evaluate_mse(palette[3], c, w);
return min(min(e0, e1), min(e2, e3)); return min(min(e0, e1), min(e2, e3));
} }*/
static int evaluate_mse(const Color32 palette[4], const Color32 & c) { static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
int e0 = evaluate_mse(palette[0], c); int e0 = evaluate_mse(palette[0], c);
@ -245,12 +245,12 @@ static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) {
// Returns weighted MSE error in [0-255] range. // Returns weighted MSE error in [0-255] range.
static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) { static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) {
float total = 0.0f; float total = 0.0f;
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
total += weights[i] * evaluate_mse(palette, colors[i]); total += weights[i] * evaluate_mse(palette, colors[i]);
} }
return total; return total;
} }
#if 0 #if 0
@ -337,7 +337,7 @@ static void evaluate_palette(Color16 c0, Color16 c1, Vector3 palette[4]) {
} }
} }
static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { /*static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
nvDebugCheck(c0.u > c1.u); nvDebugCheck(c0.u > c1.u);
Color32 palette32[4]; Color32 palette32[4];
@ -346,7 +346,7 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
palette[i] = color_to_vector3(palette32[i]); palette[i] = color_to_vector3(palette32[i]);
} }
} }*/
@ -355,38 +355,38 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) {
uint indices = 0; uint indices = 0;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint b0 = d0 > d3; uint b0 = d0 > d3;
uint b1 = d1 > d2; uint b1 = d1 > d2;
uint b2 = d0 > d2; uint b2 = d0 > d2;
uint b3 = d1 > d3; uint b3 = d1 > d3;
uint b4 = d2 > d3; uint b4 = d2 > d3;
uint x0 = b1 & b2; uint x0 = b1 & b2;
uint x1 = b0 & b3; uint x1 = b0 & b3;
uint x2 = b0 & b4; uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
} }
return indices; return indices;
} }
static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) {
uint indices = 0; uint indices = 0;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint index; uint index;
if (d0 < d1 && d0 < d2 && d0 < d3) index = 0; if (d0 < d1 && d0 < d2 && d0 < d3) index = 0;
else if (d1 < d2 && d1 < d3) index = 1; else if (d1 < d2 && d1 < d3) index = 1;
@ -491,7 +491,8 @@ float nv::compress_dxt1_single_color(const Vector3 * colors, const float * weigh
// Decompress block color. // Decompress block color.
Color32 palette[4]; Color32 palette[4];
output->evaluatePalette(palette, /*d3d9=*/false); evaluate_palette(output->col0, output->col1, palette);
//output->evaluatePalette(palette, /*d3d9=*/false);
Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]); Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]);
@ -668,7 +669,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// This is too expensive, even with a low threshold. // This is too expensive, even with a low threshold.
// If high quality: // If high quality:
if (0) { if (/* DISABLES CODE */ (0)) {
BlockDXT1 exhaustive_output; BlockDXT1 exhaustive_output;
float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output); float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output);
@ -720,7 +721,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// Least squares fitting of color end points for the given indices. @@ Take weights into account. // Least squares fitting of color end points for the given indices. @@ Take weights into account.
static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) static bool optimize_end_points4(uint indices, const Vector4 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{ {
float alpha2_sum = 0.0f; float alpha2_sum = 0.0f;
float beta2_sum = 0.0f; float beta2_sum = 0.0f;
@ -739,8 +740,8 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
alpha2_sum += alpha * alpha; alpha2_sum += alpha * alpha;
beta2_sum += beta * beta; beta2_sum += beta * beta;
alphabeta_sum += alpha * beta; alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i]; alphax_sum += alpha * colors[i].xyz();
betax_sum += beta * colors[i]; betax_sum += beta * colors[i].xyz();
} }
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
@ -756,7 +757,7 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
// Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account. // Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account.
static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) static bool optimize_end_points3(uint indices, const Vector3 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{ {
float alpha2_sum = 0.0f; float alpha2_sum = 0.0f;
float beta2_sum = 0.0f; float beta2_sum = 0.0f;
@ -794,6 +795,90 @@ static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vec
// find minimum and maximum colors based on bounding box in color space
inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
*c0 = Vector3(0);
*c1 = Vector3(255);
for (int i = 0; i < count; i++) {
*c0 = max(*c0, colors[i]);
*c1 = min(*c1, colors[i]);
}
}
inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 center = (*c0 + *c1) * 0.5f;
Vector2 covariance = Vector2(0);
for (int i = 0; i < count; i++) {
Vector3 t = colors[i] - center;
covariance += t.xy() * t.z;
}
float x0 = c0->x;
float y0 = c0->y;
float x1 = c1->x;
float y1 = c1->y;
if (covariance.x < 0) {
swap(x0, x1);
}
if (covariance.y < 0) {
swap(y0, y1);
}
c0->set(x0, y0, c0->z);
c1->set(x1, y1, c1->z);
}
// Shrinks the box by moving both end points toward each other:
// *c0 is lowered and *c1 raised by (c0 - c1) / 16 minus a small constant bias,
// and each result is clamped to [0, 255].
// NOTE(review): the bias is expressed in units of 1/255, which suggests unit-range
// colors, while the clamp bounds are [0, 255] - confirm the intended color range.
inline static void inset_bbox(Vector3 * restrict c0, Vector3 * restrict c1)
{
    const float bias = (8.0f / 255.0f) / 16.0f;
    Vector3 shrink = (*c0 - *c1) / 16.0f - bias;

    *c0 = clamp(*c0 - shrink, 0.0f, 255.0f);
    *c1 = clamp(*c1 + shrink, 0.0f, 255.0f);
}
// Fast DXT1 block encoder: quick end point selection followed by one
// least-squares refinement pass.
//
//   input_colors / input_weights : the 16 texels of the block and their weights.
//   color_weights                : per-channel weights forwarded to the helpers.
//   output                       : receives the encoded block.
//
// Returns 0 for a block that reduce_colors() reports as empty, the single color
// error when that fit is exact or only one color remains, and otherwise the
// value of evaluate_mse() for the final block.
float nv::compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output)
{
    Vector3 colors[16];
    float weights[16];
    const int count = reduce_colors(input_colors, input_weights, colors, weights);

    // Nothing to encode: emit the trivial all-zero block.
    if (count == 0) {
        output->col0.u = 0;
        output->col1.u = 0;
        output->indices = 0;
        return 0;
    }

    // Always try the single color fit first; it also leaves a valid block in `output`.
    const float single_color_error = compress_dxt1_single_color(colors, weights, count, color_weights, output);
    if (count == 1 || single_color_error == 0.0f) {
        return single_color_error;
    }

    // Quick end point selection: bounding box, diagonal choice, then inset.
    Vector3 c0, c1;
    fit_colors_bbox(colors, count, &c0, &c1);
    select_diagonal(colors, count, &c0, &c1);
    inset_bbox(&c0, &c1);

    output_block4(input_colors, color_weights, c0, c1, output);

    // Refit the end points to the indices just chosen; re-emit only if the fit succeeds.
    const bool refined = optimize_end_points4(output->indices, input_colors, 16, &c0, &c1);
    if (refined) {
        output_block4(input_colors, color_weights, c0, c1, output);
    }

    return evaluate_mse(input_colors, input_weights, color_weights, output);
}

@ -13,11 +13,14 @@ namespace nv {
float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output); float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);
float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); //float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output); float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output);
void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Cluster fit end point selection.
float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Quick end point selection followed by least squares refinement.
float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output);
} }

@ -3,6 +3,7 @@
#include "OptimalCompressDXT.h" #include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
#include "CompressorETC.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h" #include "nvimage/BlockDXT.h"
@ -17,38 +18,45 @@
using namespace nv; using namespace nv;
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
static void convert_to_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, Vector4 rgbm_colors[16], float rgb_weights[16]) {
float weight_sum = 0; float weight_sum = 0;
for (uint i = 0; i < 16; i++) { for (uint i = 0; i < 16; i++) {
const Vector4 & c = input_colors[i]; const Vector4 & c = input_colors[i];
float R = saturate(c.x); float R = saturate(c.x);
float G = saturate(c.y); float G = saturate(c.y);
float B = saturate(c.z); float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m)); float M = max(max(R, G), max(B, min_m));
float r = R / M; float r = R / M;
float g = G / M; float g = G / M;
float b = B / M; float b = B / M;
float a = (M - min_m) / (1 - min_m); float a = (M - min_m) / (1 - min_m);
input_colors_rgbm[i] = Vector4(r, g, b, a); rgbm_colors[i] = Vector4(r, g, b, a);
rgb_weights[i] = input_weights[i] * M; rgb_weights[i] = input_weights[i] * M;
weight_sum += input_weights[i]; weight_sum += input_weights[i];
} }
if (weight_sum == 0) { if (weight_sum == 0) {
for (uint i = 0; i < 16; i++) rgb_weights[i] = 1; for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
} }
}
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights);
// Compress RGB. // Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color); compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);
@ -138,291 +146,61 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
} }
float nv::compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output) {
#if 0 // Convert to RGBM.
Vector4 rgbm_colors[16];
BlockDXT5 * block = new(output)BlockDXT5; float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, rgbm_colors, rgb_weights);
// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.
void * etc_output = (uint8 *)output + 8;
// Compress the resulting M values optimally. void * eac_output = output;
// Repeat this several times until compression error does not improve? // Compress RGB.
compress_etc2(rgbm_colors, rgb_weights, Vector3(1), etc_output);
//Vector3 rgb_block[16];
//float m_block[16];
// Init RGB/M block.
#if 0
nvsquish::WeightedClusterFit fit;
ColorBlock rgba;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgba.color(i) = toColor32(Vector4(r, g, b, a));
}
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}
#endif
#if 1
ColorSet rgb;
rgb.allocate(4, 4);
for (uint i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
// Decompress RGB/M block. // Decompress RGB/M block.
nv::ColorBlock RGB; decompress_etc(etc_output, rgbm_colors);
block->color.decodeBlock(&RGB);
#if 1 // Compute M values to compensate for RGB's error.
AlphaBlock4x4 M;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
const Vector4 & c = colors[i]; const Vector4 & c = input_colors[i];
float R = saturate(c.x); float R = saturate(c.x);
float G = saturate(c.y); float G = saturate(c.y);
float B = saturate(c.z); float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f; float rm = rgbm_colors[i].x;
float g = RGB.color(i).g / 255.0f; float gm = rgbm_colors[i].y;
float b = RGB.color(i).b / 255.0f; float bm = rgbm_colors[i].z;
float m = (R / r + G / g + B / b) / 3.0f; // compute m such that m * (r/M, g/M, b/M) == RGB
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m)); // Three equations, one unknown:
//float m = max(max(R, G), max(B, min_m)); // m * r/M == R
// m * g/M == G
// m * b/M == B
// Solve in the least squares sense!
// m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
// m == dot(rgb, RGB) / dot(rgb, rgb)
float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));
if (!isFinite(m)) {
m = 1;
}
m = (m - min_m) / (1 - min_m); m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); // Store M in alpha channel.
M.weights[i] = weights[i]; rgbm_colors[i].w = saturate(m); // @@ What it we don't saturate?
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#else
OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif
#if 0
// Decompress M.
block->alpha.decodeBlock(&M);
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
//float m = max(max(R, G), max(B, min_m));
float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
float r = R / m;
float g = G / m;
float b = B / m;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
} }
#endif
#if 0
block->color.decodeBlock(&RGB);
//AlphaBlock4x4 M;
//M.initWeights(src);
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
}
// Compress M. // Compress M.
if (compressionOptions.quality == Quality_Fastest) { compress_eac(rgbm_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*11bit_mode*/false, eac_output);
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#endif
#if 0
src.fromRGBM(M, min_m);
src.createMinimalSet(/*ignoreTransparent=*/true); return 0; // @@ Compute error.
}
if (src.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(src.color(0), &block->color);
}
else {
// @@ Use our improved compressor.
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&src);
Vector3 start, end;
fit.compress4(&start, &end);
if (fit.compress3(&start, &end)) {
QuickCompress::outputBlock3(src, start, end, block->color);
}
else {
QuickCompress::outputBlock4(src, start, end, block->color);
}
}
#endif // 0
// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?
// RGBM encoding.
// Maximize precision.
// - Number of possible grey levels:
// - Naive: 2^3 = 8
// - Better: 2^3 + 2^2 = 12
// - How to choose min_m?
// - Ideal = Adaptive per block, don't know where to store.
// - Adaptive per lightmap. How to compute optimal?
// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.
// - Optimal compressor: Interpolation artifacts.
// - Color transform.
// - Measure error in post-tone-mapping color space.
// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
// - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space.
// - Enhanced DXT compressor.
// - Typical RGBM encoding as follows:
// rgb -> M = max(rgb), RGB=rgb/M -> RGBM
// - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
// - We could ensure that M' >= M during compression.
// - We could clamp RGB anyway.
// - We could add a fixed scale value to take into account compression errors and avoid clamping.
// Compress color.
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}*/
#endif // 0

@ -5,5 +5,5 @@ namespace nv {
class Vector4; class Vector4;
float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output); float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output);
float compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output);
} }

File diff suppressed because it is too large Load Diff

@ -0,0 +1,20 @@
#include "nvcore/nvcore.h"
// ETC / EAC block compression and decompression entry points.
// All functions operate on one block of 16 colors (Vector4 arrays of size 16)
// and on an opaque encoded block passed as a void pointer.
// NOTE(review): the float return values are presumably compression error
// metrics, as with the DXT compressors - confirm against the implementation.
namespace nv {
class Vector3;
class Vector4;
// Decoders: read one encoded block and fill the 16 output colors.
void decompress_etc(const void * input_block, Vector4 output_colors[16]);
// Takes an additional output_channel (presumably the color component that receives the decoded values).
void decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel);
void decompress_etc_eac(const void * input_block, Vector4 output_colors[16]);
// Encoders: take 16 colors with per-texel weights and per-channel color weights, and write the encoded block to `output`.
float compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
// Single-channel encoder: input_channel selects the component of input_colors to encode;
// search_radius and use_11bit_mode tune the encoder (exact semantics are in the implementation).
float compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output);
float compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
}

@ -250,6 +250,8 @@ namespace
// Compute shared exponent. // Compute shared exponent.
int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B; int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B;
nvDebugCheck(exp_shared_p <= Emax);
nvDebugCheck(exp_shared_p >= 0);
int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N))); int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N)));
@ -279,7 +281,7 @@ namespace
{ {
float v = max3(r, g, b); float v = max3(r, g, b);
uint rgbe; uint rgbe = 0;
if (v < 1e-32) { if (v < 1e-32) {
rgbe = 0; rgbe = 0;
@ -534,6 +536,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
} }
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) { else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
// @@ // @@
ir = ig = ib = ia = 0;
} }
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) { else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
ir = iround(clamp(r, 0.0f, 65535.0f)); ir = iround(clamp(r, 0.0f, 65535.0f));
@ -543,6 +546,11 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
} }
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) { else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
// @@ // @@
ir = ig = ib = ia = 0;
}
else {
// @@
ir = ig = ib = ia = 0;
} }
uint p = 0; uint p = 0;

@ -39,6 +39,7 @@
#include "cuda/CudaCompressorDXT.h" #include "cuda/CudaCompressorDXT.h"
#include "nvimage/DirectDrawSurface.h" #include "nvimage/DirectDrawSurface.h"
#include "nvimage/KtxFile.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h" #include "nvimage/BlockDXT.h"
#include "nvimage/Image.h" #include "nvimage/Image.h"
@ -51,6 +52,7 @@
#include "nvcore/Memory.h" #include "nvcore/Memory.h"
#include "nvcore/Ptr.h" #include "nvcore/Ptr.h"
#include "nvcore/Array.inl"
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
@ -222,11 +224,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
return false; return false;
} }
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
const int faceCount = inputOptions.faceCount; const int faceCount = inputOptions.faceCount;
int width = inputOptions.width; int width = inputOptions.width;
int height = inputOptions.height; int height = inputOptions.height;
@ -244,97 +241,230 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel); if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel);
} }
if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions)) { if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, inputOptions.isNormalMap, compressionOptions, outputOptions)) {
return false; return false;
} }
// Output images. if (outputOptions.container != Container_KTX)
for (int f = 0; f < faceCount; f++)
{ {
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
// Output each face from the largest mipmap to the smallest.
for (int f = 0; f < faceCount; f++)
{
int w = width;
int h = height;
int d = depth;
bool canUseSourceImagesForThisFace = canUseSourceImages;
img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
// To normal map.
if (inputOptions.convertToNormalMap) {
img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
}
// To linear space.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
// Resize input.
img.resize(w, h, d, ResizeFilter_Box);
nvtt::Surface tmp = img;
if (!img.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
for (int m = 1; m < mipmapCount; m++) {
w = max(1, w/2);
h = max(1, h/2);
d = max(1, d/2);
int idx = m * faceCount + f;
bool useSourceImages = false;
if (canUseSourceImagesForThisFace) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
}
if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
// For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.expandNormals();
img.normalizeNormalMap();
img.packNormals();
}
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions);
}
}
}
else
{
// KTX files expect face mipmaps to be interleaved.
Array<nvtt::Surface> images(faceCount);
Array<bool> mipChainBroken(faceCount);
int w = width; int w = width;
int h = height; int h = height;
int d = depth; int d = depth;
bool canUseSourceImagesForThisFace = canUseSourceImages;
img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]); // https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
uint imageSize = estimateSize(w, h, 1, 1, compressionOptions) * faceCount;
outputOptions.writeData(&imageSize, sizeof(uint32));
// To normal map. for (int f = 0; f < faceCount; f++)
if (inputOptions.convertToNormalMap) { {
img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w); nvtt::Surface s;
img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w); s.setWrapMode(inputOptions.wrapMode);
img.packNormals(); s.setAlphaMode(inputOptions.alphaMode);
} s.setNormalMap(inputOptions.isNormalMap);
// To linear space. s.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
// Resize input. // To normal map.
img.resize(w, h, d, ResizeFilter_Box); if (inputOptions.convertToNormalMap) {
s.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
s.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
}
nvtt::Surface tmp = img; // To linear space.
if (!img.isNormalMap()) { if (!s.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma); s.toLinear(inputOptions.inputGamma);
} }
quantize(tmp, compressionOptions); // Resize input.
compress(tmp, f, 0, compressionOptions, outputOptions); s.resize(w, h, d, ResizeFilter_Box);
nvtt::Surface tmp = s;
if (!s.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
for (int m = 1; m < mipmapCount; m++) { images.push_back(s);
mipChainBroken.push_back(false);
}
static const unsigned char padding[3] = {0, 0, 0};
for (int m = 1; m < mipmapCount; m++)
{
w = max(1, w/2); w = max(1, w/2);
h = max(1, h/2); h = max(1, h/2);
d = max(1, d/2); d = max(1, d/2);
int idx = m * faceCount + f; // https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
imageSize = estimateSize(w, h, d, 1, compressionOptions) * faceCount;
bool useSourceImages = false; outputOptions.writeData(&imageSize, sizeof(uint32));
if (canUseSourceImagesForThisFace) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level. nvtt::Surface tmp;
canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
} for (int f = 0; f < faceCount; f++)
else { {
useSourceImages = true; nvtt::Surface& img = images[f];
int idx = m * faceCount + f;
bool useSourceImages = false;
if (!mipChainBroken[f]) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
mipChainBroken[f] = false; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
} }
}
if (useSourceImages) { if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]); img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
// For already generated mipmaps, we need to convert to linear. // For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) { if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma); img.toLinear(inputOptions.inputGamma);
}
} }
} else {
else { if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) { float params[2] = { inputOptions.kaiserStretch, inputOptions.kaiserAlpha };
float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch }; img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params); }
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.normalizeNormalMap();
}
tmp = img;
} }
else { else {
img.buildNextMipmap(inputOptions.mipmapFilter); tmp = img;
tmp.toGamma(inputOptions.outputGamma);
} }
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) { quantize(tmp, compressionOptions);
if (inputOptions.normalizeMipmaps) { compress(tmp, f, m, compressionOptions, outputOptions);
img.expandNormals();
img.normalizeNormalMap(); //cube padding
img.packNormals(); if (faceCount == 6 && arraySize == 1)
{
//TODO calc offset for uncompressed images
} }
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
} }
quantize(tmp, compressionOptions); int mipPadding = 3 - ((imageSize + 3) % 4);
compress(tmp, f, m, compressionOptions, outputOptions); if (mipPadding != 0) {
outputOptions.writeData(&padding, mipPadding);
}
} }
} }
@ -673,6 +803,131 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
return writeSucceed; return writeSucceed;
} }
else if (outputOptions.container == Container_KTX)
{
KtxHeader header;
// TODO cube arrays
if (textureType == TextureType_2D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = 0;
}
else if (textureType == TextureType_Cube) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 6;
header.pixelDepth = 0;
}
else if (textureType == TextureType_3D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = d;
}
else if (textureType == TextureType_Array) {
header.numberOfArrayElements = arraySize;
header.numberOfFaces = 1;
header.pixelDepth = 0; // Is it?
}
header.pixelWidth = w;
header.pixelHeight = h;
header.numberOfMipmapLevels = mipmapCount;
bool supported = true;
// TODO non-compressed formats
if (compressionOptions.format == Format_RGBA)
{
//header.glType = ?;
//header.glTypeSize = ?;
//header.glFormat = ?;
}
else
{
header.glType = 0;
header.glTypeSize = 1;
header.glFormat = 0;
if (compressionOptions.format == Format_DXT1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_DXT1a) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT3) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_BC4) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_RGTC1; // KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_BC5) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_RGTC2; // KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_BC6) {
if (compressionOptions.pixelType == PixelType_Float) header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
else /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; // By default we assume unsigned.
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_BC7) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM : KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_ETC1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC1 : KTX_INTERNAL_COMPRESSED_RGB_ETC1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_R) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_ETC2_RG) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_ETC2_RGB) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC2 : KTX_INTERNAL_COMPRESSED_RGB_ETC2;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_RGBA) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC : KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else {
supported = false;
}
//TODO compressionOptions.format == Format_DXT1n, Format_DXT5n ? There seems to be no way to indicate a normal map using ktx. Maybe via key value data?
}
if (!supported)
{
// This container does not support the requested format.
outputOptions.error(Error_UnsupportedOutputFormat);
return false;
}
const uint headerSize = 64;
nvStaticCheck(sizeof(KtxHeader) == 64);
bool writeSucceed = outputOptions.writeData(&header, headerSize);
if (!writeSucceed)
{
outputOptions.error(Error_FileWrite);
}
return writeSucceed;
}
return true; return true;
} }
@ -788,15 +1043,34 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
{ {
return new CompressorBC7; return new CompressorBC7;
} }
/*else if (compressionOptions.format == Format_BC5_Luma)
{
return new ProductionCompressorBC5_Luma;
}*/
else if (compressionOptions.format == Format_BC3_RGBM) else if (compressionOptions.format == Format_BC3_RGBM)
{ {
return new CompressorBC3_RGBM; return new CompressorBC3_RGBM;
} }
else if (compressionOptions.format >= Format_ETC1 && compressionOptions.format <= Format_ETC2_RGB_A1)
{
#if defined(HAVE_RGETC)
if (compressionOptions.format == Format_ETC1 && compressionOptions.externalCompressor == "rg_etc") return new RgEtcCompressor;
#endif
#if defined(HAVE_ETCLIB)
if (compressionOptions.externalCompressor == "etclib") return new EtcLibCompressor;
#endif
if (compressionOptions.format == Format_ETC1) return new CompressorETC1;
else if (compressionOptions.format == Format_ETC2_R) return new CompressorETC2_R;
//else if (compressionOptions.format == Format_ETC2_RG) return new CompressorETC2_RG;
else if (compressionOptions.format == Format_ETC2_RGB) return new CompressorETC2_RGB;
else if (compressionOptions.format == Format_ETC2_RGBA) return new CompressorETC2_RGBA;
}
else if (compressionOptions.format == Format_ETC2_RGBM)
{
return new CompressorETC2_RGBM;
}
else if (compressionOptions.format >= Format_PVR_2BPP_RGB && compressionOptions.format <= Format_PVR_4BPP_RGBA)
{
#if defined(HAVE_PVRTEXTOOL)
return new CompressorPVR;
#endif
}
return NULL; return NULL;
} }
@ -860,3 +1134,24 @@ CompressorInterface * Compressor::Private::chooseGpuCompressor(const Compression
return NULL; return NULL;
} }
// Sums the size reported by computeImageSize() over `mipmapCount` consecutive
// mip levels. The first level is w x h x d; every following level halves each
// extent (integer division), never going below 1. Format, bit count and pitch
// alignment are taken from `compressionOptions`.
int Compressor::Private::estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const
{
    const Format targetFormat = compressionOptions.format;
    const uint bitsPerPixel = compressionOptions.bitcount;
    const uint alignment = compressionOptions.pitchAlignment;

    int total = 0;
    int level = 0;

    while (level < mipmapCount)
    {
        total += computeImageSize(w, h, d, bitsPerPixel, alignment, targetFormat);

        // Step down to the extents of the next mip level.
        w = max(1, w / 2);
        h = max(1, h / 2);
        d = max(1, d / 2);

        level++;
    }

    return total;
}

@ -56,6 +56,7 @@ namespace nvtt
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const;
bool cudaSupported; bool cudaSupported;
bool cudaEnabled; bool cudaEnabled;

@ -34,61 +34,61 @@
namespace nvtt namespace nvtt
{ {
struct DefaultOutputHandler : public nvtt::OutputHandler struct DefaultOutputHandler : public nvtt::OutputHandler
{ {
DefaultOutputHandler(const char * fileName) : stream(fileName) {} DefaultOutputHandler(const char * fileName) : stream(fileName) {}
DefaultOutputHandler(FILE * fp) : stream(fp, false) {} DefaultOutputHandler(FILE * fp) : stream(fp, false) {}
virtual ~DefaultOutputHandler() {} virtual ~DefaultOutputHandler() {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{ {
// ignore. // ignore.
} }
// Output data. // Output data.
virtual bool writeData(const void * data, int size) virtual bool writeData(const void * data, int size)
{ {
stream.serialize(const_cast<void *>(data), size); stream.serialize(const_cast<void *>(data), size);
//return !stream.isError(); //return !stream.isError();
return true; return true;
} }
virtual void endImage() virtual void endImage()
{ {
// ignore. // ignore.
} }
nv::StdOutputStream stream; nv::StdOutputStream stream;
}; };
struct OutputOptions::Private struct OutputOptions::Private
{ {
nv::Path fileName; nv::Path fileName;
FILE * fileHandle; FILE * fileHandle;
OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader; OutputHandler * outputHandler;
Container container; ErrorHandler * errorHandler;
bool outputHeader;
Container container;
int version; int version;
bool srgb; bool srgb;
bool deleteOutputHandler; bool deleteOutputHandler;
void * wrapperProxy; // For the C/C# wrapper. void * wrapperProxy; // For the C/C# wrapper.
bool hasValidOutputHandler() const;
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const; bool hasValidOutputHandler() const;
bool writeData(const void * data, int size) const;
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
bool writeData(const void * data, int size) const;
void endImage() const; void endImage() const;
void error(Error e) const; void error(Error e) const;
}; };
} // nvtt namespace } // nvtt namespace

@ -39,21 +39,21 @@ namespace nv
struct AlphaBlockDXT5; struct AlphaBlockDXT5;
class Vector3; class Vector3;
namespace QuickCompress namespace QuickCompress
{ {
void compressDXT1(const ColorBlock & src, BlockDXT1 * dst); void compressDXT1(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst); void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT3(const ColorBlock & src, BlockDXT3 * dst); void compressDXT3(const ColorBlock & src, BlockDXT3 * dst);
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8); void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8); void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8); void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8);
void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
} }
} // nv namespace } // nv namespace
#endif // NV_TT_QUICKCOMPRESSDXT_H #endif // NV_TT_QUICKCOMPRESSDXT_H

@ -23,12 +23,14 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include "Surface.h" #include "Surface.h"
#include "CompressorETC.h" // for ETC decoder.
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl" #include "nvmath/Matrix.inl"
#include "nvmath/Color.h" #include "nvmath/Color.h"
#include "nvmath/Half.h" #include "nvmath/Half.h"
#include "nvmath/ftoi.h" #include "nvmath/ftoi.h"
#include "nvmath/PackedFloat.h"
#include "nvimage/Filter.h" #include "nvimage/Filter.h"
#include "nvimage/ImageIO.h" #include "nvimage/ImageIO.h"
@ -39,8 +41,13 @@
#include "nvimage/ErrorMetric.h" #include "nvimage/ErrorMetric.h"
#include "nvimage/DirectDrawSurface.h" #include "nvimage/DirectDrawSurface.h"
#include "nvthread/ParallelFor.h"
#include "nvcore/Array.inl"
#include <float.h> #include <float.h>
#include <string.h> // memset, memcpy #include <string.h> // memset, memcpy
//#include <stdio.h> // printf?
#if NV_CC_GNUC #if NV_CC_GNUC
#include <math.h> // exp2f and log2f #include <math.h> // exp2f and log2f
@ -123,6 +130,18 @@ namespace
else if (format == Format_BC7) { else if (format == Format_BC7) {
return 16; return 16;
} }
else if (format == Format_ETC1 || format == Format_ETC2_R || format == Format_ETC2_RGB) {
return 8;
}
else if (format == Format_ETC2_RG || format == Format_ETC2_RGBA || format == Format_ETC2_RGBM) {
return 16;
}
else if (format == Format_PVR_2BPP_RGB || format == Format_PVR_2BPP_RGBA) {
return 4;
}
else if (format == Format_PVR_4BPP_RGB || format == Format_PVR_4BPP_RGBA) {
return 8;
}
return 0; return 0;
} }
@ -197,7 +216,7 @@ uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlign
} }
} }
void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType) { void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType, nvtt::ShapeRestriction shapeRestriction /*= nvtt::ShapeRestriction_None*/) {
nvDebugCheck(width != NULL && *width > 0); nvDebugCheck(width != NULL && *width > 0);
nvDebugCheck(height != NULL && *height > 0); nvDebugCheck(height != NULL && *height > 0);
nvDebugCheck(depth != NULL && *depth > 0); nvDebugCheck(depth != NULL && *depth > 0);
@ -234,21 +253,21 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
// Round to power of two. // Round to power of two.
if (roundMode == RoundMode_ToNextPowerOfTwo) if (roundMode == RoundMode_ToNextPowerOfTwo)
{ {
w = nextPowerOfTwo(w); w = nextPowerOfTwo(U32(w));
h = nextPowerOfTwo(h); h = nextPowerOfTwo(U32(h));
d = nextPowerOfTwo(d); d = nextPowerOfTwo(U32(d));
} }
else if (roundMode == RoundMode_ToNearestPowerOfTwo) else if (roundMode == RoundMode_ToNearestPowerOfTwo)
{ {
w = nearestPowerOfTwo(w); w = nearestPowerOfTwo(U32(w));
h = nearestPowerOfTwo(h); h = nearestPowerOfTwo(U32(h));
d = nearestPowerOfTwo(d); d = nearestPowerOfTwo(U32(d));
} }
else if (roundMode == RoundMode_ToPreviousPowerOfTwo) else if (roundMode == RoundMode_ToPreviousPowerOfTwo)
{ {
w = previousPowerOfTwo(w); w = previousPowerOfTwo(U32(w));
h = previousPowerOfTwo(h); h = previousPowerOfTwo(U32(h));
d = previousPowerOfTwo(d); d = previousPowerOfTwo(U32(d));
} }
else if (roundMode == RoundMode_ToNextMultipleOfFour) else if (roundMode == RoundMode_ToNextMultipleOfFour)
{ {
@ -269,6 +288,38 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
d = previousMultipleOfFour(d); d = previousMultipleOfFour(d);
} }
if(shapeRestriction == ShapeRestriction_Square)
{
if (textureType == TextureType_2D)
{
int md = nv::min(w,h);
w = md;
h = md;
d = 1;
}
else if (textureType == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
else if (textureType == TextureType_Cube)
{
int md = nv::min(w, h);
w = md;
h = md;
d = 1;
}
}
else
{
if (textureType == TextureType_2D || textureType == TextureType_Cube)
{
d = 1;
}
}
*width = w; *width = w;
*height = h; *height = h;
*depth = d; *depth = d;
@ -509,8 +560,8 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c
} }
} }
*rangeMin = range.x; if (rangeMin) *rangeMin = range.x;
*rangeMax = range.y; if (rangeMax) *rangeMax = range.y;
} }
bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
@ -583,7 +634,7 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
} }
// @@ Have loadFloat allocate the image with the desired number of channels. // @@ Have loadFloat allocate the image with the desired number of channels.
img->resizeChannelCount(4); //img->resizeChannelCount(4);
delete m->image; delete m->image;
m->image = img.release(); m->image = img.release();
@ -601,7 +652,8 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c
return ImageIO::saveFloat(fileName, m->image, 0, 4); return ImageIO::saveFloat(fileName, m->image, 0, 4);
} }
else { else {
AutoPtr<Image> image(m->image->createImage(0, 4)); uint c = min<uint>(m->image->componentCount(), 4);
AutoPtr<Image> image(m->image->createImage(0, c));
nvCheck(image != NULL); nvCheck(image != NULL);
if (hasAlpha) { if (hasAlpha) {
@ -829,16 +881,35 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
return true; return true;
} }
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTDecompress.h>
#endif
// @@ Add support for compressed 3D textures. // @@ Add support for compressed 3D textures.
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data) bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
{ {
if (format != nvtt::Format_BC1 && if (format != nvtt::Format_BC1 &&
format != nvtt::Format_BC2 && format != nvtt::Format_BC2 &&
format != nvtt::Format_BC3 && format != nvtt::Format_BC3 &&
format != nvtt::Format_BC3n &&
format != nvtt::Format_BC3_RGBM &&
format != nvtt::Format_BC4 && format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 && format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6 && format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7) format != nvtt::Format_BC7 &&
format != nvtt::Format_ETC1 &&
format != nvtt::Format_ETC2_R &&
format != nvtt::Format_ETC2_RG &&
format != nvtt::Format_ETC2_RGB &&
format != nvtt::Format_ETC2_RGBA &&
format != nvtt::Format_ETC2_RGBM
#if defined(HAVE_PVRTEXTOOL)
&& format != nvtt::Format_PVR_2BPP_RGB
&& format != nvtt::Format_PVR_4BPP_RGB
&& format != nvtt::Format_PVR_2BPP_RGBA
&& format != nvtt::Format_PVR_4BPP_RGBA
#endif
)
{ {
return false; return false;
} }
@ -851,7 +922,7 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
m->image->allocate(4, w, h, 1); m->image->allocate(4, w, h, 1);
m->type = TextureType_2D; m->type = TextureType_2D;
const int bw = (w + 3) / 4; const int bw = (w + 3) / 4; // @@ Not if PVR 2bpp!
const int bh = (h + 3) / 4; const int bh = (h + 3) / 4;
const uint bs = blockSize(format); const uint bs = blockSize(format);
@ -859,130 +930,166 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
const uint8 * ptr = (const uint8 *)data; const uint8 * ptr = (const uint8 *)data;
TRY { TRY {
if (format == nvtt::Format_BC6) #if defined(HAVE_PVRTEXTOOL)
{ if (format >= nvtt::Format_PVR_2BPP_RGB && format <= nvtt::Format_PVR_4BPP_RGBA)
// BC6 format - decode directly to float {
bool two_bit_mode = (format == nvtt::Format_PVR_2BPP_RGB || format == nvtt::Format_PVR_2BPP_RGBA);
for (int y = 0; y < bh; y++)
{ uint8 * output = new uint8[4 * w * h];
for (int x = 0; x < bw; x++)
{ PVRTDecompressPVRTC(ptr, two_bit_mode, w, h, output);
Vector3 colors[16];
const BlockBC6 * block = (const BlockBC6 *)ptr; for (int y = 0; y < h; y++) {
block->decodeBlock(colors); for (int x = 0; x < w; x++) {
m->image->pixel(0, x, y, 0) = output[4*(y*w + x) + 0] / 255.0f;
for (int yy = 0; yy < 4; yy++) m->image->pixel(1, x, y, 0) = output[4*(y*w + x) + 1] / 255.0f;
{ m->image->pixel(2, x, y, 0) = output[4*(y*w + x) + 2] / 255.0f;
for (int xx = 0; xx < 4; xx++) m->image->pixel(3, x, y, 0) = output[4*(y*w + x) + 3] / 255.0f;
{ }
Vector3 rgb = colors[yy*4 + xx]; }
if (x * 4 + xx < w && y * 4 + yy < h) delete [] output;
{ }
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x; else
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y; #endif
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z; if (format == nvtt::Format_BC6 || (format >= nvtt::Format_ETC1 && format <= nvtt::Format_ETC2_RGBM))
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f; {
} // Some formats we decode directly to float:
}
} for (int y = 0; y < bh; y++) {
for (int x = 0; x < bw; x++) {
ptr += bs; Vector4 colors[16];
}
} if (format == nvtt::Format_BC6) {
} const BlockBC6 * block = (const BlockBC6 *)ptr;
else block->decodeBlock(colors);
{ }
// Non-BC6 - decode to 8-bit, then convert to float else if (format == nvtt::Format_ETC1 || format == nvtt::Format_ETC2_RGB) {
nv::decompress_etc(ptr, colors);
for (int y = 0; y < bh; y++) }
{ else if (format == nvtt::Format_ETC2_RGBA || format == nvtt::Format_ETC2_RGBM) {
for (int x = 0; x < bw; x++) nv::decompress_etc_eac(ptr, colors);
{ }
ColorBlock colors; else if (format == nvtt::Format_ETC2_R) {
// @@ Not implemented.
if (format == nvtt::Format_BC1) //nv::decompress_eac(ptr, colors);
{ }
const BlockDXT1 * block = (const BlockDXT1 *)ptr; else if (format == nvtt::Format_ETC2_RG) {
// @@ Not implemented.
if (decoder == Decoder_D3D10) { //nv::decompress_eac(ptr, colors);
block->decodeBlock(&colors, false); }
} else if (format == nvtt::Format_ETC2_RGB_A1) {
else if (decoder == Decoder_D3D9) { // @@ Not implemented?
block->decodeBlock(&colors, false); //nv::decompress_etc(ptr, colors);
} }
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors); for (int yy = 0; yy < 4; yy++) {
} for (int xx = 0; xx < 4; xx++) {
} Vector4 c = colors[yy*4 + xx];
else if (format == nvtt::Format_BC2)
{ if (x * 4 + xx < w && y * 4 + yy < h) {
const BlockDXT3 * block = (const BlockDXT3 *)ptr; m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = c.x;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = c.y;
if (decoder == Decoder_D3D10) { m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = c.z;
block->decodeBlock(&colors, false); m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = c.w;
} }
else if (decoder == Decoder_D3D9) { }
block->decodeBlock(&colors, false); }
}
else if (decoder == Decoder_NV5x) { ptr += bs;
block->decodeBlockNV5x(&colors); }
} }
} }
else if (format == nvtt::Format_BC3) else
{ {
const BlockDXT5 * block = (const BlockDXT5 *)ptr; // Others, we decode to 8-bit, then convert to float
if (decoder == Decoder_D3D10) { for (int y = 0; y < bh; y++) {
block->decodeBlock(&colors, false); for (int x = 0; x < bw; x++) {
} ColorBlock colors;
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false); if (format == nvtt::Format_BC1)
} {
else if (decoder == Decoder_NV5x) { const BlockDXT1 * block = (const BlockDXT1 *)ptr;
block->decodeBlockNV5x(&colors);
} if (decoder == Decoder_D3D10) {
} block->decodeBlock(&colors, false);
else if (format == nvtt::Format_BC4) }
{ else if (decoder == Decoder_D3D9) {
const BlockATI1 * block = (const BlockATI1 *)ptr; block->decodeBlock(&colors, false);
block->decodeBlock(&colors, decoder == Decoder_D3D9); }
} else if (decoder == Decoder_NV5x) {
else if (format == nvtt::Format_BC5) block->decodeBlockNV5x(&colors);
{ }
const BlockATI2 * block = (const BlockATI2 *)ptr; }
block->decodeBlock(&colors, decoder == Decoder_D3D9); else if (format == nvtt::Format_BC2)
} {
else if (format == nvtt::Format_BC7) const BlockDXT3 * block = (const BlockDXT3 *)ptr;
{
const BlockBC7 * block = (const BlockBC7 *)ptr; if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors); block->decodeBlock(&colors, false);
} }
else else if (decoder == Decoder_D3D9) {
{ block->decodeBlock(&colors, false);
nvDebugCheck(false); }
} else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
for (int yy = 0; yy < 4; yy++) }
{ }
for (int xx = 0; xx < 4; xx++) else if (format == nvtt::Format_BC3 || format == nvtt::Format_BC3n || format == nvtt::Format_BC3_RGBM)
{ {
Color32 c = colors.color(xx, yy); const BlockDXT5 * block = (const BlockDXT5 *)ptr;
if (x * 4 + xx < w && y * 4 + yy < h) if (decoder == Decoder_D3D10) {
{ block->decodeBlock(&colors, false);
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; }
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f; else if (decoder == Decoder_D3D9) {
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f; block->decodeBlock(&colors, false);
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f; }
} else if (decoder == Decoder_NV5x) {
} block->decodeBlockNV5x(&colors);
} }
}
ptr += bs; else if (format == nvtt::Format_BC4)
} {
} const BlockATI1 * block = (const BlockATI1 *)ptr;
} block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC7)
{
const BlockBC7 * block = (const BlockBC7 *)ptr;
block->decodeBlock(&colors);
}
else
{
nvDebugCheck(false);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < w && y * 4 + yy < h)
{
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
}
}
}
ptr += bs;
}
}
}
} }
CATCH { CATCH {
return false; return false;
@ -1092,7 +1199,7 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth
m->image = img; m->image = img;
} }
void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter) void Surface::resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter)
{ {
if (isNull()) return; if (isNull()) return;
@ -1104,27 +1211,17 @@ void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilte
int h = m->image->height(); int h = m->image->height();
int d = m->image->depth(); int d = m->image->depth();
getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type); getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type, nvtt::ShapeRestriction_Square);
if (m->type == TextureType_2D) if (m->type == TextureType_2D)
{ {
nvDebugCheck(d==1); nvDebugCheck(d==1);
int md = nv::min(w,h);
w = md;
h = md;
} }
else if (m->type == TextureType_Cube) else if (m->type == TextureType_Cube)
{ {
nvDebugCheck(d==1); nvDebugCheck(d==1);
nvDebugCheck(w==h); nvDebugCheck(w==h);
} }
else if (m->type == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
resize(w, h, d, filter, filterWidth, params); resize(w, h, d, filter, filterWidth, params);
} }
@ -1151,6 +1248,63 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl
resize(w, h, d, filter, filterWidth, params); resize(w, h, d, filter, filterWidth, params);
} }
// Convenience wrapper around nv::rmsBilinearColorError(). Compares the float
// images held by `original` and `resized`, using the wrap mode of `original`;
// the last argument is true when `original` uses AlphaMode_Transparency.
float rmsBilinearError(nvtt::Surface original, nvtt::Surface resized) {
    const FloatImage::WrapMode imageWrapMode = (FloatImage::WrapMode)original.wrapMode();
    const bool hasTransparency = (original.alphaMode() == AlphaMode_Transparency);

    return nv::rmsBilinearColorError(original.m->image, resized.m->image, imageWrapMode, hasTransparency);
}
void Surface::autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter)
{
Surface original = *this;
Surface resized = original;
int w = width();
int h = height();
int d = depth();
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
while (w >= 4 && h >= 4 && d >= 1) {
// Resize always from original? This is more expensive, but should produce higher quality.
//resized = original;
resized.resize(w, h, d, filter);
#if 0
// Scale back up to original size. @@ Upscaling not implemented!
Surface restored = resized;
restored.resize(original.width(), original.height(), original.depth(), ResizeFilter_Triangle);
float error;
if (isNormalMap()) {
error = nvtt::angularError(original, restored);
}
else {
error = nvtt::rmsError(original, restored);
}
#else
float error = rmsBilinearError(original, resized);
#endif
if (error < errorTolerance) {
*this = resized;
nvDebug("image resized %dx%d -> %dx%d (error=%f)\n", original.width(), original.height(), w, h, error);
}
else {
nvDebug("image can't be resized further (error=%f)\n", error);
break;
}
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
}
bool Surface::canMakeNextMipmap(int min_size /*= 1*/) bool Surface::canMakeNextMipmap(int min_size /*= 1*/)
{ {
if (isNull()) return false; if (isNull()) return false;
@ -1196,7 +1350,7 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa
{ {
nvDebugCheck(filter == MipmapFilter_Kaiser); nvDebugCheck(filter == MipmapFilter_Kaiser);
KaiserFilter filter(filterWidth); KaiserFilter filter(filterWidth);
if (params != NULL) filter.setParameters(params[0], params[1]); if (params != NULL) filter.setParameters(/*alpha=*/params[0], /*stretch=*/params[1]);
img = img->downSample(filter, wrapMode, 3); img = img->downSample(filter, wrapMode, 3);
} }
} }
@ -1357,8 +1511,9 @@ void Surface::toSrgb()
for (uint c = 0; c < 3; c++) { for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c); float * channel = img->channel(c);
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::toSrgb(channel[i]); channel[i] = ::toSrgb(channel[i]);
} }//);
} }
} }
@ -1382,8 +1537,9 @@ void Surface::toLinearFromSrgb()
for (uint c = 0; c < 3; c++) { for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c); float * channel = img->channel(c);
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::fromSrgb(channel[i]); channel[i] = ::fromSrgb(channel[i]);
} }//);
} }
} }
@ -2827,6 +2983,78 @@ Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1)
return s; return s;
} }
// Creates a new w x h surface (depth 1) by resampling this surface through a
// caller-supplied coordinate mapping.
//
// For every destination pixel (x, y) the coordinates (x / w, y / h, 0) are
// handed to warp_function by reference; whatever values it leaves in the first
// two are used to fetch each component from the source image with
// sampleLinearClamp().
//
//   w, h            extents of the surface to create.
//   warp_function   mapping applied to each destination coordinate.
//
// Returns the resampled surface. If this surface has no image, or
// warp_function is NULL, a default-constructed surface is returned instead of
// dereferencing a null pointer.
Surface Surface::warp(int w, int h, WarpFunction * warp_function) const
{
    Surface s;

    // Guard: there is nothing to sample from, or nothing to map coordinates with.
    if (m->image == NULL || warp_function == NULL) {
        return s;
    }

    FloatImage * img = s.m->image = new FloatImage;

    const int C = m->image->componentCount();
    img->allocate(C, w, h, 1);

// Development toggle: set to 1 to process rows through nv::parallel_for.
#define USE_PARALLEL_FOR 0

#if USE_PARALLEL_FOR
    nv::parallel_for(h, 1, [=](int y) {
#else
    for (int y = 0; y < h; y++) {
#endif
        for (int x = 0; x < w; x++) {
            // The 0.0f term is the sub-pixel offset; it is currently zero.
            float fx = (float(x) + 0.0f) / w;
            float fy = (float(y) + 0.0f) / h;
            float fz = 0;

            // The callback rewrites the coordinates in place.
            warp_function(fx, fy, fz);

            for (int c = 0; c < C; c++) {
                img->pixel(c, x, y, 0) = m->image->sampleLinearClamp(c, fx, fy);
            }
        }
    }
#if USE_PARALLEL_FOR
    );
#endif

    return s;
}
// Creates a new w x h x d surface by resampling this surface through a
// caller-supplied coordinate mapping.
//
// For every destination voxel (x, y, z) the coordinates (x / w, y / h, z / d)
// are handed to warp_function by reference; the values it leaves behind are
// used to fetch each component from the source image with sampleLinearClamp().
//
//   w, h, d         extents of the surface to create.
//   warp_function   mapping applied to each destination coordinate.
//
// Returns the resampled surface. If this surface has no image, or
// warp_function is NULL, a default-constructed surface is returned instead of
// dereferencing a null pointer.
Surface Surface::warp(int w, int h, int d, WarpFunction * warp_function) const
{
    Surface s;

    // Guard: there is nothing to sample from, or nothing to map coordinates with.
    if (m->image == NULL || warp_function == NULL) {
        return s;
    }

    FloatImage * img = s.m->image = new FloatImage;

    const int C = m->image->componentCount();
    img->allocate(C, w, h, d);

// Development toggle: set to 1 to process rows through nv::parallel_for.
#define USE_PARALLEL_FOR 0

    for (int z = 0; z < d; z++) {
#if USE_PARALLEL_FOR
        nv::parallel_for(h, 1, [=](int y) {
#else
        for (int y = 0; y < h; y++) {
#endif
            for (int x = 0; x < w; x++) {
                // The 0.0f term is the sub-pixel offset; it is currently zero.
                float fx = (float(x) + 0.0f) / w;
                float fy = (float(y) + 0.0f) / h;
                float fz = (float(z) + 0.0f) / d;

                // The callback rewrites the coordinates in place.
                warp_function(fx, fy, fz);

                for (int c = 0; c < C; c++) {
                    // NOTE(review): the original author marked this call "@@ 2D only" -
                    // confirm that the sampler really honours fz before relying on it.
                    img->pixel(c, x, y, z) = m->image->sampleLinearClamp(c, fx, fy, fz); // @@ 2D only.
                }
            }
        }
#if USE_PARALLEL_FOR
        );
#endif
    }

    return s;
}
bool Surface::copyChannel(const Surface & srcImage, int srcChannel) bool Surface::copyChannel(const Surface & srcImage, int srcChannel)
{ {
return copyChannel(srcImage, srcChannel, srcChannel); return copyChannel(srcImage, srcChannel, srcChannel);
@ -2953,7 +3181,7 @@ void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a)
} }
// Vertical lines: // Vertical lines:
for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width) for (uint i = 0, x = 0; i < uint(aw); i++, x += tile_width)
{ {
for (uint y = 0; y < h; y++) for (uint y = 0; y < h; y++)
{ {
@ -3083,9 +3311,9 @@ Surface nvtt::histogram(const Surface & img, int width, int height)
return histogram(img, /*minRange*/0, maxRange, width, height); return histogram(img, /*minRange*/0, maxRange, width, height);
} }
#include "nvcore/Array.inl" //#include "nvcore/Array.inl"
#include "nvmath/PackedFloat.h" //#include "nvmath/PackedFloat.h"
#include <stdio.h> //#include <stdio.h>
nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height) nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height)
{ {
@ -3234,7 +3462,7 @@ nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRang
maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z)); maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z));
} }
printf("maxh = %f\n", maxh); //printf("maxh = %f\n", maxh);
//maxh = 80; //maxh = 80;
maxh = 256; maxh = 256;

@ -83,7 +83,7 @@ namespace nv {
uint countMipmaps(uint w, uint h, uint d); uint countMipmaps(uint w, uint h, uint d);
uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size); uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size);
uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format); uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format);
void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType); void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType, nvtt::ShapeRestriction shapeRestriction = nvtt::ShapeRestriction_None);
} }

@ -10,8 +10,8 @@
// Grand Central Dispatch (GCD/libdispatch) // Grand Central Dispatch (GCD/libdispatch)
// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html // http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
#define HAVE_GCD 1 //#define HAVE_GCD 1
#include <dispatch/dispatch.h> //#include <dispatch/dispatch.h>
#endif #endif
// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: // Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime:
@ -64,7 +64,7 @@ namespace nvtt {
#endif #endif
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) #if HAVE_GCD
// Task dispatcher using Apple's Grand Central Dispatch. // Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher struct AppleTaskDispatcher : public TaskDispatcher

@ -47,9 +47,9 @@ const char * nvtt::errorString(Error e)
return "Error writing through output handler"; return "Error writing through output handler";
case Error_UnsupportedOutputFormat: case Error_UnsupportedOutputFormat:
return "The container file does not support the selected output format"; return "The container file does not support the selected output format";
default:
return "Invalid error";
} }
return "Invalid error";
} }
// Return NVTT version. // Return NVTT version.

@ -105,7 +105,21 @@ namespace nvtt
Format_BC6, Format_BC6,
Format_BC7, Format_BC7,
Format_BC3_RGBM, // Format_BC3_RGBM,
Format_ETC1,
Format_ETC2_R,
Format_ETC2_RG,
Format_ETC2_RGB,
Format_ETC2_RGBA,
Format_ETC2_RGB_A1,
Format_ETC2_RGBM,
Format_PVR_2BPP_RGB, // Using PVR textools.
Format_PVR_4BPP_RGB,
Format_PVR_2BPP_RGBA,
Format_PVR_4BPP_RGBA,
Format_Count Format_Count
}; };
@ -155,6 +169,7 @@ namespace nvtt
NVTT_API void setFormat(Format format); NVTT_API void setFormat(Format format);
NVTT_API void setQuality(Quality quality); NVTT_API void setQuality(Quality quality);
NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f);
NVTT_API void setRGBMThreshold(float min_m);
NVTT_API void setExternalCompressor(const char * name); NVTT_API void setExternalCompressor(const char * name);
@ -173,9 +188,10 @@ namespace nvtt
NVTT_API void setTargetDecoder(Decoder decoder); NVTT_API void setTargetDecoder(Decoder decoder);
// Translate to and from D3D formats. // Translate to and from D3D formats.
NVTT_API Format format() const;
NVTT_API unsigned int d3d9Format() const; NVTT_API unsigned int d3d9Format() const;
NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setD3D9Format(unsigned int format); //NVTT_API bool setD3D9Format(unsigned int format);
//NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setDxgiFormat(unsigned int format); //NVTT_API bool setDxgiFormat(unsigned int format);
}; };
@ -253,6 +269,14 @@ namespace nvtt
AlphaMode_Premultiplied, AlphaMode_Premultiplied,
}; };
// Extents shape restrictions.
// Accepted as the optional last argument of nv::getTargetExtent(), where it
// defaults to ShapeRestriction_None.
enum ShapeRestriction
{
ShapeRestriction_None, // Default value used by getTargetExtent().
ShapeRestriction_Square, // NOTE(review): presumably forces equal width and height - confirm in getTargetExtent().
};
// Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1) // Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1)
struct InputOptions struct InputOptions
{ {
@ -344,7 +368,7 @@ namespace nvtt
{ {
Container_DDS, Container_DDS,
Container_DDS10, Container_DDS10,
// Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/ Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
// Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format // Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format
}; };
@ -439,6 +463,9 @@ namespace nvtt
ToneMapper_Lightmap, ToneMapper_Lightmap,
}; };
// Coordinate mapping callback used by Surface::warp().
// The three coordinates (x, y and depth) are passed by reference and are
// transformed in place. Surface::warp() passes each destination pixel index
// divided by the destination extent, and samples the source image at the
// coordinates the callback leaves behind.
typedef void WarpFunction(float & x, float & y, float & d);
// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1) // A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
// @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression. // @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
@ -486,7 +513,8 @@ namespace nvtt
NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter); NVTT_API void resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter);
NVTT_API void autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1); NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1);
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1); NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1);
@ -554,6 +582,10 @@ namespace nvtt
NVTT_API void flipZ(); NVTT_API void flipZ();
NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const; NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const;
NVTT_API Surface warp(int w, int h, WarpFunction * f) const;
NVTT_API Surface warp(int w, int h, int d, WarpFunction * f) const;
// Copy image data. // Copy image data.
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel); NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel);
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel); NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel);

@ -146,9 +146,16 @@ static const char * s_witnessImageSet[] = {
}; };
static const char * s_witnessLmapImageSet[] = { static const char * s_witnessLmapImageSet[] = {
"specruin.dds", "hallway.dds",
"cottage.dds", "windmill.dds",
"tunnel.dds",
"theater.dds",
"tower.dds", "tower.dds",
"hub.dds",
"mine.dds",
"archway.dds",
"hut.dds",
"shaft.dds",
}; };
static const char * s_normalMapImageSet[] = { static const char * s_normalMapImageSet[] = {
@ -187,8 +194,14 @@ enum Mode {
Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Paraboloid,
Mode_BC5_Normal_Quartic, Mode_BC5_Normal_Quartic,
//Mode_BC5_Normal_DualParaboloid, //Mode_BC5_Normal_DualParaboloid,
Mode_BC6, Mode_BC6,
Mode_BC7, Mode_BC7,
Mode_ETC1_IC,
Mode_ETC1_EtcLib,
Mode_ETC2_EtcLib,
Mode_ETC1_RgEtc,
Mode_ETC2_RGBM,
Mode_PVR,
Mode_Count Mode_Count
}; };
static const char * s_modeNames[] = { static const char * s_modeNames[] = {
@ -207,8 +220,14 @@ static const char * s_modeNames[] = {
"BC5-Normal-Paraboloid", // Mode_BC5_Normal_Paraboloid, "BC5-Normal-Paraboloid", // Mode_BC5_Normal_Paraboloid,
"BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic, "BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic,
//"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid, //"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid,
"BC6", // Mode_BC6, "BC6", // Mode_BC6,
"BC7", // Mode_BC7, "BC7", // Mode_BC7,
"ETC1-IC",
"ETC1-EtcLib",
"ETC2-EtcLib",
"ETC1-RgEtc",
"ETC2-RGBM",
"PVR",
}; };
nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count); nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count);
@ -218,14 +237,16 @@ struct Test {
Mode modes[6]; Mode modes[6];
}; };
static Test s_imageTests[] = { static Test s_imageTests[] = {
{"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}}, /*0*/ {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
{"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, /*1*/ {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
//{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}}, /*2*/ {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
{"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}}, /*3*/ {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
{"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}}, /*4*/ {"HDR", 3, {Mode_ETC2_RGBM, Mode_BC3_RGBM, Mode_BC6}},
{"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}}, /*5*/ {"BC6", 1, {Mode_BC6}},
{"BC6", 1, {Mode_BC6}}, /*6*/ {"BC7", 1, {Mode_BC7}},
{"BC7", 1, {Mode_BC7}}, /*7*/ {"ETC", 3, {Mode_ETC1_IC, Mode_ETC1_RgEtc, Mode_ETC2_EtcLib}},
/*8*/ {"Color Mobile", 4, {Mode_PVR, Mode_ETC1_IC, Mode_ETC2_EtcLib, Mode_BC1}},
/*9*/ //{"ETC-Lightmap", 2, {Mode_BC3_RGBM, Mode_ETC_RGBM}},
}; };
const int s_imageTestCount = ARRAY_SIZE(s_imageTests); const int s_imageTestCount = ARRAY_SIZE(s_imageTests);
@ -404,10 +425,10 @@ int main(int argc, char *argv[])
i++; i++;
} }
} }
else else
{ {
printf("Warning: unrecognized option \"%s\"\n", argv[i]); printf("Warning: unrecognized option \"%s\"\n", argv[i]);
} }
} }
// Validate inputs. // Validate inputs.
@ -462,7 +483,8 @@ int main(int argc, char *argv[])
} }
else else
{ {
compressionOptions.setQuality(nvtt::Quality_Production); compressionOptions.setQuality(nvtt::Quality_Normal);
//compressionOptions.setQuality(nvtt::Quality_Production);
} }
//compressionOptions.setExternalCompressor("ati"); //compressionOptions.setExternalCompressor("ati");
//compressionOptions.setExternalCompressor("squish"); //compressionOptions.setExternalCompressor("squish");
@ -515,13 +537,13 @@ int main(int argc, char *argv[])
// Labels on the left side. // Labels on the left side.
if (errorMode == ErrorMode_RMSE) { if (errorMode == ErrorMode_RMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01"; graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.03,0.01";
} }
else if (errorMode == ErrorMode_CieLab) { else if (errorMode == ErrorMode_CieLab) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1"; graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1";
} }
else if (errorMode == ErrorMode_AngularRMSE) { else if (errorMode == ErrorMode_AngularRMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01"; graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.2,0.02"; // 0.05,0.01
} }
// Labels at the bottom. // Labels at the bottom.
@ -581,7 +603,6 @@ int main(int argc, char *argv[])
else if (errorMode == ErrorMode_AngularRMSE) { else if (errorMode == ErrorMode_AngularRMSE) {
graphWriter << "&chtt=" << set.name << "%20-%20" << test.name << "%20-%20Angular RMSE"; graphWriter << "&chtt=" << set.name << "%20-%20" << test.name << "%20-%20Angular RMSE";
} }
Timer timer; Timer timer;
@ -590,7 +611,7 @@ int main(int argc, char *argv[])
nvtt::Surface img; nvtt::Surface img;
printf("Running Test: %s with Set: %s\n", test.name, set.name); printf("Running test '%s' with set '%s'\n", test.name, set.name);
graphWriter << "&chd=t:"; graphWriter << "&chd=t:";
@ -602,10 +623,11 @@ int main(int argc, char *argv[])
Mode mode = test.modes[t]; Mode mode = test.modes[t];
nvtt::Format format; nvtt::Format format;
const char * compressor_name = NULL;
if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) { if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) {
format = nvtt::Format_BC1; format = nvtt::Format_BC1;
} }
else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW) { else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_LUVW) {
format = nvtt::Format_BC3; format = nvtt::Format_BC3;
} }
else if (mode == Mode_BC3_Normal) { else if (mode == Mode_BC3_Normal) {
@ -614,20 +636,51 @@ int main(int argc, char *argv[])
else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) { else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) {
format = nvtt::Format_BC5; format = nvtt::Format_BC5;
} }
else if (mode == Mode_BC6) else if (mode == Mode_BC3_RGBM) {
{ format = nvtt::Format_BC3_RGBM;
format = nvtt::Format_BC6; }
} else if (mode == Mode_BC6)
else if (mode == Mode_BC7) {
{ format = nvtt::Format_BC6;
format = nvtt::Format_BC7; }
} else if (mode == Mode_BC7)
else {
{ format = nvtt::Format_BC7;
nvDebugCheck(false); }
} else if (mode == Mode_ETC1_IC)
{
format = nvtt::Format_ETC1;
}
else if (mode == Mode_ETC1_EtcLib)
{
format = nvtt::Format_ETC1;
compressor_name = "etclib";
}
else if (mode == Mode_ETC2_EtcLib)
{
format = nvtt::Format_ETC2_RGB;
compressor_name = "etclib";
}
else if (mode == Mode_ETC1_RgEtc)
{
format = nvtt::Format_ETC1;
compressor_name = "rg_etc";
}
else if (mode == Mode_ETC2_RGBM)
{
format = nvtt::Format_ETC2_RGBM;
}
else if (mode == Mode_PVR)
{
format = nvtt::Format_PVR_4BPP_RGB;
}
else
{
nvUnreachable();
}
compressionOptions.setFormat(format); compressionOptions.setFormat(format);
if (compressor_name) compressionOptions.setExternalCompressor(compressor_name);
if (set.type == ImageType_RGBA) { if (set.type == ImageType_RGBA) {
img.setAlphaMode(nvtt::AlphaMode_Transparency); img.setAlphaMode(nvtt::AlphaMode_Transparency);
@ -653,6 +706,7 @@ int main(int argc, char *argv[])
printf("Input image '%s' not found.\n", set.fileNames[i]); printf("Input image '%s' not found.\n", set.fileNames[i]);
return EXIT_FAILURE; return EXIT_FAILURE;
} }
float color_range = 0.0f;
if (img.isNormalMap()) { if (img.isNormalMap()) {
img.normalizeNormalMap(); img.normalizeNormalMap();
@ -693,16 +747,34 @@ int main(int argc, char *argv[])
tmp.clamp(2); tmp.clamp(2);
tmp.clamp(3); tmp.clamp(3);
} }
else if (mode == Mode_BC3_RGBM) { else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
tmp.setAlphaMode(nvtt::AlphaMode_None); float r, g, b;
if (set.type == ImageType_HDR) { tmp.range(0, NULL, &r);
// Transform to gamma-2.0 space before applying RGBM - helps a lot with banding in the darks. tmp.range(1, NULL, &g);
tmp.toGamma(2.0f); tmp.range(2, NULL, &b);
tmp.toRGBM(3.0f); // range of 3.0 in gamma-2.0 space == range of 9.0 in linear space color_range = max3(r, g, b);
printf("color range = %f\n", color_range);
tmp.setAlphaMode(nvtt::AlphaMode_Transparency);
const float max_color_range = 16.0f;
if (color_range > max_color_range) {
color_range = max_color_range;
} }
else {
tmp.toRGBM(); for (int i = 0; i < 3; i++) {
tmp.scaleBias(i, 1.0f / color_range, 0.0f);
} }
tmp.toneMap(nvtt::ToneMapper_Linear, /*parameters=*/NULL); // Clamp without changing the hue.
// Clamp alpha.
tmp.clamp(3);
// To gamma.
tmp.toGamma(2);
compressionOptions.setRGBMThreshold(0.2f);
} }
else if (mode == Mode_BC3_LUVW) { else if (mode == Mode_BC3_LUVW) {
tmp.setAlphaMode(nvtt::AlphaMode_None); tmp.setAlphaMode(nvtt::AlphaMode_None);
@ -781,14 +853,25 @@ int main(int argc, char *argv[])
}*/ }*/
} }
} }
else if (mode == Mode_BC3_RGBM) { else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
if (set.type == ImageType_HDR) { /*if (set.type == ImageType_HDR) {
img_out.fromRGBM(3.0f); //img_out.fromRGBM(3.0f);
img_out.toLinear(2.0f); img_out.fromRGBM(range);
img_out.toLinear(2.0f);
} }
else { else {
img_out.fromRGBM(); img_out.fromRGBM();
}*/
img_out.fromRGBM(1.0f, 0.2f);
img_out.toLinear(2);
for (int i = 0; i < 3; i++) {
img_out.scaleBias(i, color_range, 0.0f);
} }
img_out.copyChannel(img, 3); // Copy alpha channel from source.
img_out.setAlphaMode(nvtt::AlphaMode_Transparency);
} }
else if (mode == Mode_BC3_LUVW) { else if (mode == Mode_BC3_LUVW) {
if (set.type == ImageType_HDR) { if (set.type == ImageType_HDR) {

@ -61,6 +61,9 @@ struct MyAssertHandler : public nv::AssertHandler {
virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) { virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) {
fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line); fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
nv::debug::dumpInfo(); nv::debug::dumpInfo();
if (nv::debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
exit(1); exit(1);
} }
}; };

@ -154,11 +154,13 @@ int main(int argc, char *argv[])
bool loadAsFloat = false; bool loadAsFloat = false;
bool rgbm = false; bool rgbm = false;
bool rangescale = false; bool rangescale = false;
bool srgb = false;
const char * externalCompressor = NULL; const char * externalCompressor = NULL;
bool silent = false; bool silent = false;
bool dds10 = false; bool dds10 = false;
bool ktx = false;
nv::Path input; nv::Path input;
nv::Path output; nv::Path output;
@ -285,6 +287,31 @@ int main(int argc, char *argv[])
format = nvtt::Format_BC3_RGBM; format = nvtt::Format_BC3_RGBM;
rgbm = true; rgbm = true;
} }
else if (strcmp("-etc1", argv[i]) == 0)
{
format = nvtt::Format_ETC1;
}
else if (strcmp("-etc2", argv[i]) == 0 || strcmp("-etc2_rgb", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGB;
}
else if (strcmp("-etc2_eac", argv[i]) == 0 || strcmp("-etc2_rgba", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBA;
}
else if (strcmp("-eac", argv[i]) == 0 || strcmp("-etc2_r", argv[i]) == 0)
{
format = nvtt::Format_ETC2_R;
}
else if (strcmp("-etc2_rg", argv[i]) == 0)
{
    // Two-channel variant. This arm used to select Format_ETC2_R, which made
    // "-etc2_rg" indistinguishable from "-etc2_r".
    format = nvtt::Format_ETC2_RG;
}
else if (strcmp("-etc2_rgbm", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBM;
rgbm = true;
}
// Undocumented option. Mainly used for testing. // Undocumented option. Mainly used for testing.
else if (strcmp("-ext", argv[i]) == 0) else if (strcmp("-ext", argv[i]) == 0)
@ -309,7 +336,15 @@ int main(int argc, char *argv[])
{ {
dds10 = true; dds10 = true;
} }
else if (strcmp("-ktx", argv[i]) == 0)
{
ktx = true;
}
else if (strcmp("-srgb", argv[i]) == 0)
{
srgb = true;
}
else if (argv[i][0] != '-') else if (argv[i][0] != '-')
{ {
input = argv[i]; input = argv[i];
@ -321,15 +356,23 @@ int main(int argc, char *argv[])
{ {
output.copy(input.str()); output.copy(input.str());
output.stripExtension(); output.stripExtension();
output.append(".dds");
if (ktx)
{
output.append(".ktx");
}
else
{
output.append(".dds");
}
} }
break; break;
} }
else else
{ {
printf("Warning: unrecognized option \"%s\"\n", argv[i]); printf("Warning: unrecognized option \"%s\"\n", argv[i]);
} }
} }
const uint version = nvtt::version(); const uint version = nvtt::version();
@ -380,7 +423,9 @@ int main(int argc, char *argv[])
printf("Output options:\n"); printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n"); printf(" -silent \tDo not output progress messages\n");
printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7)\n\n"); printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7, unless ktx is being used)\n");
printf(" -ktx \tUse KTX container format\n");
printf(" -srgb \tIf the requested format allows it, output will be in sRGB color space\n\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
@ -398,7 +443,7 @@ int main(int argc, char *argv[])
bool useSurface = false; // @@ use Surface API in all cases! bool useSurface = false; // @@ use Surface API in all cases!
nvtt::Surface image; nvtt::Surface image;
if (format == nvtt::Format_BC3_RGBM || rgbm) { if (format == nvtt::Format_BC3_RGBM || format == nvtt::Format_ETC2_RGBM || rgbm) {
useSurface = true; useSurface = true;
if (!image.load(input.str())) { if (!image.load(input.str())) {
@ -440,7 +485,7 @@ int main(int argc, char *argv[])
// To gamma. // To gamma.
image.toGamma(2); image.toGamma(2);
if (format != nvtt::Format_BC3_RGBM) { if (format != nvtt::Format_BC3_RGBM || format != nvtt::Format_ETC2_RGBM) {
image.setAlphaMode(nvtt::AlphaMode_None); image.setAlphaMode(nvtt::AlphaMode_None);
image.toRGBM(1, 0.15f); image.toRGBM(1, 0.15f);
} }
@ -494,7 +539,7 @@ int main(int argc, char *argv[])
nvDebugCheck(dds.isTextureArray()); nvDebugCheck(dds.isTextureArray());
inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize()); inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize());
faceCount = dds.arraySize(); faceCount = dds.arraySize();
dds10 = true; dds10 = ktx ? false : true;
} }
uint mipmapCount = dds.mipmapCount(); uint mipmapCount = dds.mipmapCount();
@ -569,11 +614,12 @@ int main(int argc, char *argv[])
inputOptions.setAlphaMode(nvtt::AlphaMode_None); inputOptions.setAlphaMode(nvtt::AlphaMode_None);
} }
// IC: Do not enforce D3D9 restrictions anymore.
// Block compressed textures with mipmaps must be powers of two. // Block compressed textures with mipmaps must be powers of two.
if (!noMipmaps && format != nvtt::Format_RGB) /*if (!noMipmaps && format != nvtt::Format_RGB)
{ {
inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo); inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo);
} }*/
if (normal) if (normal)
{ {
@ -720,15 +766,27 @@ int main(int argc, char *argv[])
outputOptions.setOutputHandler(&outputHandler); outputOptions.setOutputHandler(&outputHandler);
outputOptions.setErrorHandler(&errorHandler); outputOptions.setErrorHandler(&errorHandler);
// Automatically use dds10 if compressing to BC6 or BC7 if (ktx)
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7)
{
dds10 = true;
}
if (dds10)
{ {
outputOptions.setContainer(nvtt::Container_DDS10); outputOptions.setContainer(nvtt::Container_KTX);
}
else
{
// Automatically use dds10 if compressing to BC6 or BC7
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) {
dds10 = true;
}
if (dds10) {
outputOptions.setContainer(nvtt::Container_DDS10);
}
else {
outputOptions.setContainer(nvtt::Container_DDS);
}
}
if (srgb) {
outputOptions.setSrgbFlag(true);
} }
// printf("Press ENTER.\n"); // printf("Press ENTER.\n");

@ -99,8 +99,8 @@ int main(int argc, char *argv[])
return 1; return 1;
} }
break; break;
} }
} }
if (input.isNull() || output.isNull()) if (input.isNull() || output.isNull())
@ -136,21 +136,21 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image); nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma); fimage.toLinear(0, 3, gamma);
uint thumbW, thumbH; uint thumbW, thumbH;
if (image.width() > image.height()) if (image.width() > image.height())
{ {
thumbW = size; thumbW = size;
thumbH = uint ((float (image.height()) / float (image.width())) * size); thumbH = uint ((float (image.height()) / float (image.width())) * size);
} }
else else
{ {
thumbW = uint ((float (image.width()) / float (image.height())) * size); thumbW = uint ((float (image.width()) / float (image.height())) * size);
thumbH = size; thumbH = size;
} }
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp)); nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma)); nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB); result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output.str()); nv::StdOutputStream stream(output.str());
nv::ImageIO::save(output.str(), stream, result.ptr(), metaData.buffer()); nv::ImageIO::save(output.str(), stream, result.ptr(), metaData.buffer());
@ -160,7 +160,7 @@ int main(int argc, char *argv[])
nv::StdOutputStream stream(output.str()); nv::StdOutputStream stream(output.str());
nv::ImageIO::save(output.str(), stream, &image, metaData.buffer()); nv::ImageIO::save(output.str(), stream, &image, metaData.buffer());
} }
return 0; return 0;
} }

Loading…
Cancel
Save