diff --git a/CMakeLists.txt b/CMakeLists.txt index ab4dcb6..5e4bab9 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,7 +31,7 @@ MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}") MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}") IF(CMAKE_BUILD_TYPE MATCHES "debug") - SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.") + SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.") ADD_DEFINITIONS(-D_DEBUG=1) ENDIF() diff --git a/LICENSE b/LICENSE index bdd7e67..c1bfebc 100644 --- a/LICENSE +++ b/LICENSE @@ -1,6 +1,6 @@ NVIDIA Texture Tools is licensed under the MIT license. -Copyright (c) 2009-2016 Ignacio Castano +Copyright (c) 2009-2017 Ignacio Castaño Copyright (c) 2007-2009 NVIDIA Corporation Permission is hereby granted, free of charge, to any person diff --git a/README.md b/README.md index 65b30f4..6b14411 100644 --- a/README.md +++ b/README.md @@ -6,7 +6,7 @@ manipulation tools, designed to be integrated in game tools and asset processing pipelines. The primary features of the library are mipmap and normal map generation, format -conversion and DXT compression. +conversion, and DXT compression. 
### How to build (Windows) @@ -42,5 +42,5 @@ src/nvtt/tools/compress.cpp Detailed documentation of the API can be found at: -http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation +https://github.com/castano/nvidia-texture-tools/wiki/ApiDocumentation diff --git a/data/witness/run.sh b/data/witness/run.sh index 1ca6e77..21189da 100644 --- a/data/witness/run.sh +++ b/data/witness/run.sh @@ -36,4 +36,6 @@ do #./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds #./nvimgdiff -alpha $file.$EXT $file.bc6.dds + # ETC2-EAC + ./nvcompress -silent -alpha -nomips -etc_rgbm done diff --git a/extern/poshlib/posh.h b/extern/poshlib/posh.h index 5382294..716607d 100644 --- a/extern/poshlib/posh.h +++ b/extern/poshlib/posh.h @@ -349,9 +349,18 @@ LLVM: # define POSH_OS_STRING "UNICOS" #endif -#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx -# define POSH_OS_OSX 1 -# define POSH_OS_STRING "MacOS X" +//ACS if we're in xcode, look at the target conditionals to figure out if this is ios or osx +#if defined __APPLE__ +# include "TargetConditionals.h" +#endif +#if TARGET_OS_IPHONE +# define POSH_OS_IOS 1 +# define POSH_OS_STRING "iOS" +#else +# if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx +# define POSH_OS_OSX 1 +# define POSH_OS_STRING "MacOS X" +# endif #endif #if defined __sun__ || defined sun || defined __sun || defined __solaris__ diff --git a/extern/rg_etc1_v104/rg_etc1.cpp b/extern/rg_etc1_v104/rg_etc1.cpp index d9b9331..d6d6ddf 100644 --- a/extern/rg_etc1_v104/rg_etc1.cpp +++ b/extern/rg_etc1_v104/rg_etc1.cpp @@ -1808,7 +1808,7 @@ typedef unsigned long uint64; { if (block_inten[0] > m_pSorted_luma[n - 1]) { - const uint min_error = labs(int(block_inten[0] - m_pSorted_luma[n - 1])); + const uint min_error = abs(int(block_inten[0] - m_pSorted_luma[n - 1])); if (min_error >= trial_solution.m_error) continue; } @@ -1822,7 +1822,7 @@ 
typedef unsigned long uint64; { if (m_pSorted_luma[0] > block_inten[3]) { - const uint min_error = labs(int(m_pSorted_luma[0] - block_inten[3])); + const uint min_error = abs(int(m_pSorted_luma[0] - block_inten[3])); if (min_error >= trial_solution.m_error) continue; } @@ -1914,7 +1914,7 @@ done: for (uint packed_c = 0; packed_c < limit; packed_c++) { int v = etc1_decode_value(diff, inten, selector, packed_c); - uint err = labs(v - static_cast(color)); + uint err = abs(v - static_cast(color)); if (err < best_error) { best_error = err; diff --git a/src/bc6h/CMakeLists.txt b/src/bc6h/CMakeLists.txt index 635e0f3..5c01c6c 100644 --- a/src/bc6h/CMakeLists.txt +++ b/src/bc6h/CMakeLists.txt @@ -14,6 +14,7 @@ SET(BC6H_SRCS zohtwo.cpp) ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS}) +TARGET_LINK_LIBRARIES(bc6h nvcore nvmath) IF(NOT WIN32) IF(CMAKE_COMPILER_IS_GNUCXX) diff --git a/src/bc6h/zoh_utils.cpp b/src/bc6h/zoh_utils.cpp index 166d1f3..328477c 100644 --- a/src/bc6h/zoh_utils.cpp +++ b/src/bc6h/zoh_utils.cpp @@ -37,7 +37,7 @@ int Utils::lerp(int a, int b, int i, int denom) case 3: denom *= 5; i *= 5; // fall through to case 15 case 15: weights = denom15_weights_64; break; case 7: weights = denom7_weights_64; break; - default: nvDebugCheck(0); + default: nvUnreachable(); } return (a*weights[denom-i] +b*weights[i] + round) >> shift; diff --git a/src/bc6h/zohone.cpp b/src/bc6h/zohone.cpp index 43a302c..df49cc8 100644 --- a/src/bc6h/zohone.cpp +++ b/src/bc6h/zohone.cpp @@ -584,7 +584,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e { Vector3 pixels[Tile::TILE_TOTAL]; float importance[Tile::TILE_TOTAL]; - float err = 0; + //float err = 0; for (int region=0; region= 0 && pat_index < NPATTERNS); @@ -580,7 +580,7 @@ static float exhaustive(const Vector4 colors[], const float importance[], int np int bhigh = min((1<m_size); return i == this->m_size; } NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; } -#if 
NV_CC_MSVC +#if NV_NEED_PSEUDOINDEX_WRAPPER NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) { return m_buffer[i(this)]; } diff --git a/src/nvcore/DefsGnucDarwin.h b/src/nvcore/DefsGnucDarwin.h index 75dc027..b0a58db 100644 --- a/src/nvcore/DefsGnucDarwin.h +++ b/src/nvcore/DefsGnucDarwin.h @@ -27,7 +27,7 @@ #define NV_FASTCALL __attribute__((fastcall)) #define NV_FORCEINLINE __attribute__((always_inline)) inline #define NV_DEPRECATED __attribute__((deprecated)) -#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX +#define NV_THREAD_LOCAL __thread #if __GNUC__ > 2 #define NV_PURE __attribute__((pure)) diff --git a/src/nvcore/FileSystem.cpp b/src/nvcore/FileSystem.cpp index bf64c28..4336f5d 100644 --- a/src/nvcore/FileSystem.cpp +++ b/src/nvcore/FileSystem.cpp @@ -31,11 +31,6 @@ bool FileSystem::exists(const char * path) // PathFileExists requires linking to shlwapi.lib //return PathFileExists(path) != 0; return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES; -#elif NV_OS_ORBIS - const int BUFFER_SIZE = 2048; - char file_fullpath[BUFFER_SIZE]; - snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path); - return sceFiosExistsSync(NULL, file_fullpath); #else if (FILE * fp = fopen(path, "r")) { @@ -78,3 +73,31 @@ bool FileSystem::removeFile(const char * path) // @@ Use unlink or remove? 
return remove(path) == 0; } + + +#include "StdStream.h" // for fileOpen + +bool FileSystem::copyFile(const char * src, const char * dst) { + + FILE * fsrc = fileOpen(src, "rb"); + if (fsrc == NULL) return false; + NV_ON_RETURN(fclose(fsrc)); + + FILE * fdst = fileOpen(dst, "wb"); + if (fdst == NULL) return false; + NV_ON_RETURN(fclose(fdst)); + + char buffer[1024]; + size_t n; + + while ((n = fread(buffer, sizeof(char), sizeof(buffer), fsrc)) > 0) { + if (fwrite(buffer, sizeof(char), n, fdst) != n) { + return false; + } + } + + return true; +} + + + diff --git a/src/nvcore/FileSystem.h b/src/nvcore/FileSystem.h index f0f06aa..17379fb 100644 --- a/src/nvcore/FileSystem.h +++ b/src/nvcore/FileSystem.h @@ -15,7 +15,7 @@ namespace nv NVCORE_API bool createDirectory(const char * path); NVCORE_API bool changeDirectory(const char * path); NVCORE_API bool removeFile(const char * path); - + NVCORE_API bool copyFile(const char * src, const char * dst); } // FileSystem namespace } // nv namespace diff --git a/src/nvcore/ForEach.h b/src/nvcore/ForEach.h index 078227f..d7a89cc 100644 --- a/src/nvcore/ForEach.h +++ b/src/nvcore/ForEach.h @@ -33,6 +33,8 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709 #else // If typeof not available: +#define NV_NEED_PSEUDOINDEX_WRAPPER 1 + #include // placement new struct PseudoIndexWrapper { diff --git a/src/nvcore/Memory.cpp b/src/nvcore/Memory.cpp index 644f40a..ab8f5d1 100644 --- a/src/nvcore/Memory.cpp +++ b/src/nvcore/Memory.cpp @@ -2,6 +2,7 @@ #include "Memory.h" #include "Debug.h" +#include "Utils.h" #include @@ -56,6 +57,7 @@ void * realloc(void * ptr, size_t size) #endif } + /* No need to override this unless we want line info. 
void * operator new (size_t size) throw() { @@ -116,4 +118,32 @@ void operator delete(void* p, const std::nothrow_t&) throw() #endif // NV_OVERRIDE_ALLOC +void * nv::aligned_malloc(size_t size, size_t alignment) +{ + // alignment must be a power of two, multiple of sizeof(void*) + nvDebugCheck(isPowerOfTwo(alignment)); + nvDebugCheck((alignment & (sizeof(void*) - 1)) == 0); + +#if NV_OS_WIN32 || NV_OS_DURANGO + return _aligned_malloc(size, alignment); +#elif NV_OS_DARWIN && !NV_OS_IOS + void * ptr = NULL; + posix_memalign(&ptr, alignment, size); + return ptr; +#elif NV_OS_LINUX + return memalign(alignment, size); +#else // NV_OS_ORBIS || NV_OS_IOS + // @@ IC: iOS appears to be 16 byte aligned, should we check alignment and assert if we request a higher alignment factor? + return ::malloc(size); +#endif +} + +void nv::aligned_free(void * ptr) +{ +#if NV_OS_WIN32 || NV_OS_DURANGO + _aligned_free(ptr); +#else + ::free(ptr); +#endif +} diff --git a/src/nvcore/Memory.h b/src/nvcore/Memory.h index a7fe197..5739e49 100644 --- a/src/nvcore/Memory.h +++ b/src/nvcore/Memory.h @@ -7,10 +7,16 @@ #include "nvcore.h" #include // malloc(), realloc() and free() +#include // memset //#include // size_t //#include // new and delete +#define TRACK_MEMORY_LEAKS 0 +#if TRACK_MEMORY_LEAKS +#include +#endif + #if NV_CC_GNUC # define NV_ALIGN_16 __attribute__ ((__aligned__ (16))) @@ -41,6 +47,8 @@ extern "C" { #endif namespace nv { + NVCORE_API void * aligned_malloc(size_t size, size_t alignment); + NVCORE_API void aligned_free(void * ); // C++ helpers. template NV_FORCEINLINE T * malloc(size_t count) { diff --git a/src/nvcore/Ptr.h b/src/nvcore/Ptr.h index 7275c43..b1f7228 100644 --- a/src/nvcore/Ptr.h +++ b/src/nvcore/Ptr.h @@ -113,7 +113,7 @@ namespace nv public: // BaseClass must implement addRef() and release(). - typedef SmartPtr ThisType; + typedef SmartPtr ThisType; /// Default ctor. 
SmartPtr() : m_ptr(NULL) diff --git a/src/nvcore/StdStream.h b/src/nvcore/StdStream.h index dbebff2..8dbdf99 100644 --- a/src/nvcore/StdStream.h +++ b/src/nvcore/StdStream.h @@ -213,9 +213,12 @@ namespace nv #elif NV_OS_LINUX return (uint)fread_unlocked(data, 1, len, m_fp); #elif NV_OS_DARWIN - // @@ No error checking, always returns len. + // This is rather lame. Not sure if it's faster than the locked version. for (uint i = 0; i < len; i++) { ((char *)data)[i] = getc_unlocked(m_fp); + if (feof_unlocked(m_fp) != 0) { + return i; + } } return len; #else diff --git a/src/nvcore/StrLib.cpp b/src/nvcore/StrLib.cpp index 72d6612..b285402 100644 --- a/src/nvcore/StrLib.cpp +++ b/src/nvcore/StrLib.cpp @@ -347,26 +347,36 @@ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg ) } -/** Append a string. */ -StringBuilder & StringBuilder::append( const char * s ) +// Append a character. +StringBuilder & StringBuilder::append( char c ) { - return append(s, U32(strlen( s ))); + return append(&c, 1); } +// Append a string. +StringBuilder & StringBuilder::append( const char * s ) +{ + return append(s, U32(strlen( s ))); +} -/** Append a string. */ +// Append a string. StringBuilder & StringBuilder::append(const char * s, uint len) { nvDebugCheck(s != NULL); - uint offset = length(); - const uint size = offset + len + 1; - reserve(size); - strCpy(m_str + offset, len + 1, s, len); + uint offset = length(); + const uint size = offset + len + 1; + reserve(size); + strCpy(m_str + offset, len + 1, s, len); return *this; } +StringBuilder & StringBuilder::append(const StringBuilder & str) +{ + return append(str.m_str, str.length()); +} + /** Append a formatted string. */ StringBuilder & StringBuilder::appendFormat( const char * fmt, ... 
) @@ -516,6 +526,19 @@ StringBuilder & StringBuilder::copy( const StringBuilder & s ) return *this; } +void StringBuilder::removeChar(char c) +{ + char * src = strchr(m_str, c); + if (src) { + char * dst = src; + src++; + while (*src) { + *dst++ = *src++; + } + *dst = '\0'; + } +} + bool StringBuilder::endsWith(const char * str) const { uint l = uint(strlen(str)); @@ -530,7 +553,7 @@ bool StringBuilder::beginsWith(const char * str) const return strncmp(m_str, str, l) == 0; } -// Find given char starting from the end. +// Find given char starting from the end. Why not use strrchr!? char * StringBuilder::reverseFind(char c) { int length = (int)strlen(m_str) - 1; @@ -563,6 +586,19 @@ char * StringBuilder::release() return str; } +// Take ownership of string. +void StringBuilder::acquire(char * str) +{ + if (str) { + m_size = strLen(str) + 1; + m_str = str; + } + else { + m_size = 0; + m_str = NULL; + } +} + // Swap strings. void nv::swap(StringBuilder & a, StringBuilder & b) { swap(a.m_size, b.m_size); @@ -585,19 +621,20 @@ const char * Path::extension() const /*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) { - nvCheck(path != NULL); - - for (int i = 0;; i++) { - if (path[i] == '\0') break; - if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator; + if (path != NULL) { + for (int i = 0;; i++) { + if (path[i] == '\0') break; + if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator; + } } } /// Toggles path separators (ie. \\ into /). void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/) { - nvCheck(!isNull()); - translatePath(m_str, pathSeparator); + if (!isNull()) { + translatePath(m_str, pathSeparator); + } } void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/) diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h index f4f3ac3..c6ab71d 100644 --- a/src/nvcore/StrLib.h +++ b/src/nvcore/StrLib.h @@ -105,8 +105,10 @@ namespace nv StringBuilder & format( const char * format, ... 
) __attribute__((format (printf, 2, 3))); StringBuilder & formatList( const char * format, va_list arg ); + StringBuilder & append(char c); StringBuilder & append(const char * str); - StringBuilder & append(const char * str, uint len); + StringBuilder & append(const char * str, uint len); + StringBuilder & append(const StringBuilder & str); StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3))); StringBuilder & appendFormatList(const char * format, va_list arg); @@ -122,6 +124,8 @@ namespace nv StringBuilder & toLower(); StringBuilder & toUpper(); + + void removeChar(char c); bool endsWith(const char * str) const; bool beginsWith(const char * str) const; @@ -129,15 +133,16 @@ namespace nv char * reverseFind(char c); void reset(); - bool isNull() const { return m_size == 0; } + NV_FORCEINLINE bool isNull() const { return m_size == 0; } // const char * accessors //operator const char * () const { return m_str; } //operator char * () { return m_str; } - const char * str() const { return m_str; } - char * str() { return m_str; } + NV_FORCEINLINE const char * str() const { return m_str; } + NV_FORCEINLINE char * str() { return m_str; } - char * release(); + char * release(); // Release ownership of string. + void acquire(char *); // Take ownership of string. /// Implement value semantics. StringBuilder & operator=( const StringBuilder & s ) { @@ -280,25 +285,25 @@ namespace nv /// Equal operator. bool operator==( const String & str ) const { - return strMatch(str.data, data); + return strEqual(str.data, data); } /// Equal operator. bool operator==( const char * str ) const { - return strMatch(str, data); + return strEqual(str, data); } /// Not equal operator. bool operator!=( const String & str ) const { - return !strMatch(str.data, data); + return !strEqual(str.data, data); } /// Not equal operator. 
bool operator!=( const char * str ) const { - return !strMatch(str, data); + return !strEqual(str, data); } /// Returns true if this string is the null string. diff --git a/src/nvcore/Stream.h b/src/nvcore/Stream.h index 513cd0c..8e74380 100644 --- a/src/nvcore/Stream.h +++ b/src/nvcore/Stream.h @@ -76,13 +76,13 @@ namespace nv void advance(uint offset) { seek(tell() + offset); } - // friends + // friends friend Stream & operator<<( Stream & s, bool & c ) { #if NV_OS_DARWIN && !NV_CC_CPP11 nvStaticCheck(sizeof(bool) == 4); uint8 b = c ? 1 : 0; s.serialize( &b, 1 ); - c = (b == 1); + c = (b != 0); #else nvStaticCheck(sizeof(bool) == 1); s.serialize( &c, 1 ); diff --git a/src/nvcore/Utils.h b/src/nvcore/Utils.h index 778b252..2eb692c 100644 --- a/src/nvcore/Utils.h +++ b/src/nvcore/Utils.h @@ -39,6 +39,28 @@ namespace nv // These intentionally look like casts. + // uint64 casts: + template inline uint64 U64(T x) { return x; } + //template <> inline uint64 U64(uint64 x) { return x; } + template <> inline uint64 U64(int64 x) { nvDebugCheck(x >= 0); return (uint64)x; } + //template <> inline uint64 U32(uint32 x) { return x; } + template <> inline uint64 U64(int32 x) { nvDebugCheck(x >= 0); return (uint64)x; } + //template <> inline uint64 U64(uint16 x) { return x; } + template <> inline uint64 U64(int16 x) { nvDebugCheck(x >= 0); return (uint64)x; } + //template <> inline uint64 U64(uint8 x) { return x; } + template <> inline uint64 U64(int8 x) { nvDebugCheck(x >= 0); return (uint64)x; } + + // int64 casts: + template inline int64 I64(T x) { return x; } + template <> inline int64 I64(uint64 x) { nvDebugCheck(x <= NV_INT64_MAX); return (int64)x; } + //template <> inline uint64 U64(int64 x) { return x; } + //template <> inline uint64 U32(uint32 x) { return x; } + //template <> inline uint64 U64(int32 x) { return x; } + //template <> inline uint64 U64(uint16 x) { return x; } + //template <> inline uint64 U64(int16 x) { return x; } + //template <> inline uint64 U64(uint8 x) 
{ return x; } + //template <> inline uint64 U64(int8 x) { return x; } + // uint32 casts: template inline uint32 U32(T x) { return x; } template <> inline uint32 U32(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; } @@ -50,6 +72,11 @@ namespace nv //template <> inline uint32 U32(uint8 x) { return x; } template <> inline uint32 U32(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; } +#if NV_OS_DARWIN + template <> inline uint32 U32(unsigned long x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; } + template <> inline uint32 U32(long x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; } +#endif + // int32 casts: template inline int32 I32(T x) { return x; } template <> inline int32 I32(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } @@ -182,7 +209,7 @@ namespace nv * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x * @note nextPowerOfTwo(x) = 2 << log2(x-1) */ - inline uint nextPowerOfTwo( uint x ) + inline uint32 nextPowerOfTwo(uint32 x) { nvDebugCheck( x != 0 ); #if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. @@ -202,8 +229,19 @@ namespace nv #endif } - /// Return true if @a n is a power of two. - inline bool isPowerOfTwo( uint n ) + inline uint64 nextPowerOfTwo(uint64 x) + { + nvDebugCheck(x != 0); + uint64 p = 1; + while (x > p) { + p += p; + } + return p; + } + + // @@ Should I just use a macro instead? + template + inline bool isPowerOfTwo(T n) { return (n & (n-1)) == 0; } diff --git a/src/nvcore/nvcore.h b/src/nvcore/nvcore.h index b402bb2..ecbaa5b 100644 --- a/src/nvcore/nvcore.h +++ b/src/nvcore/nvcore.h @@ -56,6 +56,7 @@ # define NV_OS_MINGW 1 # define NV_OS_WIN32 1 #elif defined POSH_OS_OSX +# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too. 
# define NV_OS_DARWIN 1 # define NV_OS_UNIX 1 #elif defined POSH_OS_IOS @@ -78,9 +79,9 @@ // Threading: // some platforms don't implement __thread or similar for thread-local-storage -#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios? +#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS # define NV_OS_USE_PTHREAD 1 -# if NV_OS_DARWIN || NV_OS_IOS +# if 0 //Apple finally added TLS support to iOS!// NV_OS_IOS # define NV_OS_HAS_TLS_QUALIFIER 0 # else # define NV_OS_HAS_TLS_QUALIFIER 1 @@ -96,7 +97,7 @@ // NV_CPU_X86_64 // NV_CPU_PPC // NV_CPU_ARM -// NV_CPU_AARCH64 +// NV_CPU_ARM_64 #define NV_CPU_STRING POSH_CPU_STRING @@ -110,7 +111,7 @@ #elif defined POSH_CPU_STRONGARM # define NV_CPU_ARM 1 #elif defined POSH_CPU_AARCH64 -# define NV_CPU_AARCH64 1 +# define NV_CPU_ARM_64 1 #else # error "Unsupported CPU" #endif @@ -148,10 +149,16 @@ #endif // Endiannes: -#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN -#define NV_BIG_ENDIAN POSH_BIG_ENDIAN -#define NV_ENDIAN_STRING POSH_ENDIAN_STRING - +// @@ POSH endian detection is broken for arm64 on iOS. They are bi-endian and iOS sets all their processors to little endian by default. 
+#if NV_OS_IOS +# define NV_LITTLE_ENDIAN 1 +# define NV_BIG_ENDIAN 0 +# define NV_ENDIAN_STRING "little" +#else +# define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN +# define NV_BIG_ENDIAN POSH_BIG_ENDIAN +# define NV_ENDIAN_STRING POSH_ENDIAN_STRING +#endif // Define the right printf prefix for size_t arguments: #if POSH_64BIT_POINTER @@ -164,6 +171,28 @@ // cmake config #include "nvconfig.h" +#if NV_OS_DARWIN +#include +//#include + +// Type definitions: +typedef uint8_t uint8; +typedef int8_t int8; + +typedef uint16_t uint16; +typedef int16_t int16; + +typedef uint32_t uint32; +typedef int32_t int32; + +typedef uint64_t uint64; +typedef int64_t int64; + +// POSH gets this wrong due to __LP64__ +#undef POSH_I64_PRINTF_PREFIX +#define POSH_I64_PRINTF_PREFIX "ll" + +#else // Type definitions: typedef posh_u8_t uint8; @@ -175,8 +204,23 @@ typedef posh_i16_t int16; typedef posh_u32_t uint32; typedef posh_i32_t int32; +//#if NV_OS_DARWIN +// OSX-64 is supposed to be LP64 (longs and pointers are 64 bits), thus uint64 is defined as +// unsigned long. However, some OSX headers define it as unsigned long long, producing errors, +// even though both types are 64 bit. Ideally posh should handle that, but it has not been +// updated in ages, so here I'm just falling back to the standard C99 types defined in inttypes.h +//#include +//typedef posh_u64_t uint64_t; +//typedef posh_i64_t int64_t; +//#else typedef posh_u64_t uint64; typedef posh_i64_t int64; +//#endif +#if NV_OS_DARWIN +// To avoid duplicate definitions. 
+#define _UINT64 +#endif +#endif // Aliases typedef uint32 uint; @@ -246,8 +290,10 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4); NV_COMPILER_CHECK(sizeof(int32) == 4); NV_COMPILER_CHECK(sizeof(uint32) == 4); - -#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) +#include // for size_t +template char (&ArraySizeHelper(T (&array)[N]))[N]; +#define NV_ARRAY_SIZE(x) sizeof(ArraySizeHelper(x)) +//#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) #if 0 // Disabled in The Witness. #if NV_CC_MSVC @@ -269,8 +315,38 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4); NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \ } +namespace nv { + template + struct ScopeExit { + ScopeExit(F f) : f(f) {} + ~ScopeExit() { f(); } + F f; + }; + + template + ScopeExit MakeScopeExit(F f) { + return ScopeExit(f); + }; +} + +#define NV_ON_RETURN(code) \ + auto NV_STRING_JOIN2(scope_exit_, __LINE__) = nv::MakeScopeExit([=](){code;}) + + // Indicate the compiler that the parameter is not used to suppress compier warnings. +#if NV_CC_MSVC #define NV_UNUSED(a) ((a)=(a)) +#else +#define NV_UNUSED(a) _Pragma(NV_STRING(unused(a))) +#endif + +#if NV_CC_GNUC || NV_CC_CLANG +#define NV_LIKELY(x) __builtin_expect(!!(x), 1) +#define NV_UNLIKELY(x) __builtin_expect(!!(x), 0) +#else +#define NV_LIKELY(x) x +#define NV_UNLIKELY(x) x +#endif // Null index. @@ Move this somewhere else... it's only used by nvmesh. //const unsigned int NIL = unsigned int(~0); diff --git a/src/nvimage/BlockDXT.cpp b/src/nvimage/BlockDXT.cpp index 9d334c4..e98d8fa 100644 --- a/src/nvimage/BlockDXT.cpp +++ b/src/nvimage/BlockDXT.cpp @@ -632,44 +632,45 @@ void BlockCTX1::setIndices(int * idx) /// Decode BC6 block. -void BlockBC6::decodeBlock(Vector3 colors[16]) const -{ - ZOH::Tile tile(4, 4); - ZOH::decompress((const char *)data, tile); - - // Convert ZOH's tile struct to Vector3, and convert half to float. 
- for (uint y = 0; y < 4; ++y) - { - for (uint x = 0; x < 4; ++x) - { - uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); - uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); - uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); - colors[y * 4 + x].x = to_float(rHalf); - colors[y * 4 + x].y = to_float(gHalf); - colors[y * 4 + x].z = to_float(bHalf); - } - } +void BlockBC6::decodeBlock(Vector4 colors[16]) const +{ + ZOH::Tile tile(4, 4); + ZOH::decompress((const char *)data, tile); + + // Convert ZOH's tile struct to Vector3, and convert half to float. + for (uint y = 0; y < 4; ++y) + { + for (uint x = 0; x < 4; ++x) + { + uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); + uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); + uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); + colors[y * 4 + x].x = to_float(rHalf); + colors[y * 4 + x].y = to_float(gHalf); + colors[y * 4 + x].z = to_float(bHalf); + colors[y * 4 + x].w = 1.0f; + } + } } /// Decode BC7 block. void BlockBC7::decodeBlock(ColorBlock * block) const { - AVPCL::Tile tile(4, 4); - AVPCL::decompress((const char *)data, tile); - - // Convert AVPCL's tile struct back to NVTT's. - for (uint y = 0; y < 4; ++y) - { - for (uint x = 0; x < 4; ++x) - { - Vector4 rgba = tile.data[y][x]; - // Note: decoded rgba values are in [0, 255] range and should be an integer, - // because BC7 never uses more than 8 bits per channel. So no need to round. - block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w)); - } - } + AVPCL::Tile tile(4, 4); + AVPCL::decompress((const char *)data, tile); + + // Convert AVPCL's tile struct back to NVTT's. + for (uint y = 0; y < 4; ++y) + { + for (uint x = 0; x < 4; ++x) + { + Vector4 rgba = tile.data[y][x]; + // Note: decoded rgba values are in [0, 255] range and should be an integer, + // because BC7 never uses more than 8 bits per channel. So no need to round. 
+ block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w)); + } + } } diff --git a/src/nvimage/BlockDXT.h b/src/nvimage/BlockDXT.h index 40f615f..c462761 100644 --- a/src/nvimage/BlockDXT.h +++ b/src/nvimage/BlockDXT.h @@ -36,6 +36,7 @@ namespace nv struct AlphaBlock4x4; class Stream; class Vector3; + class Vector4; /// DXT1 block. @@ -220,7 +221,7 @@ namespace nv struct BlockBC6 { uint8 data[16]; // Not even going to try to write a union for this thing. - void decodeBlock(Vector3 colors[16]) const; + void decodeBlock(Vector4 colors[16]) const; }; /// BC7 block. diff --git a/src/nvimage/CMakeLists.txt b/src/nvimage/CMakeLists.txt index dce627d..42f21f4 100644 --- a/src/nvimage/CMakeLists.txt +++ b/src/nvimage/CMakeLists.txt @@ -14,7 +14,8 @@ SET(IMAGE_SRCS NormalMap.h NormalMap.cpp PixelFormat.h PsdFile.h - TgaFile.h) + TgaFile.h + KtxFile.h KtxFile.cpp) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index 3be26b9..be8146e 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -454,7 +454,8 @@ namespace { D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } }, { D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } }, - { D3DFMT_A8L8, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0, 0, 0xFF00 } }, + { D3DFMT_A8L8, 0, { 16, 0xFF, 0, 0, 0xFF00 } }, + { 0, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0xFF00, 0, 0 } }, }; static const uint s_formatCount = NV_ARRAY_SIZE(s_formats); @@ -635,7 +636,7 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3) { // set fourcc pixel format. 
this->pf.flags = DDPF_FOURCC; - this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); + this->pf.fourcc = NV_MAKEFOURCC(c0, c1, c2, c3); this->pf.bitcount = 0; this->pf.rmask = 0; @@ -659,7 +660,7 @@ void DDSHeader::setFormatCode(uint32 code) void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3) { - this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3); + this->pf.bitcount = NV_MAKEFOURCC(c0, c1, c2, c3); } @@ -1445,7 +1446,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) { BlockBC6 block; *stream << block; - Vector3 colors[16]; + Vector4 colors[16]; block.decodeBlock(colors); // Clamp to [0, 1] and round to 8-bit @@ -1453,7 +1454,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba) { for (int x = 0; x < 4; ++x) { - Vector3 px = colors[y*4 + x]; + Vector4 px = colors[y*4 + x]; rgba->color(x, y).setRGBA( ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f), ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f), @@ -1535,7 +1536,7 @@ uint DirectDrawSurface::surfaceSize(uint mipmap) const else { w = (w + 3) / 4; h = (h + 3) / 4; - d = d; // @@ How are 3D textures aligned? + //d = d; // @@ How are 3D textures aligned? 
return blockSize * w * h * d; } } diff --git a/src/nvimage/DirectDrawSurface.h b/src/nvimage/DirectDrawSurface.h index 5b48a8c..1049f06 100644 --- a/src/nvimage/DirectDrawSurface.h +++ b/src/nvimage/DirectDrawSurface.h @@ -27,11 +27,9 @@ #include "nvimage.h" -#if !defined(MAKEFOURCC) -#define MAKEFOURCC(ch0, ch1, ch2, ch3) \ +#define NV_MAKEFOURCC(ch0, ch1, ch2, ch3) \ (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \ (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 )) -#endif namespace nv { @@ -101,19 +99,26 @@ namespace nv enum FOURCC { - FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'), - FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '), - FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), - FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), - FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), - FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), - FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), - FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'), - FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), - FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), - FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'), - FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'), - FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'), + FOURCC_NVTT = NV_MAKEFOURCC('N', 'V', 'T', 'T'), + FOURCC_DDS = NV_MAKEFOURCC('D', 'D', 'S', ' '), + FOURCC_DXT1 = NV_MAKEFOURCC('D', 'X', 'T', '1'), + FOURCC_DXT2 = NV_MAKEFOURCC('D', 'X', 'T', '2'), + FOURCC_DXT3 = NV_MAKEFOURCC('D', 'X', 'T', '3'), + FOURCC_DXT4 = NV_MAKEFOURCC('D', 'X', 'T', '4'), + FOURCC_DXT5 = NV_MAKEFOURCC('D', 'X', 'T', '5'), + FOURCC_RXGB = NV_MAKEFOURCC('R', 'X', 'G', 'B'), + FOURCC_ATI1 = NV_MAKEFOURCC('A', 'T', 'I', '1'), + FOURCC_ATI2 = NV_MAKEFOURCC('A', 'T', 'I', '2'), + FOURCC_A2XY = NV_MAKEFOURCC('A', '2', 'X', 'Y'), + FOURCC_DX10 = NV_MAKEFOURCC('D', 'X', '1', '0'), + FOURCC_UVER = NV_MAKEFOURCC('U', 'V', 'E', 'R'), + FOURCC_BC6H = NV_MAKEFOURCC('B', 'C', '6', 'H'), + FOURCC_BC7L = NV_MAKEFOURCC('B', 'C', '7', 'L'), + + FOURCC_PVR0 = NV_MAKEFOURCC('P', 'V', 'R', '0'), + FOURCC_PVR1 
= NV_MAKEFOURCC('P', 'V', 'R', '1'), + FOURCC_PVR2 = NV_MAKEFOURCC('P', 'V', 'R', '2'), + FOURCC_PVR3 = NV_MAKEFOURCC('P', 'V', 'R', '3'), }; diff --git a/src/nvimage/ErrorMetric.cpp b/src/nvimage/ErrorMetric.cpp index 3f10a72..3f632ef 100644 --- a/src/nvimage/ErrorMetric.cpp +++ b/src/nvimage/ErrorMetric.cpp @@ -1,460 +1,513 @@ - -#include "ErrorMetric.h" -#include "FloatImage.h" -#include "Filter.h" - -#include "nvmath/Matrix.h" -#include "nvmath/Vector.inl" - -#include // FLT_MAX - -using namespace nv; - -float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) -{ - if (!sameLayout(img, ref)) { - return FLT_MAX; - } - nvDebugCheck(img->componentCount() == 4); - nvDebugCheck(ref->componentCount() == 4); - - double mse = 0; - - const uint count = img->pixelCount(); - for (uint i = 0; i < count; i++) - { - float r0 = ref->pixel(i + count * 0); - float g0 = ref->pixel(i + count * 1); - float b0 = ref->pixel(i + count * 2); - float a0 = ref->pixel(i + count * 3); - float r1 = img->pixel(i + count * 0); - float g1 = img->pixel(i + count * 1); - float b1 = img->pixel(i + count * 2); - //float a1 = img->pixel(i + count * 3); - - float r = r0 - r1; - float g = g0 - g1; - float b = b0 - b1; - - float a = 1; - if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ? 
- - mse += (r * r) * a; - mse += (g * g) * a; - mse += (b * b) * a; - } - - return float(sqrt(mse / count)); -} - -float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img) -{ - if (!sameLayout(img, ref)) { - return FLT_MAX; - } - nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4); - - double mse = 0; - - const uint count = img->pixelCount(); - for (uint i = 0; i < count; i++) - { - float a0 = img->pixel(i + count * 3); - float a1 = ref->pixel(i + count * 3); - - float a = a0 - a1; - - mse += a * a; - } - - return float(sqrt(mse / count)); -} - - -float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight) -{ - if (!sameLayout(img, ref)) { - return FLT_MAX; - } - nvDebugCheck(img->componentCount() == 4); - nvDebugCheck(ref->componentCount() == 4); - - double mae = 0; - - const uint count = img->pixelCount(); - for (uint i = 0; i < count; i++) - { - float r0 = img->pixel(i + count * 0); - float g0 = img->pixel(i + count * 1); - float b0 = img->pixel(i + count * 2); - //float a0 = img->pixel(i + count * 3); - float r1 = ref->pixel(i + count * 0); - float g1 = ref->pixel(i + count * 1); - float b1 = ref->pixel(i + count * 2); - float a1 = ref->pixel(i + count * 3); - - float r = fabs(r0 - r1); - float g = fabs(g0 - g1); - float b = fabs(b0 - b1); - - float a = 1; - if (alphaWeight) a = a1; - - mae += r * a; - mae += g * a; - mae += b * a; - } - - return float(mae / count); -} - -float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img) -{ - if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) { - return FLT_MAX; - } - nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4); - - double mae = 0; - - const uint count = img->width() * img->height(); - for (uint i = 0; i < count; i++) - { - float a0 = img->pixel(i + count * 3); - float a1 = ref->pixel(i + count * 3); - - float a = a0 - a1; - - mae += fabs(a); - } - - return 
float(mae / count); -} - - -// Color space conversions based on: -// http://www.brucelindbloom.com/ - -// Assumes input is in *linear* sRGB color space. -static Vector3 rgbToXyz(Vector3::Arg c) -{ - Vector3 xyz; - xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z; - xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z; - xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z; - return xyz; -} - -static Vector3 xyzToRgb(Vector3::Arg c) -{ - Vector3 rgb; - rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z; - rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z; - rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z; - return rgb; -} - -static float toLinear(float f) -{ - return powf(f, 2.2f); -} - -static float toGamma(float f) -{ - // @@ Use sRGB space? - return powf(f, 1.0f/2.2f); -} - -static Vector3 toLinear(Vector3::Arg c) -{ - return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z)); -} - -static Vector3 toGamma(Vector3::Arg c) -{ - return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z)); -} - -static float f(float t) -{ - const float epsilon = powf(6.0f/29.0f, 3); - - if (t > epsilon) { - return powf(t, 1.0f/3.0f); - } - else { - return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f; - } -} - -static float finv(float t) -{ - if (t > 6.0f / 29.0f) { - return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f); - } - else { - return powf(t, 3.0f); - } -} - -static Vector3 xyzToCieLab(Vector3::Arg c) -{ - // Normalized white point. 
- const float Xn = 0.950456f; - const float Yn = 1.0f; - const float Zn = 1.088754f; - - float Xr = c.x / Xn; - float Yr = c.y / Yn; - float Zr = c.z / Zn; - - float fx = f(Xr); - float fy = f(Yr); - float fz = f(Zr); - - float L = 116 * fx - 16; - float a = 500 * (fx - fy); - float b = 200 * (fy - fz); - - return Vector3(L, a, b); -} - -static Vector3 rgbToCieLab(Vector3::Arg c) -{ - return xyzToCieLab(rgbToXyz(toLinear(c))); -} - -// h is hue-angle in radians -static Vector3 cieLabToLCh(Vector3::Arg c) -{ - return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.y, c.z)); -} - -static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage) -{ - nvDebugCheck(rgbImage != NULL && LabImage != NULL); - nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height()); - nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3); - - const uint w = rgbImage->width(); - const uint h = LabImage->height(); - - const float * R = rgbImage->channel(0); - const float * G = rgbImage->channel(1); - const float * B = rgbImage->channel(2); - - float * L = LabImage->channel(0); - float * a = LabImage->channel(1); - float * b = LabImage->channel(2); - - const uint count = w*h; - for (uint i = 0; i < count; i++) - { - Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i])); - L[i] = Lab.x; - a[i] = Lab.y; - b[i] = Lab.z; - } -} - - -// Assumes input images are in linear sRGB space. 
-float nv::cieLabError(const FloatImage * img0, const FloatImage * img1) -{ - if (!sameLayout(img0, img1)) return FLT_MAX; - nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); - - const float * r0 = img0->channel(0); - const float * g0 = img0->channel(1); - const float * b0 = img0->channel(2); - - const float * r1 = img1->channel(0); - const float * g1 = img1->channel(1); - const float * b1 = img1->channel(2); - - double error = 0.0f; - - const uint count = img0->pixelCount(); - for (uint i = 0; i < count; i++) - { - Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i])); - Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i])); - - // @@ Measure Delta E. - Vector3 delta = lab0 - lab1; - - error += length(delta); - } - - return float(error / count); -} - -// Assumes input images are in linear sRGB space. -float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1) -{ - if (!sameLayout(img0, img1)) return FLT_MAX; - nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); - - const float kL = 1; - const float kC = 1; - const float kH = 1; - const float k1 = 0.045f; - const float k2 = 0.015f; - - const float sL = 1; - - const float * r0 = img0->channel(0); - const float * g0 = img0->channel(1); - const float * b0 = img0->channel(2); - - const float * r1 = img1->channel(0); - const float * g1 = img1->channel(1); - const float * b1 = img1->channel(2); - - double error = 0.0f; - - const uint count = img0->pixelCount(); - for (uint i = 0; i < count; ++i) - { - Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i])); - Vector3 lch0 = cieLabToLCh(lab0); - Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i])); - Vector3 lch1 = cieLabToLCh(lab1); - - const float sC = 1 + k1*lch0.x; - const float sH = 1 + k2*lch0.x; - - // @@ Measure Delta E using the 1994 definition - Vector3 labDelta = lab0 - lab1; - Vector3 lchDelta = lch0 - lch1; - - double deltaLsq = powf(lchDelta.x / (kL*sL), 2); - double deltaCsq = 
powf(lchDelta.y / (kC*sC), 2); - - // avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2 - double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2); - deltaHsq /= powf(kH*sH, 2); - - error += sqrt(deltaLsq + deltaCsq + deltaHsq); - } - - return float(error / count); -} - -float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1) -{ - if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { - return FLT_MAX; - } - nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); - - uint w = img0->width(); - uint h = img0->height(); - uint d = img0->depth(); - - FloatImage lab0, lab1; // Original images in CIE-Lab space. - lab0.allocate(3, w, h, d); - lab1.allocate(3, w, h, d); - - // Convert input images to CIE-Lab. - rgbToCieLab(img0, &lab0); - rgbToCieLab(img1, &lab1); - - // @@ Convolve each channel by the corresponding filter. - /* - GaussianFilter LFilter(5); - GaussianFilter aFilter(5); - GaussianFilter bFilter(5); - - lab0.convolve(0, LFilter); - lab0.convolve(1, aFilter); - lab0.convolve(2, bFilter); - - lab1.convolve(0, LFilter); - lab1.convolve(1, aFilter); - lab1.convolve(2, bFilter); - */ - // @@ Measure Delta E between lab0 and lab1. - - return 0.0f; -} - - -// Assumes input images are normal maps. 
-float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1) -{ - if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { - return FLT_MAX; - } - nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); - - uint w = img0->width(); - uint h = img0->height(); - - const float * x0 = img0->channel(0); - const float * y0 = img0->channel(1); - const float * z0 = img0->channel(2); - - const float * x1 = img1->channel(0); - const float * y1 = img1->channel(1); - const float * z1 = img1->channel(2); - - double error = 0.0f; - - const uint count = w*h; - for (uint i = 0; i < count; i++) - { - Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); - Vector3 n1 = Vector3(x1[i], y1[i], z1[i]); - - n0 = 2.0f * n0 - Vector3(1); - n1 = 2.0f * n1 - Vector3(1); - - n0 = normalizeSafe(n0, Vector3(0), 0.0f); - n1 = normalizeSafe(n1, Vector3(0), 0.0f); - - error += acos(clamp(dot(n0, n1), -1.0f, 1.0f)); - } - - return float(error / count); -} - -float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1) -{ - if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { - return FLT_MAX; - } - nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); - - uint w = img0->width(); - uint h = img0->height(); - - const float * x0 = img0->channel(0); - const float * y0 = img0->channel(1); - const float * z0 = img0->channel(2); - - const float * x1 = img1->channel(0); - const float * y1 = img1->channel(1); - const float * z1 = img1->channel(2); - - double error = 0.0f; - - const uint count = w*h; - for (uint i = 0; i < count; i++) - { - Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); - Vector3 n1 = Vector3(x1[i], y1[i], z1[i]); - - n0 = 2.0f * n0 - Vector3(1); - n1 = 2.0f * n1 - Vector3(1); - - n0 = normalizeSafe(n0, Vector3(0), 0.0f); - n1 = normalizeSafe(n1, Vector3(0), 0.0f); - - float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f)); - error += 
angle * angle;
-    }
-
-    return float(sqrt(error / count));
-}
-
+
+#include "ErrorMetric.h"
+#include "FloatImage.h"
+#include "Filter.h"
+
+#include "nvmath/Matrix.h"
+#include "nvmath/Vector.inl"
+
+#include <float.h> // FLT_MAX
+
+using namespace nv;
+
+/// Root-mean-square RGB error of `img` with respect to `ref`.
+///
+/// @param ref          Reference image; must have 4 components.
+/// @param img          Image to evaluate; must have 4 components.
+/// @param alphaWeight  When true, each squared difference is scaled by the squared reference alpha.
+/// @return sqrt(sum of squared R, G and B differences / pixel count), or FLT_MAX when
+///         sameLayout(img, ref) fails.
+float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
+
+    double mse = 0;
+
+    // Channel c of pixel i is addressed as i + count * c.
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float r0 = ref->pixel(i + count * 0);
+        float g0 = ref->pixel(i + count * 1);
+        float b0 = ref->pixel(i + count * 2);
+        float a0 = ref->pixel(i + count * 3);
+        float r1 = img->pixel(i + count * 0);
+        float g1 = img->pixel(i + count * 1);
+        float b1 = img->pixel(i + count * 2);
+        //float a1 = img->pixel(i + count * 3);
+
+        float r = r0 - r1;
+        float g = g0 - g1;
+        float b = b0 - b1;
+
+        float a = 1;
+        if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ?
+
+        mse += (r * r) * a;
+        mse += (g * g) * a;
+        mse += (b * b) * a;
+    }
+
+    return float(sqrt(mse / count));
+}
+
+/// Root-mean-square error of the alpha channel (component 3) of `img` with respect to `ref`.
+///
+/// @return sqrt(sum of squared alpha differences / pixel count), or FLT_MAX when
+///         sameLayout(img, ref) fails.
+float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
+
+    double mse = 0;
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float a0 = img->pixel(i + count * 3);
+        float a1 = ref->pixel(i + count * 3);
+
+        float a = a0 - a1;
+
+        mse += a * a;
+    }
+
+    return float(sqrt(mse / count));
+}
+
+
+/// Mean absolute RGB error of `img` with respect to `ref`.
+///
+/// @param alphaWeight  When true, each absolute difference is scaled by the reference alpha.
+/// @return (sum of absolute R, G and B differences) / pixel count, or FLT_MAX when
+///         sameLayout(img, ref) fails.
+float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
+{
+    if (!sameLayout(img, ref)) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
+
+    double mae = 0;
+
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float r0 = img->pixel(i + count * 0);
+        float g0 = img->pixel(i + count * 1);
+        float b0 = img->pixel(i + count * 2);
+        //float a0 = img->pixel(i + count * 3);
+        float r1 = ref->pixel(i + count * 0);
+        float g1 = ref->pixel(i + count * 1);
+        float b1 = ref->pixel(i + count * 2);
+        float a1 = ref->pixel(i + count * 3);
+
+        float r = fabs(r0 - r1);
+        float g = fabs(g0 - g1);
+        float b = fabs(b0 - b1);
+
+        float a = 1;
+        if (alphaWeight) a = a1;
+
+        mae += r * a;
+        mae += g * a;
+        mae += b * a;
+    }
+
+    return float(mae / count);
+}
+
+/// Mean absolute error of the alpha channel (component 3) of `img` with respect to `ref`.
+///
+/// @return (sum of absolute alpha differences) / pixel count, or FLT_MAX when either
+///         image is NULL or their width, height or depth differ.
+float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img)
+{
+    if (img == NULL || ref == NULL ||
+        img->width() != ref->width() || img->height() != ref->height() || img->depth() != ref->depth()) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
+
+    double mae = 0;
+
+    // The channel stride must be the full pixel count (as in rmsAlphaError); width * height
+    // alone would address the wrong plane as soon as depth > 1.
+    const uint count = img->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        float a0 = img->pixel(i + count * 3);
+        float a1 = ref->pixel(i + count * 3);
+
+        float a = a0 - a1;
+
+        mae += fabs(a);
+    }
+
+    return
float(mae / count);
+}
+
+
+/// Root-mean-square RGBA error between `ref` and a bilinearly resampled `img`.
+///
+/// Each texel (x, y, z) of `ref` is compared with `img` sampled through
+/// sampleLinear() at (x / w, y / h, z / d), where w, h and d are the extents of
+/// `ref`. No layout check is made between the two images.
+///
+/// @param ref          Reference image; must have 4 components.
+/// @param img          Image to evaluate; must have 4 components.
+/// @param wm           Wrap mode forwarded to FloatImage::sampleLinear().
+/// @param alphaWeight  When true, the squared R, G and B differences are scaled by the
+///                     squared reference alpha. The alpha difference is never weighted.
+/// @return sqrt(accumulated squared error / texel count of `ref`), or FLT_MAX when
+///         either image is NULL.
+float nv::rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight)
+{
+    // Same failure convention as the other metrics in this file.
+    if (ref == NULL || img == NULL) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img->componentCount() == 4);
+    nvDebugCheck(ref->componentCount() == 4);
+
+    double mse = 0;
+
+    const uint w0 = ref->width();
+    const uint h0 = ref->height();
+    const uint d0 = ref->depth();
+
+    for (uint z = 0; z < d0; z++) {
+        for (uint y = 0; y < h0; y++) {
+            for (uint x = 0; x < w0; x++) {
+                float r0 = ref->pixel(0, x, y, z);
+                float g0 = ref->pixel(1, x, y, z);
+                float b0 = ref->pixel(2, x, y, z);
+                float a0 = ref->pixel(3, x, y, z);
+
+                float fx = float(x) / w0;
+                float fy = float(y) / h0;
+                float fz = float(z) / d0;
+
+                float r1 = img->sampleLinear(0, fx, fy, fz, wm);
+                float g1 = img->sampleLinear(1, fx, fy, fz, wm);
+                float b1 = img->sampleLinear(2, fx, fy, fz, wm);
+                float a1 = img->sampleLinear(3, fx, fy, fz, wm); // alpha is component 3, not 2
+
+                float dr = r0 - r1;
+                float dg = g0 - g1;
+                float db = b0 - b1;
+                float da = a0 - a1;
+
+                float w = 1;
+                if (alphaWeight) w = a0 * a0; // @@ a0*a1 or a0*a0 ?
+
+                mse += (dr * dr) * w;
+                mse += (dg * dg) * w;
+                mse += (db * db) * w;
+                mse += (da * da);
+            }
+        }
+    }
+
+    // Computed in double so that very large images cannot wrap the texel count.
+    const double count = double(w0) * double(h0) * double(d0);
+    return float(sqrt(mse / count));
+}
+
+
+// Color space conversions based on:
+// http://www.brucelindbloom.com/
+
+// Assumes input is in *linear* sRGB color space.
+static Vector3 rgbToXyz(Vector3::Arg c)
+{
+    Vector3 xyz;
+    xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z;
+    xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z;
+    xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z;
+    return xyz;
+}
+
+/// XYZ to RGB through a fixed 3x3 matrix.
+/// NOTE(review): the coefficients are not the exact inverse of the matrix in rgbToXyz(),
+/// so a round trip is only approximate - confirm this is intended.
+static Vector3 xyzToRgb(Vector3::Arg c)
+{
+    Vector3 rgb;
+    rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z;
+    rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z;
+    rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z;
+    return rgb;
+}
+
+/// Gamma 2.2 decoding of a single value.
+static float toLinear(float f)
+{
+    return powf(f, 2.2f);
+}
+
+/// Gamma 2.2 encoding of a single value.
+static float toGamma(float f)
+{
+    // @@ Use sRGB space?
+    return powf(f, 1.0f/2.2f);
+}
+
+/// Component-wise toLinear().
+static Vector3 toLinear(Vector3::Arg c)
+{
+    return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z));
+}
+
+/// Component-wise toGamma().
+static Vector3 toGamma(Vector3::Arg c)
+{
+    return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z));
+}
+
+/// Companding function of the XYZ -> Lab conversion: a cube root above (6/29)^3 and a
+/// linear segment below it.
+static float f(float t)
+{
+    const float epsilon = powf(6.0f/29.0f, 3);
+
+    if (t > epsilon) {
+        return powf(t, 1.0f/3.0f);
+    }
+    else {
+        return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f;
+    }
+}
+
+/// Inverse of f().
+static float finv(float t)
+{
+    // f() switches branches at an input of (6/29)^3, i.e. at an output of 6/29: values above
+    // that came from the cube root, values below from the linear segment.
+    if (t > 6.0f / 29.0f) {
+        return powf(t, 3.0f);
+    }
+    else {
+        return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f);
+    }
+}
+
+/// CIE XYZ to CIE L*a*b*, returned as (L, a, b).
+static Vector3 xyzToCieLab(Vector3::Arg c)
+{
+    // Normalized white point.
+    const float Xn = 0.950456f;
+    const float Yn = 1.0f;
+    const float Zn = 1.088754f;
+
+    float Xr = c.x / Xn;
+    float Yr = c.y / Yn;
+    float Zr = c.z / Zn;
+
+    float fx = f(Xr);
+    float fy = f(Yr);
+    float fz = f(Zr);
+
+    // Lightness is defined on the Y (luminance) term; X and Z only enter a and b.
+    float L = 116 * fy - 16;
+    float a = 500 * (fx - fy);
+    float b = 200 * (fy - fz);
+
+    return Vector3(L, a, b);
+}
+
+/// RGB to CIE L*a*b*. The input is passed through toLinear() first, i.e. it is treated as
+/// gamma 2.2 encoded.
+static Vector3 rgbToCieLab(Vector3::Arg c)
+{
+    return xyzToCieLab(rgbToXyz(toLinear(c)));
+}
+
+/// (L, a, b) to (L, C, h): C is the chroma sqrt(a^2 + b^2) and h the hue angle
+/// atan2(b, a) in radians.
+static Vector3 cieLabToLCh(Vector3::Arg c)
+{
+    return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.z, c.y));
+}
+
+/// Converts the first three channels of `rgbImage` to L, a, b and stores them in the first
+/// three channels of `LabImage`.
+/// NOTE(review): only width * height texels are converted; slices beyond the first of a
+/// volume image are left untouched - confirm whether that is intended.
+static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
+{
+    nvDebugCheck(rgbImage != NULL && LabImage != NULL);
+    nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
+    nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
+
+    const uint w = rgbImage->width();
+    const uint h = LabImage->height();
+
+    const float * R = rgbImage->channel(0);
+    const float * G = rgbImage->channel(1);
+    const float * B = rgbImage->channel(2);
+
+    float * L = LabImage->channel(0);
+    float * a = LabImage->channel(1);
+    float * b = LabImage->channel(2);
+
+    const uint count = w*h;
+    for (uint i = 0; i < count; i++)
+    {
+        Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i]));
+        L[i] = Lab.x;
+        a[i] = Lab.y;
+        b[i] = Lab.z;
+    }
+}
+
+
+// Assumes input images are in linear sRGB space.
+float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
+{
+    // Mean Euclidean distance between the Lab colors of corresponding pixels.
+    // Returns FLT_MAX when sameLayout(img0, img1) fails.
+    if (!sameLayout(img0, img1)) return FLT_MAX;
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    const float * r0 = img0->channel(0);
+    const float * g0 = img0->channel(1);
+    const float * b0 = img0->channel(2);
+
+    const float * r1 = img1->channel(0);
+    const float * g1 = img1->channel(1);
+    const float * b1 = img1->channel(2);
+
+    double error = 0.0f;
+
+    const uint count = img0->pixelCount();
+    for (uint i = 0; i < count; i++)
+    {
+        Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
+        Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
+
+        // @@ Measure Delta E.
+        Vector3 delta = lab0 - lab1;
+
+        error += length(delta);
+    }
+
+    return float(error / count);
+}
+
+// Assumes input images are in linear sRGB space.
+// Mean CIE94 color difference between corresponding pixels, with img0 acting as the
+// reference color of the formula. Returns FLT_MAX when sameLayout(img0, img1) fails.
+float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1)
+{
+    if (!sameLayout(img0, img1)) return FLT_MAX;
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    // CIE94 weighting constants.
+    const float kL = 1;
+    const float kC = 1;
+    const float kH = 1;
+    const float k1 = 0.045f;
+    const float k2 = 0.015f;
+
+    const float sL = 1;
+
+    const float * r0 = img0->channel(0);
+    const float * g0 = img0->channel(1);
+    const float * b0 = img0->channel(2);
+
+    const float * r1 = img1->channel(0);
+    const float * g1 = img1->channel(1);
+    const float * b1 = img1->channel(2);
+
+    double error = 0.0f;
+
+    const uint count = img0->pixelCount();
+    for (uint i = 0; i < count; ++i)
+    {
+        Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
+        Vector3 lch0 = cieLabToLCh(lab0);
+        Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
+        Vector3 lch1 = cieLabToLCh(lab1);
+
+        // sC and sH scale with the chroma of the reference color (lch0.y), not its lightness.
+        const float sC = 1 + k1*lch0.y;
+        const float sH = 1 + k2*lch0.y;
+
+        // @@ Measure Delta E using the 1994 definition
+        Vector3 labDelta = lab0 - lab1;
+        Vector3 lchDelta = lch0 - lch1;
+
+        double deltaLsq = powf(lchDelta.x / (kL*sL), 2);
+        double deltaCsq = powf(lchDelta.y / (kC*sC), 2);
+
+        // avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2
+        double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2);
+        // da^2 + db^2 - dC^2 is non-negative in exact arithmetic; rounding can push it just
+        // below zero, which would turn the sqrt below into NaN.
+        if (deltaHsq < 0.0) deltaHsq = 0.0;
+        deltaHsq /= powf(kH*sH, 2);
+
+        error += sqrt(deltaLsq + deltaCsq + deltaHsq);
+    }
+
+    return float(error / count);
+}
+
+// Unfinished: converts both images to Lab, but the filtering and the measurement are still
+// missing, so the result is always 0 for images of matching width and height.
+// Returns FLT_MAX when either image is NULL or their width or height differ.
+float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
+{
+    if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
+        return FLT_MAX;
+    }
+    nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
+
+    uint w = img0->width();
+    uint h = img0->height();
+    uint d = img0->depth();
+
+    FloatImage lab0, lab1; // Original images in CIE-Lab space.
+    lab0.allocate(3, w, h, d);
+    lab1.allocate(3, w, h, d);
+
+    // Convert input images to CIE-Lab.
+    rgbToCieLab(img0, &lab0);
+    rgbToCieLab(img1, &lab1);
+
+    // @@ Convolve each channel by the corresponding filter.
+    /*
+    GaussianFilter LFilter(5);
+    GaussianFilter aFilter(5);
+    GaussianFilter bFilter(5);
+
+    lab0.convolve(0, LFilter);
+    lab0.convolve(1, aFilter);
+    lab0.convolve(2, bFilter);
+
+    lab1.convolve(0, LFilter);
+    lab1.convolve(1, aFilter);
+    lab1.convolve(2, bFilter);
+    */
+    // @@ Measure Delta E between lab0 and lab1.
+
+    return 0.0f;
+}
+
+
+// Assumes input images are normal maps.
+float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1) +{ + if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { + return FLT_MAX; + } + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + uint w = img0->width(); + uint h = img0->height(); + + const float * x0 = img0->channel(0); + const float * y0 = img0->channel(1); + const float * z0 = img0->channel(2); + + const float * x1 = img1->channel(0); + const float * y1 = img1->channel(1); + const float * z1 = img1->channel(2); + + double error = 0.0f; + + const uint count = w*h; + for (uint i = 0; i < count; i++) + { + Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); + Vector3 n1 = Vector3(x1[i], y1[i], z1[i]); + + n0 = 2.0f * n0 - Vector3(1); + n1 = 2.0f * n1 - Vector3(1); + + n0 = normalizeSafe(n0, Vector3(0), 0.0f); + n1 = normalizeSafe(n1, Vector3(0), 0.0f); + + error += acos(clamp(dot(n0, n1), -1.0f, 1.0f)); + } + + return float(error / count); +} + +float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1) +{ + if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) { + return FLT_MAX; + } + nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4); + + uint w = img0->width(); + uint h = img0->height(); + + const float * x0 = img0->channel(0); + const float * y0 = img0->channel(1); + const float * z0 = img0->channel(2); + + const float * x1 = img1->channel(0); + const float * y1 = img1->channel(1); + const float * z1 = img1->channel(2); + + double error = 0.0f; + + const uint count = w*h; + for (uint i = 0; i < count; i++) + { + Vector3 n0 = Vector3(x0[i], y0[i], z0[i]); + Vector3 n1 = Vector3(x1[i], y1[i], z1[i]); + + n0 = 2.0f * n0 - Vector3(1); + n1 = 2.0f * n1 - Vector3(1); + + n0 = normalizeSafe(n0, Vector3(0), 0.0f); + n1 = normalizeSafe(n1, Vector3(0), 0.0f); + + float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f)); + error += 
angle * angle; + } + + return float(sqrt(error / count)); +} + diff --git a/src/nvimage/ErrorMetric.h b/src/nvimage/ErrorMetric.h index b875802..aa43d0c 100644 --- a/src/nvimage/ErrorMetric.h +++ b/src/nvimage/ErrorMetric.h @@ -1,5 +1,6 @@ #include "nvimage.h" +#include "FloatImage.h" // For FloatImage::WrapMode namespace nv @@ -9,13 +10,15 @@ namespace nv float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); float rmsAlphaError(const FloatImage * ref, const FloatImage * img); + float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); + float averageAlphaError(const FloatImage * ref, const FloatImage * img); + + float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight); + float cieLabError(const FloatImage * ref, const FloatImage * img); float cieLab94Error(const FloatImage * ref, const FloatImage * img); float spatialCieLabError(const FloatImage * ref, const FloatImage * img); - float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); - float averageAlphaError(const FloatImage * ref, const FloatImage * img); - float averageAngularError(const FloatImage * img0, const FloatImage * img1); float rmsAngularError(const FloatImage * img0, const FloatImage * img1); diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp index 43164f7..f611328 100644 --- a/src/nvimage/FloatImage.cpp +++ b/src/nvimage/FloatImage.cpp @@ -1,1471 +1,1499 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "FloatImage.h" -#include "Filter.h" -#include "Image.h" - -#include "nvmath/Color.h" -#include "nvmath/Vector.inl" -#include "nvmath/Matrix.inl" -#include "nvmath/ftoi.h" -#include "nvmath/Gamma.h" - -#include "nvcore/Utils.h" // max -#include "nvcore/Ptr.h" -#include "nvcore/Memory.h" -#include "nvcore/Array.inl" - -#include -#include // memset, memcpy - - -using namespace nv; - - -/// Ctor. 
-FloatImage::FloatImage() : m_componentCount(0), m_width(0), m_height(0), m_depth(0), - m_pixelCount(0), m_floatCount(0), m_mem(NULL) -{ -} - -/// Ctor. Init from image. -FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0), - m_pixelCount(0), m_floatCount(0), m_mem(NULL) -{ - initFrom(img); -} - -/// Dtor. -FloatImage::~FloatImage() -{ - free(); -} - - -/// Init the floating point image from a regular image. -void FloatImage::initFrom(const Image * img) -{ - nvCheck(img != NULL); - - allocate(4, img->width(), img->height(), img->depth()); - - float * red_channel = channel(0); - float * green_channel = channel(1); - float * blue_channel = channel(2); - float * alpha_channel = channel(3); - - const uint count = m_pixelCount; - for (uint i = 0; i < count; i++) { - Color32 pixel = img->pixel(i); - red_channel[i] = float(pixel.r) / 255.0f; - green_channel[i] = float(pixel.g) / 255.0f; - blue_channel[i] = float(pixel.b) / 255.0f; - alpha_channel[i] = float(pixel.a) / 255.0f; - } -} - -/// Convert the floating point image to a regular image. -Image * FloatImage::createImage(uint baseComponent/*= 0*/, uint num/*= 4*/) const -{ - nvCheck(num <= 4); - nvCheck(baseComponent + num <= m_componentCount); - - AutoPtr img(new Image()); - img->allocate(m_width, m_height, m_depth); - - for (uint i = 0; i < m_pixelCount; i++) { - - uint c; - uint8 rgba[4]= {0, 0, 0, 0xff}; - - for (c = 0; c < num; c++) { - float f = pixel(baseComponent + c, i); - rgba[c] = nv::clamp(int(255.0f * f), 0, 255); - } - - img->pixel(i) = Color32(rgba[0], rgba[1], rgba[2], rgba[3]); - } - - return img.release(); -} - - -/// Convert the floating point image to a regular image. Correct gamma of rgb, but not alpha. 
-Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const -{ - nvCheck(m_componentCount == 4); - - AutoPtr img(new Image()); - img->allocate(m_width, m_height, m_depth); - - const float * rChannel = this->channel(0); - const float * gChannel = this->channel(1); - const float * bChannel = this->channel(2); - const float * aChannel = this->channel(3); - - const uint count = m_pixelCount; - for (uint i = 0; i < count; i++) - { - const uint8 r = nv::clamp(int(255.0f * pow(rChannel[i], 1.0f/gamma)), 0, 255); - const uint8 g = nv::clamp(int(255.0f * pow(gChannel[i], 1.0f/gamma)), 0, 255); - const uint8 b = nv::clamp(int(255.0f * pow(bChannel[i], 1.0f/gamma)), 0, 255); - const uint8 a = nv::clamp(int(255.0f * aChannel[i]), 0, 255); - - img->pixel(i) = Color32(r, g, b, a); - } - - return img.release(); -} - -/// Allocate a 2D float image of the given format and the given extents. -void FloatImage::allocate(uint c, uint w, uint h, uint d) -{ - if (m_componentCount != c || m_width != w || m_height != h || m_depth != d) - { - free(); - - m_width = w; - m_height = h; - m_depth = d; - m_componentCount = c; - m_pixelCount = w * h * d; - m_floatCount = m_pixelCount * c; - m_mem = malloc(m_floatCount); - } -} - -/// Free the image, but don't clear the members. 
-void FloatImage::free() -{ - ::free(m_mem); - m_mem = NULL; -} - -void FloatImage::resizeChannelCount(uint c) -{ - if (m_componentCount != c) { - uint count = m_pixelCount * c; - m_mem = realloc(m_mem, count); - - if (c > m_componentCount) { - memset(m_mem + m_floatCount, 0, (count - m_floatCount) * sizeof(float)); - } - - m_componentCount = c; - m_floatCount = count; - } -} - -void FloatImage::clear(float f/*=0.0f*/) -{ - for (uint i = 0; i < m_floatCount; i++) { - m_mem[i] = f; - } -} - -void FloatImage::clear(uint c, float f/*= 0.0f*/) -{ - float * channel = this->channel(c); - - const uint count = m_pixelCount; - for (uint i = 0; i < count; i++) { - channel[i] = f; - } -} - -void FloatImage::copyChannel(uint src, uint dst) -{ - nvCheck(src < m_componentCount); - nvCheck(dst < m_componentCount); - - const float * srcChannel = this->channel(src); - float * dstChannel = this->channel(dst); - - memcpy(dstChannel, srcChannel, sizeof(float)*m_pixelCount); -} - -void FloatImage::normalize(uint baseComponent) -{ - nvCheck(baseComponent + 3 <= m_componentCount); - - float * xChannel = this->channel(baseComponent + 0); - float * yChannel = this->channel(baseComponent + 1); - float * zChannel = this->channel(baseComponent + 2); - - const uint count = m_pixelCount; - for (uint i = 0; i < count; i++) { - - Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); - normal = normalizeSafe(normal, Vector3(0), 0.0f); - - xChannel[i] = normal.x; - yChannel[i] = normal.y; - zChannel[i] = normal.z; - } -} - -void FloatImage::packNormals(uint baseComponent) -{ - scaleBias(baseComponent, 3, 0.5f, 0.5f); -} - -void FloatImage::expandNormals(uint baseComponent) -{ - scaleBias(baseComponent, 3, 2, -1.0); -} - -void FloatImage::scaleBias(uint baseComponent, uint num, float scale, float bias) -{ - const uint size = m_pixelCount; - - for (uint c = 0; c < num; c++) { - float * ptr = this->channel(baseComponent + c); - - for (uint i = 0; i < size; i++) { - ptr[i] = scale * ptr[i] + bias; - } 
- } -} - -/// Clamp the elements of the image. -void FloatImage::clamp(uint baseComponent, uint num, float low, float high) -{ - const uint size = m_pixelCount; - - for (uint c = 0; c < num; c++) { - float * ptr = this->channel(baseComponent + c); - - for (uint i = 0; i < size; i++) { - ptr[i] = nv::clamp(ptr[i], low, high); - } - } -} - -/// From gamma to linear space. -void FloatImage::toLinear(uint baseComponent, uint num, float gamma /*= 2.2f*/) -{ - if (gamma == 2.2f) { - for (uint c = 0; c < num; c++) { - float * ptr = this->channel(baseComponent + c); - - powf_11_5(ptr, ptr, m_pixelCount); - } - } else { - exponentiate(baseComponent, num, gamma); - } -} - -/// From linear to gamma space. -void FloatImage::toGamma(uint baseComponent, uint num, float gamma /*= 2.2f*/) -{ - if (gamma == 2.2f) { - for (uint c = 0; c < num; c++) { - float * ptr = this->channel(baseComponent + c); - - powf_5_11(ptr, ptr, m_pixelCount); - } - } else { - exponentiate(baseComponent, num, 1.0f/gamma); - } -} - -/// Exponentiate the elements of the image. -void FloatImage::exponentiate(uint baseComponent, uint num, float power) -{ - const uint size = m_pixelCount; - - for(uint c = 0; c < num; c++) { - float * ptr = this->channel(baseComponent + c); - - for(uint i = 0; i < size; i++) { - ptr[i] = powf(max(0.0f, ptr[i]), power); - } - } -} - -/// Apply linear transform. 
-void FloatImage::transform(uint baseComponent, const Matrix & m, Vector4::Arg offset) -{ - nvCheck(baseComponent + 4 <= m_componentCount); - - float * r = this->channel(baseComponent + 0); - float * g = this->channel(baseComponent + 1); - float * b = this->channel(baseComponent + 2); - float * a = this->channel(baseComponent + 3); - - const uint size = m_pixelCount; - for (uint i = 0; i < size; i++) - { - Vector4 color = nv::transform(m, Vector4(*r, *g, *b, *a)) + offset; - - *r++ = color.x; - *g++ = color.y; - *b++ = color.z; - *a++ = color.w; - } -} - -void FloatImage::swizzle(uint baseComponent, uint r, uint g, uint b, uint a) -{ - nvCheck(baseComponent + 4 <= m_componentCount); - nvCheck(r < 7 && g < 7 && b < 7 && a < 7); - - float consts[] = { 1.0f, 0.0f, -1.0f }; - float * c[7]; - c[0] = this->channel(baseComponent + 0); - c[1] = this->channel(baseComponent + 1); - c[2] = this->channel(baseComponent + 2); - c[3] = this->channel(baseComponent + 3); - c[4] = consts; - c[5] = consts + 1; - c[6] = consts + 2; - - const uint size = m_pixelCount; - for (uint i = 0; i < size; i++) - { - float tmp[4] = { *c[r], *c[g], *c[b], *c[a] }; - - *c[0]++ = tmp[0]; - *c[1]++ = tmp[1]; - *c[2]++ = tmp[2]; - *c[3]++ = tmp[3]; - } -} - -float FloatImage::sampleNearest(uint c, float x, float y, const WrapMode wm) const -{ - if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y); - else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y); - else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y); -} - -float FloatImage::sampleLinear(uint c, float x, float y, WrapMode wm) const -{ - if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y); - else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y); - else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y); -} - -float FloatImage::sampleNearest(uint c, float x, float y, float z, WrapMode wm) const -{ - if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y, z); 
- else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y, z); - else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y, z); -} - -float FloatImage::sampleLinear(uint c, float x, float y, float z, WrapMode wm) const -{ - if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y, z); - else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y, z); - else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y, z); -} - -float FloatImage::sampleNearestClamp(uint c, float x, float y) const -{ - int ix = wrapClamp(iround(x * m_width), m_width); - int iy = wrapClamp(iround(y * m_height), m_height); - return pixel(c, ix, iy, 0); -} - -float FloatImage::sampleNearestRepeat(uint c, float x, float y) const -{ - int ix = wrapRepeat(iround(x * m_width), m_width); - int iy = wrapRepeat(iround(y * m_height), m_height); - return pixel(c, ix, iy, 0); -} - -float FloatImage::sampleNearestMirror(uint c, float x, float y) const -{ - int ix = wrapMirror(iround(x * m_width), m_width); - int iy = wrapMirror(iround(y * m_height), m_height); - return pixel(c, ix, iy, 0); -} - -float FloatImage::sampleNearestClamp(uint c, float x, float y, float z) const -{ - int ix = wrapClamp(iround(x * m_width), m_width); - int iy = wrapClamp(iround(y * m_height), m_height); - int iz = wrapClamp(iround(z * m_depth), m_depth); - return pixel(c, ix, iy, iz); -} - -float FloatImage::sampleNearestRepeat(uint c, float x, float y, float z) const -{ - int ix = wrapRepeat(iround(x * m_width), m_width); - int iy = wrapRepeat(iround(y * m_height), m_height); - int iz = wrapRepeat(iround(z * m_depth), m_depth); - return pixel(c, ix, iy, iz); -} - -float FloatImage::sampleNearestMirror(uint c, float x, float y, float z) const -{ - int ix = wrapMirror(iround(x * m_width), m_width); - int iy = wrapMirror(iround(y * m_height), m_height); - int iz = wrapMirror(iround(z * m_depth), m_depth); - return pixel(c, ix, iy, iz); -} - - -float FloatImage::sampleLinearClamp(uint 
c, float x, float y) const -{ - const int w = m_width; - const int h = m_height; - - x *= w; - y *= h; - - const float fracX = frac(x); - const float fracY = frac(y); - - const int ix0 = ::clamp(ifloor(x), 0, w-1); - const int iy0 = ::clamp(ifloor(y), 0, h-1); - const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); - const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); - - return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); -} - -float FloatImage::sampleLinearRepeat(uint c, float x, float y) const -{ - const int w = m_width; - const int h = m_height; - - const float fracX = frac(x * w); - const float fracY = frac(y * h); - - // @@ Using floor in some places, but round in others? - int ix0 = ifloor(frac(x) * w); - int iy0 = ifloor(frac(y) * h); - int ix1 = ifloor(frac(x + 1.0f/w) * w); - int iy1 = ifloor(frac(y + 1.0f/h) * h); - - return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); -} - -float FloatImage::sampleLinearMirror(uint c, float x, float y) const -{ - const int w = m_width; - const int h = m_height; - - x *= w; - y *= h; - - const float fracX = frac(x); - const float fracY = frac(y); - - int ix0 = wrapMirror(iround(x), w); - int iy0 = wrapMirror(iround(y), h); - int ix1 = wrapMirror(iround(x) + 1, w); - int iy1 = wrapMirror(iround(y) + 1, h); - - return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); -} - -float FloatImage::sampleLinearClamp(uint c, float x, float y, float z) const -{ - const int w = m_width; - const int h = m_height; - const int d = m_depth; - - x *= w; - y *= h; - z *= d; - - const float fracX = frac(x); - const float fracY = frac(y); - const float fracZ = frac(z); - - // @@ Using floor in some places, but round in others? 
- const int ix0 = ::clamp(ifloor(x), 0, w-1); - const int iy0 = ::clamp(ifloor(y), 0, h-1); - const int iz0 = ::clamp(ifloor(z), 0, h-1); - const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); - const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); - const int iz1 = ::clamp(ifloor(z)+1, 0, h-1); - - return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); -} - -float FloatImage::sampleLinearRepeat(uint c, float x, float y, float z) const -{ - const int w = m_width; - const int h = m_height; - const int d = m_depth; - - const float fracX = frac(x * w); - const float fracY = frac(y * h); - const float fracZ = frac(z * d); - - int ix0 = ifloor(frac(x) * w); - int iy0 = ifloor(frac(y) * h); - int iz0 = ifloor(frac(z) * d); - int ix1 = ifloor(frac(x + 1.0f/w) * w); - int iy1 = ifloor(frac(y + 1.0f/h) * h); - int iz1 = ifloor(frac(z + 1.0f/d) * d); - - return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); -} - -float FloatImage::sampleLinearMirror(uint c, float x, float y, float z) const -{ - const int w = m_width; - const int h = m_height; - const int d = m_depth; - - x *= w; - y *= h; - z *= d; - - int ix0 = wrapMirror(iround(x), w); - int iy0 = wrapMirror(iround(y), h); - int iz0 = wrapMirror(iround(z), d); - int ix1 = wrapMirror(iround(x) + 1, w); - int iy1 = wrapMirror(iround(y) + 1, h); - int iz1 = wrapMirror(iround(z) + 1, d); - - const float fracX = frac(x); - const float fracY = frac(y); - const float fracZ = frac(z); - - return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); -} - - -/// Fast downsampling using box filter. -/// -/// The extents of the image are divided by two and rounded down. 
-/// -/// When the size of the image is odd, this uses a polyphase box filter as explained in: -/// http://developer.nvidia.com/object/np2_mipmapping.html -/// -FloatImage * FloatImage::fastDownSample() const -{ - nvDebugCheck(m_depth == 1); - nvDebugCheck(m_width != 1 || m_height != 1); - - AutoPtr dst_image( new FloatImage() ); - - const uint w = max(1, m_width / 2); - const uint h = max(1, m_height / 2); - dst_image->allocate(m_componentCount, w, h); - - // 1D box filter. - if (m_width == 1 || m_height == 1) - { - const uint n = w * h; - - if ((m_width * m_height) & 1) - { - const float scale = 1.0f / (2 * n + 1); - - for(uint c = 0; c < m_componentCount; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint x = 0; x < n; x++) - { - const float w0 = float(n - x); - const float w1 = float(n - 0); - const float w2 = float(1 + x); - - *dst++ = scale * (w0 * src[0] + w1 * src[1] + w2 * src[2]); - src += 2; - } - } - } - else - { - for(uint c = 0; c < m_componentCount; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint x = 0; x < n; x++) - { - *dst = 0.5f * (src[0] + src[1]); - dst++; - src += 2; - } - } - } - } - - // Regular box filter. - else if ((m_width & 1) == 0 && (m_height & 1) == 0) - { - for(uint c = 0; c < m_componentCount; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - for(uint x = 0; x < w; x++) - { - *dst = 0.25f * (src[0] + src[1] + src[m_width] + src[m_width + 1]); - dst++; - src += 2; - } - - src += m_width; - } - } - } - - // Polyphase filters. 
- else if (m_width & 1 && m_height & 1) - { - nvDebugCheck(m_width == 2 * w + 1); - nvDebugCheck(m_height == 2 * h + 1); - - const float scale = 1.0f / (m_width * m_height); - - for(uint c = 0; c < m_componentCount; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - const float v0 = float(h - y); - const float v1 = float(h - 0); - const float v2 = float(1 + y); - - for (uint x = 0; x < w; x++) - { - const float w0 = float(w - x); - const float w1 = float(w - 0); - const float w2 = float(1 + x); - - float f = 0.0f; - f += v0 * (w0 * src[0 * m_width + 2 * x] + w1 * src[0 * m_width + 2 * x + 1] + w2 * src[0 * m_width + 2 * x + 2]); - f += v1 * (w0 * src[1 * m_width + 2 * x] + w1 * src[1 * m_width + 2 * x + 1] + w2 * src[1 * m_width + 2 * x + 2]); - f += v2 * (w0 * src[2 * m_width + 2 * x] + w1 * src[2 * m_width + 2 * x + 1] + w2 * src[2 * m_width + 2 * x + 2]); - - *dst = f * scale; - dst++; - } - - src += 2 * m_width; - } - } - } - else if (m_width & 1) - { - nvDebugCheck(m_width == 2 * w + 1); - const float scale = 1.0f / (2 * m_width); - - for(uint c = 0; c < m_componentCount; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - const float w0 = float(w - x); - const float w1 = float(w - 0); - const float w2 = float(1 + x); - - float f = 0.0f; - f += w0 * (src[2 * x + 0] + src[m_width + 2 * x + 0]); - f += w1 * (src[2 * x + 1] + src[m_width + 2 * x + 1]); - f += w2 * (src[2 * x + 2] + src[m_width + 2 * x + 2]); - - *dst = f * scale; - dst++; - } - - src += 2 * m_width; - } - } - } - else if (m_height & 1) - { - nvDebugCheck(m_height == 2 * h + 1); - - const float scale = 1.0f / (2 * m_height); - - for(uint c = 0; c < m_componentCount; c++) - { - const float * src = this->channel(c); - float * dst = dst_image->channel(c); - - for(uint y = 0; y < h; y++) - { - const float v0 = float(h - 
y); - const float v1 = float(h - 0); - const float v2 = float(1 + y); - - for (uint x = 0; x < w; x++) - { - float f = 0.0f; - f += v0 * (src[0 * m_width + 2 * x] + src[0 * m_width + 2 * x + 1]); - f += v1 * (src[1 * m_width + 2 * x] + src[1 * m_width + 2 * x + 1]); - f += v2 * (src[2 * m_width + 2 * x] + src[2 * m_width + 2 * x + 1]); - - *dst = f * scale; - dst++; - } - - src += 2 * m_width; - } - } - } - - return dst_image.release(); -} - -/// Downsample applying a 1D kernel separately in each dimension. -FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const -{ - const uint w = max(1, m_width / 2); - const uint h = max(1, m_height / 2); - const uint d = max(1, m_depth / 2); - - return resize(filter, w, h, d, wm); -} - -/// Downsample applying a 1D kernel separately in each dimension. -FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const -{ - const uint w = max(1, m_width / 2); - const uint h = max(1, m_height / 2); - const uint d = max(1, m_depth / 2); - - return resize(filter, w, h, d, wm, alpha); -} - - -/// Downsample applying a 1D kernel separately in each dimension. -FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const -{ - // @@ Use monophase filters when frac(m_width / w) == 0 - - AutoPtr tmp_image( new FloatImage() ); - AutoPtr dst_image( new FloatImage() ); - - PolyphaseKernel xkernel(filter, m_width, w, 32); - PolyphaseKernel ykernel(filter, m_height, h, 32); - - // @@ Select fastest filtering order: - //if (w * m_height <= h * m_width) - { - tmp_image->allocate(m_componentCount, w, m_height); - dst_image->allocate(m_componentCount, w, h); - - // @@ We could avoid this allocation, write directly to dst_plane. 
- Array tmp_column(h); - tmp_column.resize(h); - - for (uint c = 0; c < m_componentCount; c++) - { - for (uint z = 0; z < m_depth; z++) - { - float * tmp_plane = tmp_image->plane(c, z); - - for (uint y = 0; y < m_height; y++) { - this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); - } - - float * dst_plane = dst_image->plane(c, z); - - for (uint x = 0; x < w; x++) { - tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); - - // @@ We could avoid this copy, write directly to dst_plane. - for (uint y = 0; y < h; y++) { - dst_plane[y * w + x] = tmp_column[y]; - } - } - } - } - } - - return dst_image.release(); -} - -/// Downsample applying a 1D kernel separately in each dimension. (for 3d textures) -FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const -{ - // @@ Use monophase filters when frac(m_width / w) == 0 - - // Use the existing 2d version if we are not resizing in the Z axis: - if (m_depth == d) { - return resize(filter, w, h, wm); - } - - AutoPtr tmp_image( new FloatImage() ); - AutoPtr tmp_image2( new FloatImage() ); - AutoPtr dst_image( new FloatImage() ); - - PolyphaseKernel xkernel(filter, m_width, w, 32); - PolyphaseKernel ykernel(filter, m_height, h, 32); - PolyphaseKernel zkernel(filter, m_depth, d, 32); - - tmp_image->allocate(m_componentCount, w, m_height, m_depth); - tmp_image2->allocate(m_componentCount, w, m_height, d); - dst_image->allocate(m_componentCount, w, h, d); - - Array tmp_column(h); - tmp_column.resize(h); - - for (uint c = 0; c < m_componentCount; c++) - { - float * tmp_channel = tmp_image->channel(c); - - // split width in half - for (uint z = 0; z < m_depth; z++ ) { - for (uint y = 0; y < m_height; y++) { - this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w); - } - } - - // split depth in half - float * tmp2_channel = tmp_image2->channel(c); - for (uint y = 0; y < m_height; y++) { - for (uint x = 0; x < w; x++) { - 
tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() ); - - for (uint z = 0; z < d; z++) { - tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z]; - } - } - } - - // split height in half - float * dst_channel = dst_image->channel(c); - - for (uint z = 0; z < d; z++ ) { - for (uint x = 0; x < w; x++) { - tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); - - for (uint y = 0; y < h; y++) { - dst_channel[z * h * w + y * w + x] = tmp_column[y]; - } - } - } - } - - return dst_image.release(); -} - - -/// Downsample applying a 1D kernel separately in each dimension. -FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const -{ - nvCheck(alpha < m_componentCount); - - AutoPtr tmp_image( new FloatImage() ); - AutoPtr dst_image( new FloatImage() ); - - PolyphaseKernel xkernel(filter, m_width, w, 32); - PolyphaseKernel ykernel(filter, m_height, h, 32); - - { - tmp_image->allocate(m_componentCount, w, m_height); - dst_image->allocate(m_componentCount, w, h); - - Array tmp_column(h); - tmp_column.resize(h); - - for (uint i = 0; i < m_componentCount; i++) - { - // Process alpha channel first. - uint c; - if (i == 0) c = alpha; - else if (i > alpha) c = i; - else c = i - 1; - - for (uint z = 0; z < m_depth; z++) - { - float * tmp_plane = tmp_image->plane(c, z); - - for (uint y = 0; y < m_height; y++) { - this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); - } - - float * dst_plane = dst_image->plane(c, z); - - for (uint x = 0; x < w; x++) { - tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); - - // @@ Avoid this copy, write directly to dst_plane. - for (uint y = 0; y < h; y++) { - dst_plane[y * w + x] = tmp_column[y]; - } - } - } - } - } - - return dst_image.release(); -} - - -/// Downsample applying a 1D kernel separately in each dimension. 
(for 3d textures) -FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const -{ - nvCheck(alpha < m_componentCount); - - // use the existing 2d version if we are a 2d image: - if (m_depth == d) { - return resize( filter, w, h, wm, alpha ); - } - - AutoPtr tmp_image( new FloatImage() ); - AutoPtr tmp_image2( new FloatImage() ); - AutoPtr dst_image( new FloatImage() ); - - PolyphaseKernel xkernel(filter, m_width, w, 32); - PolyphaseKernel ykernel(filter, m_height, h, 32); - PolyphaseKernel zkernel(filter, m_depth, d, 32); - - tmp_image->allocate(m_componentCount, w, m_height, m_depth); - tmp_image2->allocate(m_componentCount, w, m_height, d); - dst_image->allocate(m_componentCount, w, h, d); - - Array tmp_column(h); - tmp_column.resize(h); - - for (uint i = 0; i < m_componentCount; i++) - { - // Process alpha channel first. - uint c; - if (i == 0) c = alpha; - else if (i > alpha) c = i; - else c = i - 1; - - float * tmp_channel = tmp_image->channel(c); - - for (uint z = 0; z < m_depth; z++ ) { - for (uint y = 0; y < m_height; y++) { - this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w); - } - } - - float * tmp2_channel = tmp_image2->channel(c); - for (uint y = 0; y < m_height; y++) { - for (uint x = 0; x < w; x++) { - tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() ); - - for (uint z = 0; z < d; z++) { - tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z]; - } - } - } - - float * dst_channel = dst_image->channel(c); - - for (uint z = 0; z < d; z++ ) { - for (uint x = 0; x < w; x++) { - tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); - - for (uint y = 0; y < h; y++) { - dst_channel[z * h * w + y * w + x] = tmp_column[y]; - } - } - } - } - - return dst_image.release(); -} - - -void FloatImage::convolve(const Kernel2 & k, uint c, WrapMode wm) -{ - AutoPtr tmpImage(clone()); - - uint w = m_width; - uint h = m_height; - uint d = m_depth; - - for 
(uint z = 0; z < d; z++) - { - for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - pixel(c, x, y, 0) = tmpImage->applyKernelXY(&k, x, y, z, c, wm); - } - } - } -} - - -/// Apply 2D kernel at the given coordinates and return result. -float FloatImage::applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const -{ - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2); - - const float * channel = this->plane(c, z); - - float sum = 0.0f; - for (uint i = 0; i < kernelWindow; i++) - { - int src_y = int(y + i) - kernelOffset; - - for (uint e = 0; e < kernelWindow; e++) - { - int src_x = int(x + e) - kernelOffset; - - int idx = this->index(src_x, src_y, z, wm); - - sum += k->valueAt(e, i) * channel[idx]; - } - } - - return sum; -} - - -/// Apply 1D horizontal kernel at the given coordinates and return result. -float FloatImage::applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const -{ - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2); - - const float * channel = this->channel(c); - - float sum = 0.0f; - for (uint i = 0; i < kernelWindow; i++) - { - const int src_x = int(x + i) - kernelOffset; - const int idx = this->index(src_x, y, z, wm); - - sum += k->valueAt(i) * channel[idx]; - } - - return sum; -} - -/// Apply 1D vertical kernel at the given coordinates and return result. 
-float FloatImage::applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const -{ - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2); - - const float * channel = this->channel(c); - - float sum = 0.0f; - for (uint i = 0; i < kernelWindow; i++) - { - const int src_y = int(y + i) - kernelOffset; - const int idx = this->index(x, src_y, z, wm); - - sum += k->valueAt(i) * channel[idx]; - } - - return sum; -} - -/// Apply 1D kernel in the z direction at the given coordinates and return result. -float FloatImage::applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const -{ - nvDebugCheck(k != NULL); - - const uint kernelWindow = k->windowSize(); - const int kernelOffset = int(kernelWindow / 2); - - const float * channel = this->channel(c); - - float sum = 0.0f; - for (uint i = 0; i < kernelWindow; i++) - { - const int src_z = int(z + i) - kernelOffset; - const int idx = this->index(x, y, src_z, wm); - - sum += k->valueAt(i) * channel[idx]; - } - - return sum; -} - - -/// Apply 1D horizontal kernel at the given coordinates and return result. 
-void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * __restrict output) const -{ - const uint length = k.length(); - const float scale = float(length) / float(m_width); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvDebugCheck(right - left <= windowSize); - - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(left + j, y, z, wm); - - sum += k.valueAt(i, j) * channel[idx]; - } - - output[i] = sum; - } -} - -/// Apply 1D vertical kernel at the given coordinates and return result. -void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output) const -{ - const uint length = k.length(); - const float scale = float(length) / float(m_height); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvCheck(right - left <= windowSize); - - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(x, j+left, z, wm); - - sum += k.valueAt(i, j) * channel[idx]; - } - - output[i] = sum; - } -} - -/// Apply 1D kernel in the Z direction at the given coordinates and return result. 
-void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * __restrict output) const -{ - const uint length = k.length(); - const float scale = float(length) / float(m_height); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvCheck(right - left <= windowSize); - - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(x, y, j+left, wm); - - sum += k.valueAt(i, j) * channel[idx]; - } - - output[i] = sum; - } -} - - -/// Apply 1D horizontal kernel at the given coordinates and return result. -void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * __restrict output) const -{ - const uint length = k.length(); - const float scale = float(length) / float(m_width); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - const float * alpha = this->channel(a); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvDebugCheck(right - left <= windowSize); - - float norm = 0.0f; - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(left + j, y, z, wm); - - float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); - norm += w; - sum += w * channel[idx]; - } - - output[i] = sum / norm; - } -} - -/// Apply 1D vertical kernel at the given coordinates and return result. 
-void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output) const -{ - const uint length = k.length(); - const float scale = float(length) / float(m_height); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - const float * alpha = this->channel(a); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvCheck(right - left <= windowSize); - - float norm = 0; - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(x, j+left, z, wm); - - float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); - norm += w; - sum += w * channel[idx]; - } - - output[i] = sum / norm; - } -} - -/// Apply 1D horizontal kernel at the given coordinates and return result. -void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * __restrict output) const -{ - const uint length = k.length(); - const float scale = float(length) / float(m_width); - const float iscale = 1.0f / scale; - - const float width = k.width(); - const int windowSize = k.windowSize(); - - const float * channel = this->channel(c); - const float * alpha = this->channel(a); - - for (uint i = 0; i < length; i++) - { - const float center = (0.5f + i) * iscale; - - const int left = (int)floorf(center - width); - const int right = (int)ceilf(center + width); - nvDebugCheck(right - left <= windowSize); - - float norm = 0.0f; - float sum = 0; - for (int j = 0; j < windowSize; ++j) - { - const int idx = this->index(x, y, left + j, wm); - - float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); - norm += w; - sum += w * channel[idx]; - } - - output[i] = sum / norm; - } -} - - -void FloatImage::flipX() -{ - const uint w = m_width; - const 
uint h = m_height; - const uint d = m_depth; - const uint w2 = w / 2; - - for (uint c = 0; c < m_componentCount; c++) { - for (uint z = 0; z < d; z++) { - for (uint y = 0; y < h; y++) { - float * line = scanline(c, y, z); - for (uint x = 0; x < w2; x++) { - swap(line[x], line[w - 1 - x]); - } - } - } - } -} - -void FloatImage::flipY() -{ - const uint w = m_width; - const uint h = m_height; - const uint d = m_depth; - const uint h2 = h / 2; - - for (uint c = 0; c < m_componentCount; c++) { - for (uint z = 0; z < d; z++) { - for (uint y = 0; y < h2; y++) { - float * src = scanline(c, y, z); - float * dst = scanline(c, h - 1 - y, z); - for (uint x = 0; x < w; x++) { - swap(src[x], dst[x]); - } - } - } - } -} - -void FloatImage::flipZ() -{ - const uint w = m_width; - const uint h = m_height; - const uint d = m_depth; - const uint d2 = d / 2; - - for (uint c = 0; c < m_componentCount; c++) { - for (uint z = 0; z < d2; z++) { - float * src = plane(c, z); - float * dst = plane(c, d - 1 - z); - for (uint i = 0; i < w*h; i++) { - swap(src[i], dst[i]); - } - } - } -} - - - -float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale/*=1*/) const -{ - const uint w = m_width; - const uint h = m_height; - - float coverage = 0.0f; - -#if 0 - const float * alpha = channel(alphaChannel); - - const uint count = m_pixelCount; - for (uint i = 0; i < count; i++) { - if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt? 
- } - - return coverage / float(w * h); -#else - const uint n = 8; - - // If we want subsampling: - for (uint y = 0; y < h-1; y++) { - for (uint x = 0; x < w-1; x++) { - - float alpha00 = nv::saturate(pixel(alphaChannel, x+0, y+0, 0) * alphaScale); - float alpha10 = nv::saturate(pixel(alphaChannel, x+1, y+0, 0) * alphaScale); - float alpha01 = nv::saturate(pixel(alphaChannel, x+0, y+1, 0) * alphaScale); - float alpha11 = nv::saturate(pixel(alphaChannel, x+1, y+1, 0) * alphaScale); - - for (uint sy = 0; sy < n; sy++) { - float fy = (sy + 0.5f) / n; - for (uint sx = 0; sx < n; sx++) { - float fx = (sx + 0.5f) / n; - float alpha = alpha00 * (1 - fx) * (1 - fy) + alpha10 * fx * (1 - fy) + alpha01 * (1 - fx) * fy + alpha11 * fx * fy; - if (alpha > alphaRef) coverage += 1.0f; - } - } - } - } - - return coverage / float(w * h * n * n); -#endif -} - -void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int alphaChannel) -{ -#if 0 - float minAlphaRef = 0.0f; - float maxAlphaRef = 1.0f; - float midAlphaRef = 0.5f; - - // Determine desired scale using a binary search. Hardcoded to 8 steps max. - for (int i = 0; i < 10; i++) { - float currentCoverage = alphaTestCoverage(midAlphaRef, alphaChannel); - - if (currentCoverage > desiredCoverage) { - minAlphaRef = midAlphaRef; - } - else if (currentCoverage < desiredCoverage) { - maxAlphaRef = midAlphaRef; - } - else { - break; - } - - midAlphaRef = (minAlphaRef + maxAlphaRef) * 0.5f; - } - - float alphaScale = alphaRef / midAlphaRef; - - // Scale alpha channel. - scaleBias(alphaChannel, 1, alphaScale, 0.0f); - clamp(alphaChannel, 1, 0.0f, 1.0f); -#else - float minAlphaScale = 0.0f; - float maxAlphaScale = 4.0f; - float alphaScale = 1.0f; - - // Determine desired scale using a binary search. Hardcoded to 8 steps max. 
- for (int i = 0; i < 10; i++) { - float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale); - - if (currentCoverage < desiredCoverage) { - minAlphaScale = alphaScale; - } - else if (currentCoverage > desiredCoverage) { - maxAlphaScale = alphaScale; - } - else { - break; - } - - alphaScale = (minAlphaScale + maxAlphaScale) * 0.5f; - } - - // Scale alpha channel. - scaleBias(alphaChannel, 1, alphaScale, 0.0f); - clamp(alphaChannel, 1, 0.0f, 1.0f); -#endif -#if _DEBUG - alphaTestCoverage(alphaRef, alphaChannel); -#endif -} - -FloatImage* FloatImage::clone() const -{ - FloatImage* copy = new FloatImage(); - - copy->allocate(m_componentCount, m_width, m_height, m_depth); - memcpy(copy->m_mem, m_mem, m_floatCount * sizeof(float)); - - return copy; -} - +// This code is in the public domain -- castanyo@yahoo.es + +#include "FloatImage.h" +#include "Filter.h" +#include "Image.h" + +#include "nvthread/ParallelFor.h" + +#include "nvmath/Color.h" +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/ftoi.h" +#include "nvmath/Gamma.h" + +#include "nvcore/Utils.h" // max +#include "nvcore/Ptr.h" +#include "nvcore/Memory.h" +#include "nvcore/Array.inl" + +#include +#include // memset, memcpy + + +using namespace nv; + + +/// Ctor. +FloatImage::FloatImage() : m_componentCount(0), m_width(0), m_height(0), m_depth(0), + m_pixelCount(0), m_floatCount(0), m_mem(NULL) +{ +} + +FloatImage::FloatImage(const FloatImage & img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0), + m_pixelCount(0), m_floatCount(0), m_mem(NULL) +{ + allocate(img.m_componentCount, img.m_width, img.m_height, img.m_depth); + memcpy(m_mem, img.m_mem, m_floatCount * sizeof(float)); +} + +/// Ctor. Init from image. +FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0), + m_pixelCount(0), m_floatCount(0), m_mem(NULL) +{ + initFrom(img); +} + +/// Dtor. 
+FloatImage::~FloatImage() +{ + free(); +} + +/// Init the floating point image from a regular image. +void FloatImage::initFrom(const Image * img) +{ + nvCheck(img != NULL); + + uint channel_count = 3; + if (img->format() == Image::Format_ARGB) channel_count = 4; + + allocate(channel_count, img->width(), img->height(), img->depth()); + + float * red_channel = channel(0); + float * green_channel = channel(1); + float * blue_channel = channel(2); + float * alpha_channel = (channel_count == 4) ? channel(3) : NULL; + + float scale = 1.0f / 255.0f; + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + //parallel_for(count, 128, [&](int i) { + Color32 pixel = img->pixel(i); + red_channel[i] = float(pixel.r) * scale; + green_channel[i] = float(pixel.g) * scale; + blue_channel[i] = float(pixel.b) * scale; + if (channel_count == 4) alpha_channel[i] = float(pixel.a) * scale; + }//); +} + +/// Convert the floating point image to a regular image. +Image * FloatImage::createImage(uint baseComponent/*= 0*/, uint num/*= 4*/) const +{ + nvCheck(num <= 4); + nvCheck(baseComponent + num <= m_componentCount); + + AutoPtr img(new Image()); + img->allocate(m_width, m_height, m_depth); + + for (uint i = 0; i < m_pixelCount; i++) { + + uint c; + uint8 rgba[4]= {0, 0, 0, 0xff}; + + for (c = 0; c < num; c++) { + float f = pixel(baseComponent + c, i); + rgba[c] = nv::clamp(int(255.0f * f), 0, 255); + } + + img->pixel(i) = Color32(rgba[0], rgba[1], rgba[2], rgba[3]); + } + + return img.release(); +} + + +/// Convert the floating point image to a regular image. Correct gamma of rgb, but not alpha. 
+Image * FloatImage::createImageGammaCorrect(float gamma/*= 2.2f*/) const +{ + nvCheck(m_componentCount == 4); + + AutoPtr img(new Image()); + img->allocate(m_width, m_height, m_depth); + + const float * rChannel = this->channel(0); + const float * gChannel = this->channel(1); + const float * bChannel = this->channel(2); + const float * aChannel = this->channel(3); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) + { + const uint8 r = nv::clamp(int(255.0f * pow(rChannel[i], 1.0f/gamma)), 0, 255); + const uint8 g = nv::clamp(int(255.0f * pow(gChannel[i], 1.0f/gamma)), 0, 255); + const uint8 b = nv::clamp(int(255.0f * pow(bChannel[i], 1.0f/gamma)), 0, 255); + const uint8 a = nv::clamp(int(255.0f * aChannel[i]), 0, 255); + + img->pixel(i) = Color32(r, g, b, a); + } + + return img.release(); +} + +/// Allocate a 2D float image of the given format and the given extents. +void FloatImage::allocate(uint c, uint w, uint h, uint d) +{ + if (m_componentCount != c || m_width != w || m_height != h || m_depth != d) + { + free(); + + m_width = w; + m_height = h; + m_depth = d; + m_componentCount = c; + m_pixelCount = w * h * d; + m_floatCount = m_pixelCount * c; + m_mem = malloc(m_floatCount); + } +} + +/// Free the image, but don't clear the members. 
+void FloatImage::free() +{ + ::free(m_mem); + m_mem = NULL; +} + +void FloatImage::resizeChannelCount(uint c) +{ + if (m_componentCount != c) { + uint count = m_pixelCount * c; + m_mem = realloc(m_mem, count); + + if (c > m_componentCount) { + memset(m_mem + m_floatCount, 0, (count - m_floatCount) * sizeof(float)); + } + + m_componentCount = c; + m_floatCount = count; + } +} + +void FloatImage::clear(float f/*=0.0f*/) +{ + for (uint i = 0; i < m_floatCount; i++) { + m_mem[i] = f; + } +} + +void FloatImage::clear(uint c, float f/*= 0.0f*/) +{ + float * channel = this->channel(c); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + channel[i] = f; + } +} + +void FloatImage::copyChannel(uint src, uint dst) +{ + nvCheck(src < m_componentCount); + nvCheck(dst < m_componentCount); + + const float * srcChannel = this->channel(src); + float * dstChannel = this->channel(dst); + + memcpy(dstChannel, srcChannel, sizeof(float)*m_pixelCount); +} + +void FloatImage::normalize(uint baseComponent) +{ + nvCheck(baseComponent + 3 <= m_componentCount); + + float * xChannel = this->channel(baseComponent + 0); + float * yChannel = this->channel(baseComponent + 1); + float * zChannel = this->channel(baseComponent + 2); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + + Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); + normal = normalizeSafe(normal, Vector3(0), 0.0f); + + xChannel[i] = normal.x; + yChannel[i] = normal.y; + zChannel[i] = normal.z; + } +} + +void FloatImage::packNormals(uint baseComponent) +{ + scaleBias(baseComponent, 3, 0.5f, 0.5f); +} + +void FloatImage::expandNormals(uint baseComponent) +{ + scaleBias(baseComponent, 3, 2, -1.0); +} + +void FloatImage::scaleBias(uint baseComponent, uint num, float scale, float bias) +{ + const uint size = m_pixelCount; + + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + for (uint i = 0; i < size; i++) { + ptr[i] = scale * ptr[i] + bias; + } 
+ } +} + +/// Clamp the elements of the image. +void FloatImage::clamp(uint baseComponent, uint num, float low, float high) +{ + const uint size = m_pixelCount; + + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + for (uint i = 0; i < size; i++) { + ptr[i] = nv::clamp(ptr[i], low, high); + } + } +} + +/// From gamma to linear space. +void FloatImage::toLinear(uint baseComponent, uint num, float gamma /*= 2.2f*/) +{ + if (gamma == 2.2f) { + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + powf_11_5(ptr, ptr, m_pixelCount); + } + } else { + exponentiate(baseComponent, num, gamma); + } +} + +/// From linear to gamma space. +void FloatImage::toGamma(uint baseComponent, uint num, float gamma /*= 2.2f*/) +{ + if (gamma == 2.2f) { + for (uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + powf_5_11(ptr, ptr, m_pixelCount); + } + } else { + exponentiate(baseComponent, num, 1.0f/gamma); + } +} + +/// Exponentiate the elements of the image. +void FloatImage::exponentiate(uint baseComponent, uint num, float power) +{ + const uint size = m_pixelCount; + + for(uint c = 0; c < num; c++) { + float * ptr = this->channel(baseComponent + c); + + for(uint i = 0; i < size; i++) { + ptr[i] = powf(max(0.0f, ptr[i]), power); + } + } +} + +/// Apply linear transform. 
+void FloatImage::transform(uint baseComponent, const Matrix & m, Vector4::Arg offset) +{ + nvCheck(baseComponent + 4 <= m_componentCount); + + float * r = this->channel(baseComponent + 0); + float * g = this->channel(baseComponent + 1); + float * b = this->channel(baseComponent + 2); + float * a = this->channel(baseComponent + 3); + + const uint size = m_pixelCount; + for (uint i = 0; i < size; i++) + { + Vector4 color = nv::transform(m, Vector4(*r, *g, *b, *a)) + offset; + + *r++ = color.x; + *g++ = color.y; + *b++ = color.z; + *a++ = color.w; + } +} + +void FloatImage::swizzle(uint baseComponent, uint r, uint g, uint b, uint a) +{ + nvCheck(baseComponent + 4 <= m_componentCount); + nvCheck(r < 7 && g < 7 && b < 7 && a < 7); + + float consts[] = { 1.0f, 0.0f, -1.0f }; + float * c[7]; + c[0] = this->channel(baseComponent + 0); + c[1] = this->channel(baseComponent + 1); + c[2] = this->channel(baseComponent + 2); + c[3] = this->channel(baseComponent + 3); + c[4] = consts; + c[5] = consts + 1; + c[6] = consts + 2; + + const uint size = m_pixelCount; + for (uint i = 0; i < size; i++) + { + float tmp[4] = { *c[r], *c[g], *c[b], *c[a] }; + + *c[0]++ = tmp[0]; + *c[1]++ = tmp[1]; + *c[2]++ = tmp[2]; + *c[3]++ = tmp[3]; + } +} + +float FloatImage::sampleNearest(uint c, float x, float y, const WrapMode wm) const +{ + if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y); + else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y); + else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y); +} + +float FloatImage::sampleLinear(uint c, float x, float y, WrapMode wm) const +{ + if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y); + else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y); + else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y); +} + +float FloatImage::sampleNearest(uint c, float x, float y, float z, WrapMode wm) const +{ + if( wm == WrapMode_Clamp ) return sampleNearestClamp(c, x, y, z); 
+ else if( wm == WrapMode_Repeat ) return sampleNearestRepeat(c, x, y, z); + else /*if( wm == WrapMode_Mirror )*/ return sampleNearestMirror(c, x, y, z); +} + +float FloatImage::sampleLinear(uint c, float x, float y, float z, WrapMode wm) const +{ + if( wm == WrapMode_Clamp ) return sampleLinearClamp(c, x, y, z); + else if( wm == WrapMode_Repeat ) return sampleLinearRepeat(c, x, y, z); + else /*if( wm == WrapMode_Mirror )*/ return sampleLinearMirror(c, x, y, z); +} + +float FloatImage::sampleNearestClamp(uint c, float x, float y) const +{ + int ix = wrapClamp(iround(x * m_width), m_width); + int iy = wrapClamp(iround(y * m_height), m_height); + return pixel(c, ix, iy, 0); +} + +float FloatImage::sampleNearestRepeat(uint c, float x, float y) const +{ + int ix = wrapRepeat(iround(x * m_width), m_width); + int iy = wrapRepeat(iround(y * m_height), m_height); + return pixel(c, ix, iy, 0); +} + +float FloatImage::sampleNearestMirror(uint c, float x, float y) const +{ + int ix = wrapMirror(iround(x * m_width), m_width); + int iy = wrapMirror(iround(y * m_height), m_height); + return pixel(c, ix, iy, 0); +} + +float FloatImage::sampleNearestClamp(uint c, float x, float y, float z) const +{ + int ix = wrapClamp(iround(x * m_width), m_width); + int iy = wrapClamp(iround(y * m_height), m_height); + int iz = wrapClamp(iround(z * m_depth), m_depth); + return pixel(c, ix, iy, iz); +} + +float FloatImage::sampleNearestRepeat(uint c, float x, float y, float z) const +{ + int ix = wrapRepeat(iround(x * m_width), m_width); + int iy = wrapRepeat(iround(y * m_height), m_height); + int iz = wrapRepeat(iround(z * m_depth), m_depth); + return pixel(c, ix, iy, iz); +} + +float FloatImage::sampleNearestMirror(uint c, float x, float y, float z) const +{ + int ix = wrapMirror(iround(x * m_width), m_width); + int iy = wrapMirror(iround(y * m_height), m_height); + int iz = wrapMirror(iround(z * m_depth), m_depth); + return pixel(c, ix, iy, iz); +} + + +float FloatImage::sampleLinearClamp(uint 
c, float x, float y) const +{ + const int w = m_width; + const int h = m_height; + + x *= w; + y *= h; + + const float fracX = frac(x); + const float fracY = frac(y); + + const int ix0 = ::clamp(ifloor(x), 0, w-1); + const int iy0 = ::clamp(ifloor(y), 0, h-1); + const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); + const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); + + return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); +} + +float FloatImage::sampleLinearRepeat(uint c, float x, float y) const +{ + const int w = m_width; + const int h = m_height; + + const float fracX = frac(x * w); + const float fracY = frac(y * h); + + // @@ Using floor in some places, but round in others? + int ix0 = ifloor(frac(x) * w); + int iy0 = ifloor(frac(y) * h); + int ix1 = ifloor(frac(x + 1.0f/w) * w); + int iy1 = ifloor(frac(y + 1.0f/h) * h); + + return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); +} + +float FloatImage::sampleLinearMirror(uint c, float x, float y) const +{ + const int w = m_width; + const int h = m_height; + + x *= w; + y *= h; + + const float fracX = frac(x); + const float fracY = frac(y); + + int ix0 = wrapMirror(iround(x), w); + int iy0 = wrapMirror(iround(y), h); + int ix1 = wrapMirror(iround(x) + 1, w); + int iy1 = wrapMirror(iround(y) + 1, h); + + return bilerp(c, ix0, iy0, ix1, iy1, fracX, fracY); +} + +float FloatImage::sampleLinearClamp(uint c, float x, float y, float z) const +{ + const int w = m_width; + const int h = m_height; + const int d = m_depth; + + x *= w; + y *= h; + z *= d; + + const float fracX = frac(x); + const float fracY = frac(y); + const float fracZ = frac(z); + + //x -= fracX; + //y -= fracY; + //z -= fracZ; + + // @@ Using floor in some places, but round in others? 
+ const int ix0 = ::clamp(ifloor(x), 0, w-1); + const int iy0 = ::clamp(ifloor(y), 0, h-1); + const int iz0 = ::clamp(ifloor(z), 0, d-1); + const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); + const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); + const int iz1 = ::clamp(ifloor(z)+1, 0, d-1); + + return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); +} + +float FloatImage::sampleLinearRepeat(uint c, float x, float y, float z) const +{ + const int w = m_width; + const int h = m_height; + const int d = m_depth; + + const float fracX = frac(x * w); + const float fracY = frac(y * h); + const float fracZ = frac(z * d); + + int ix0 = ifloor(frac(x) * w); + int iy0 = ifloor(frac(y) * h); + int iz0 = ifloor(frac(z) * d); + int ix1 = ifloor(frac(x + 1.0f/w) * w); + int iy1 = ifloor(frac(y + 1.0f/h) * h); + int iz1 = ifloor(frac(z + 1.0f/d) * d); + + return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); +} + +float FloatImage::sampleLinearMirror(uint c, float x, float y, float z) const +{ + const int w = m_width; + const int h = m_height; + const int d = m_depth; + + x *= w; + y *= h; + z *= d; + + int ix0 = wrapMirror(iround(x), w); + int iy0 = wrapMirror(iround(y), h); + int iz0 = wrapMirror(iround(z), d); + int ix1 = wrapMirror(iround(x) + 1, w); + int iy1 = wrapMirror(iround(y) + 1, h); + int iz1 = wrapMirror(iround(z) + 1, d); + + const float fracX = frac(x); + const float fracY = frac(y); + const float fracZ = frac(z); + + return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); +} + + +/// Fast downsampling using box filter. +/// +/// The extents of the image are divided by two and rounded down. 
+/// +/// When the size of the image is odd, this uses a polyphase box filter as explained in: +/// http://developer.nvidia.com/object/np2_mipmapping.html +/// +FloatImage * FloatImage::fastDownSample() const +{ + nvDebugCheck(m_depth == 1); + nvDebugCheck(m_width != 1 || m_height != 1); + + AutoPtr dst_image( new FloatImage() ); + + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + dst_image->allocate(m_componentCount, w, h); + + // 1D box filter. + if (m_width == 1 || m_height == 1) + { + const uint n = w * h; + + if ((m_width * m_height) & 1) + { + const float scale = 1.0f / (2 * n + 1); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint x = 0; x < n; x++) + { + const float w0 = float(n - x); + const float w1 = float(n - 0); + const float w2 = float(1 + x); + + *dst++ = scale * (w0 * src[0] + w1 * src[1] + w2 * src[2]); + src += 2; + } + } + } + else + { + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint x = 0; x < n; x++) + { + *dst = 0.5f * (src[0] + src[1]); + dst++; + src += 2; + } + } + } + } + + // Regular box filter. + else if ((m_width & 1) == 0 && (m_height & 1) == 0) + { + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + for(uint x = 0; x < w; x++) + { + *dst = 0.25f * (src[0] + src[1] + src[m_width] + src[m_width + 1]); + dst++; + src += 2; + } + + src += m_width; + } + } + } + + // Polyphase filters. 
+ else if (m_width & 1 && m_height & 1) + { + nvDebugCheck(m_width == 2 * w + 1); + nvDebugCheck(m_height == 2 * h + 1); + + const float scale = 1.0f / (m_width * m_height); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + const float v0 = float(h - y); + const float v1 = float(h - 0); + const float v2 = float(1 + y); + + for (uint x = 0; x < w; x++) + { + const float w0 = float(w - x); + const float w1 = float(w - 0); + const float w2 = float(1 + x); + + float f = 0.0f; + f += v0 * (w0 * src[0 * m_width + 2 * x] + w1 * src[0 * m_width + 2 * x + 1] + w2 * src[0 * m_width + 2 * x + 2]); + f += v1 * (w0 * src[1 * m_width + 2 * x] + w1 * src[1 * m_width + 2 * x + 1] + w2 * src[1 * m_width + 2 * x + 2]); + f += v2 * (w0 * src[2 * m_width + 2 * x] + w1 * src[2 * m_width + 2 * x + 1] + w2 * src[2 * m_width + 2 * x + 2]); + + *dst = f * scale; + dst++; + } + + src += 2 * m_width; + } + } + } + else if (m_width & 1) + { + nvDebugCheck(m_width == 2 * w + 1); + const float scale = 1.0f / (2 * m_width); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + const float w0 = float(w - x); + const float w1 = float(w - 0); + const float w2 = float(1 + x); + + float f = 0.0f; + f += w0 * (src[2 * x + 0] + src[m_width + 2 * x + 0]); + f += w1 * (src[2 * x + 1] + src[m_width + 2 * x + 1]); + f += w2 * (src[2 * x + 2] + src[m_width + 2 * x + 2]); + + *dst = f * scale; + dst++; + } + + src += 2 * m_width; + } + } + } + else if (m_height & 1) + { + nvDebugCheck(m_height == 2 * h + 1); + + const float scale = 1.0f / (2 * m_height); + + for(uint c = 0; c < m_componentCount; c++) + { + const float * src = this->channel(c); + float * dst = dst_image->channel(c); + + for(uint y = 0; y < h; y++) + { + const float v0 = float(h - 
y); + const float v1 = float(h - 0); + const float v2 = float(1 + y); + + for (uint x = 0; x < w; x++) + { + float f = 0.0f; + f += v0 * (src[0 * m_width + 2 * x] + src[0 * m_width + 2 * x + 1]); + f += v1 * (src[1 * m_width + 2 * x] + src[1 * m_width + 2 * x + 1]); + f += v2 * (src[2 * m_width + 2 * x] + src[2 * m_width + 2 * x + 1]); + + *dst = f * scale; + dst++; + } + + src += 2 * m_width; + } + } + } + + return dst_image.release(); +} + +/// Downsample applying a 1D kernel separately in each dimension. +FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm) const +{ + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + const uint d = max(1, m_depth / 2); + + return resize(filter, w, h, d, wm); +} + +/// Downsample applying a 1D kernel separately in each dimension. +FloatImage * FloatImage::downSample(const Filter & filter, WrapMode wm, uint alpha) const +{ + const uint w = max(1, m_width / 2); + const uint h = max(1, m_height / 2); + const uint d = max(1, m_depth / 2); + + return resize(filter, w, h, d, wm, alpha); +} + + +/// Downsample applying a 1D kernel separately in each dimension. +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm) const +{ + // @@ Use monophase filters when frac(m_width / w) == 0 + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + + // @@ Select fastest filtering order: + //if (w * m_height <= h * m_width) + { + tmp_image->allocate(m_componentCount, w, m_height); + dst_image->allocate(m_componentCount, w, h); + + // @@ We could avoid this allocation, write directly to dst_plane. 
+ //Array tmp_column(h); + //tmp_column.resize(h); + + for (uint c = 0; c < m_componentCount; c++) + { + for (uint z = 0; z < m_depth; z++) + { + float * tmp_plane = tmp_image->plane(c, z); + + for (uint y = 0; y < m_height; y++) { + //parallel_for(m_height, [&](int y) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); + }//); + + float * dst_plane = dst_image->plane(c, z); + + for (uint x = 0; x < w; x++) { + //parallel_for(w, [&](int x) { + tmp_image->applyKernelY(ykernel, x, z, c, wm, dst_plane + x, w); + + // @@ We could avoid this copy, write directly to dst_plane. + /*for (uint y = 0; y < h; y++) { + dst_plane[y * w + x] = tmp_column[y]; + }*/ + }//); + } + } + } + + return dst_image.release(); +} + +/// Downsample applying a 1D kernel separately in each dimension. (for 3d textures) +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm) const +{ + // @@ Use monophase filters when frac(m_width / w) == 0 + + // Use the existing 2d version if we are not resizing in the Z axis: + if (m_depth == d) { + return resize(filter, w, h, wm); + } + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr tmp_image2( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + PolyphaseKernel zkernel(filter, m_depth, d, 32); + + tmp_image->allocate(m_componentCount, w, m_height, m_depth); + tmp_image2->allocate(m_componentCount, w, m_height, d); + dst_image->allocate(m_componentCount, w, h, d); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint c = 0; c < m_componentCount; c++) + { + float * tmp_channel = tmp_image->channel(c); + + // split width in half + for (uint z = 0; z < m_depth; z++ ) { + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w); + } + } + + // split depth in half + float * tmp2_channel = tmp_image2->channel(c); + for (uint y 
= 0; y < m_height; y++) { + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() ); + + for (uint z = 0; z < d; z++) { + tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z]; + } + } + } + + // split height in half + float * dst_channel = dst_image->channel(c); + + for (uint z = 0; z < d; z++ ) { + for (uint x = 0; x < w; x++) { + tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1); + + for (uint y = 0; y < h; y++) { + dst_channel[z * h * w + y * w + x] = tmp_column[y]; + } + } + } + } + + return dst_image.release(); +} + + +/// Downsample applying a 1D kernel separately in each dimension. +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode wm, uint alpha) const +{ + nvCheck(alpha < m_componentCount); + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + + { + tmp_image->allocate(m_componentCount, w, m_height); + dst_image->allocate(m_componentCount, w, h); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint i = 0; i < m_componentCount; i++) + { + // Process alpha channel first. + uint c; + if (i == 0) c = alpha; + else if (i > alpha) c = i; + else c = i - 1; + + for (uint z = 0; z < m_depth; z++) + { + float * tmp_plane = tmp_image->plane(c, z); + + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); + } + + float * dst_plane = dst_image->plane(c, z); + + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1); + + // @@ Avoid this copy, write directly to dst_plane. + for (uint y = 0; y < h; y++) { + dst_plane[y * w + x] = tmp_column[y]; + } + } + } + } + } + + return dst_image.release(); +} + + +/// Downsample applying a 1D kernel separately in each dimension. 
(for 3d textures) +FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, WrapMode wm, uint alpha) const +{ + nvCheck(alpha < m_componentCount); + + // use the existing 2d version if we are a 2d image: + if (m_depth == d) { + return resize( filter, w, h, wm, alpha ); + } + + AutoPtr tmp_image( new FloatImage() ); + AutoPtr tmp_image2( new FloatImage() ); + AutoPtr dst_image( new FloatImage() ); + + PolyphaseKernel xkernel(filter, m_width, w, 32); + PolyphaseKernel ykernel(filter, m_height, h, 32); + PolyphaseKernel zkernel(filter, m_depth, d, 32); + + tmp_image->allocate(m_componentCount, w, m_height, m_depth); + tmp_image2->allocate(m_componentCount, w, m_height, d); + dst_image->allocate(m_componentCount, w, h, d); + + Array tmp_column(h); + tmp_column.resize(h); + + for (uint i = 0; i < m_componentCount; i++) + { + // Process alpha channel first. + uint c; + if (i == 0) c = alpha; + else if (i > alpha) c = i; + else c = i - 1; + + float * tmp_channel = tmp_image->channel(c); + + for (uint z = 0; z < m_depth; z++ ) { + for (uint y = 0; y < m_height; y++) { + this->applyKernelX(xkernel, y, z, c, wm, tmp_channel + z * m_height * w + y * w); + } + } + + float * tmp2_channel = tmp_image2->channel(c); + for (uint y = 0; y < m_height; y++) { + for (uint x = 0; x < w; x++) { + tmp_image->applyKernelZ(zkernel, x, y, c, wm, tmp_column.buffer() ); + + for (uint z = 0; z < d; z++) { + tmp2_channel[z * m_height * w + y * w + x] = tmp_column[z]; + } + } + } + + float * dst_channel = dst_image->channel(c); + + for (uint z = 0; z < d; z++ ) { + for (uint x = 0; x < w; x++) { + tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1); + + for (uint y = 0; y < h; y++) { + dst_channel[z * h * w + y * w + x] = tmp_column[y]; + } + } + } + } + + return dst_image.release(); +} + + +void FloatImage::convolve(const Kernel2 & k, uint c, WrapMode wm) +{ + AutoPtr tmpImage(clone()); + + uint w = m_width; + uint h = m_height; + uint d = m_depth; + + 
for (uint z = 0; z < d; z++) + { + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + pixel(c, x, y, 0) = tmpImage->applyKernelXY(&k, x, y, z, c, wm); + } + } + } +} + + +/// Apply 2D kernel at the given coordinates and return result. +float FloatImage::applyKernelXY(const Kernel2 * k, int x, int y, int z, uint c, WrapMode wm) const +{ + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->plane(c, z); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + int src_y = int(y + i) - kernelOffset; + + for (uint e = 0; e < kernelWindow; e++) + { + int src_x = int(x + e) - kernelOffset; + + int idx = this->index(src_x, src_y, z, wm); + + sum += k->valueAt(e, i) * channel[idx]; + } + } + + return sum; +} + + +/// Apply 1D horizontal kernel at the given coordinates and return result. +float FloatImage::applyKernelX(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const +{ + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->channel(c); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + const int src_x = int(x + i) - kernelOffset; + const int idx = this->index(src_x, y, z, wm); + + sum += k->valueAt(i) * channel[idx]; + } + + return sum; +} + +/// Apply 1D vertical kernel at the given coordinates and return result. 
+float FloatImage::applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const +{ + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->channel(c); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + const int src_y = int(y + i) - kernelOffset; + const int idx = this->index(x, src_y, z, wm); + + sum += k->valueAt(i) * channel[idx]; + } + + return sum; +} + +/// Apply 1D kernel in the z direction at the given coordinates and return result. +float FloatImage::applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const +{ + nvDebugCheck(k != NULL); + + const uint kernelWindow = k->windowSize(); + const int kernelOffset = int(kernelWindow / 2); + + const float * channel = this->channel(c); + + float sum = 0.0f; + for (uint i = 0; i < kernelWindow; i++) + { + const int src_z = int(z + i) - kernelOffset; + const int idx = this->index(x, y, src_z, wm); + + sum += k->valueAt(i) * channel[idx]; + } + + return sum; +} + + +/// Apply 1D horizontal kernel at the given coordinates and return result. 
+void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(left + j, y, z, wm); + + sum += k.valueAt(i, j) * channel[idx]; + } + + output[i] = sum; + } +} + +/// Apply 1D vertical kernel at the given coordinates and return result. +void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output, int output_stride) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, j+left, z, wm); + + sum += k.valueAt(i, j) * channel[idx]; + } + + output[i * output_stride] = sum; + } +} + +/// Apply 1D kernel in the Z direction at the given coordinates and return result. 
+void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, y, j+left, wm); + + sum += k.valueAt(i, j) * channel[idx]; + } + + output[i] = sum; + } +} + + +/// Apply 1D horizontal kernel at the given coordinates and return result. +void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float norm = 0.0f; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(left + j, y, z, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } +} + +/// Apply 1D vertical kernel at the given coordinates and return result. 
+void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output, int output_stride) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_height); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvCheck(right - left <= windowSize); + + float norm = 0; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, j+left, z, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i * output_stride] = sum / norm; + } +} + +/// Apply 1D horizontal kernel at the given coordinates and return result. 
+void FloatImage::applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * __restrict output) const +{ + const uint length = k.length(); + const float scale = float(length) / float(m_width); + const float iscale = 1.0f / scale; + + const float width = k.width(); + const int windowSize = k.windowSize(); + + const float * channel = this->channel(c); + const float * alpha = this->channel(a); + + for (uint i = 0; i < length; i++) + { + const float center = (0.5f + i) * iscale; + + const int left = (int)floorf(center - width); + const int right = (int)ceilf(center + width); + nvDebugCheck(right - left <= windowSize); + + float norm = 0.0f; + float sum = 0; + for (int j = 0; j < windowSize; ++j) + { + const int idx = this->index(x, y, left + j, wm); + + float w = k.valueAt(i, j) * (alpha[idx] + (1.0f / 256.0f)); + norm += w; + sum += w * channel[idx]; + } + + output[i] = sum / norm; + } +} + + +void FloatImage::flipX() +{ + const uint w = m_width; + const uint h = m_height; + const uint d = m_depth; + const uint w2 = w / 2; + + for (uint c = 0; c < m_componentCount; c++) { + for (uint z = 0; z < d; z++) { + for (uint y = 0; y < h; y++) { + float * line = scanline(c, y, z); + for (uint x = 0; x < w2; x++) { + swap(line[x], line[w - 1 - x]); + } + } + } + } +} + +void FloatImage::flipY() +{ + const uint w = m_width; + const uint h = m_height; + const uint d = m_depth; + const uint h2 = h / 2; + + for (uint c = 0; c < m_componentCount; c++) { + for (uint z = 0; z < d; z++) { + for (uint y = 0; y < h2; y++) { + float * src = scanline(c, y, z); + float * dst = scanline(c, h - 1 - y, z); + for (uint x = 0; x < w; x++) { + swap(src[x], dst[x]); + } + } + } + } +} + +void FloatImage::flipZ() +{ + const uint w = m_width; + const uint h = m_height; + const uint d = m_depth; + const uint d2 = d / 2; + + for (uint c = 0; c < m_componentCount; c++) { + for (uint z = 0; z < d2; z++) { + float * src = plane(c, z); + float * dst = plane(c, d - 1 - z); + 
for (uint i = 0; i < w*h; i++) { + swap(src[i], dst[i]); + } + } + } +} + + + +float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale/*=1*/) const +{ + const uint w = m_width; + const uint h = m_height; + + float coverage = 0.0f; + +#if 0 + const float * alpha = channel(alphaChannel); + + const uint count = m_pixelCount; + for (uint i = 0; i < count; i++) { + if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt? + } + + return coverage / float(w * h); +#else + const uint n = 8; + + // If we want subsampling: + for (uint y = 0; y < h-1; y++) { + for (uint x = 0; x < w-1; x++) { + + float alpha00 = nv::saturate(pixel(alphaChannel, x+0, y+0, 0) * alphaScale); + float alpha10 = nv::saturate(pixel(alphaChannel, x+1, y+0, 0) * alphaScale); + float alpha01 = nv::saturate(pixel(alphaChannel, x+0, y+1, 0) * alphaScale); + float alpha11 = nv::saturate(pixel(alphaChannel, x+1, y+1, 0) * alphaScale); + + for (uint sy = 0; sy < n; sy++) { + float fy = (sy + 0.5f) / n; + for (uint sx = 0; sx < n; sx++) { + float fx = (sx + 0.5f) / n; + float alpha = alpha00 * (1 - fx) * (1 - fy) + alpha10 * fx * (1 - fy) + alpha01 * (1 - fx) * fy + alpha11 * fx * fy; + if (alpha > alphaRef) coverage += 1.0f; + } + } + } + } + + return coverage / float(w * h * n * n); +#endif +} + +void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int alphaChannel) +{ +#if 0 + float minAlphaRef = 0.0f; + float maxAlphaRef = 1.0f; + float midAlphaRef = 0.5f; + + // Determine desired scale using a binary search. Hardcoded to 8 steps max. + for (int i = 0; i < 10; i++) { + float currentCoverage = alphaTestCoverage(midAlphaRef, alphaChannel); + + if (currentCoverage > desiredCoverage) { + minAlphaRef = midAlphaRef; + } + else if (currentCoverage < desiredCoverage) { + maxAlphaRef = midAlphaRef; + } + else { + break; + } + + midAlphaRef = (minAlphaRef + maxAlphaRef) * 0.5f; + } + + float alphaScale = alphaRef / midAlphaRef; + + // Scale alpha channel. 
+ scaleBias(alphaChannel, 1, alphaScale, 0.0f); + clamp(alphaChannel, 1, 0.0f, 1.0f); +#else + float minAlphaScale = 0.0f; + float maxAlphaScale = 4.0f; + float alphaScale = 1.0f; + float bestAlphaScale = 1.0f; + float bestError = NV_FLOAT_MAX; + + // Determine desired scale using a binary search. Hardcoded to 10 steps max. + for (int i = 0; i < 10; i++) { + float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale); + + float error = fabsf(currentCoverage - desiredCoverage); + if (error < bestError) { + bestError = error; + bestAlphaScale = alphaScale; + } + + if (currentCoverage < desiredCoverage) { + minAlphaScale = alphaScale; + } + else if (currentCoverage > desiredCoverage) { + maxAlphaScale = alphaScale; + } + else { + break; + } + + alphaScale = (minAlphaScale + maxAlphaScale) * 0.5f; + } + + // Scale alpha channel. + scaleBias(alphaChannel, 1, bestAlphaScale, 0.0f); + clamp(alphaChannel, 1, 0.0f, 1.0f); +#endif +#if _DEBUG + alphaTestCoverage(alphaRef, alphaChannel); +#endif +} + +FloatImage* FloatImage::clone() const +{ + FloatImage* copy = new FloatImage(); + + copy->allocate(m_componentCount, m_width, m_height, m_depth); + memcpy(copy->m_mem, m_mem, m_floatCount * sizeof(float)); + + return copy; +} + diff --git a/src/nvimage/FloatImage.h b/src/nvimage/FloatImage.h index 10a236f..42cd86a 100644 --- a/src/nvimage/FloatImage.h +++ b/src/nvimage/FloatImage.h @@ -35,6 +35,7 @@ namespace nv }; NVIMAGE_API FloatImage(); + NVIMAGE_API FloatImage(const FloatImage & img); NVIMAGE_API FloatImage(const Image * img); NVIMAGE_API virtual ~FloatImage(); @@ -92,10 +93,10 @@ namespace nv NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelY(const PolyphaseKernel & 
k, int x, int z, uint c, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const; NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const; - NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const; + NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const; NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; diff --git a/src/nvimage/Image.cpp b/src/nvimage/Image.cpp index 3d99108..0ac84a6 100644 --- a/src/nvimage/Image.cpp +++ b/src/nvimage/Image.cpp @@ -42,13 +42,21 @@ const Image & Image::operator=(const Image & img) void Image::allocate(uint w, uint h, uint d/*= 1*/) { - free(); m_width = w; m_height = h; m_depth = d; m_data = realloc(m_data, w * h * d); } +void Image::acquire(Color32 * data, uint w, uint h, uint d/*= 1*/) +{ + free(); + m_width = w; + m_height = h; + m_depth = d; + m_data = data; +} + void Image::resize(uint w, uint h, uint d/*= 1*/) { Image img; diff --git a/src/nvimage/Image.h b/src/nvimage/Image.h index 643fd9d..e8f6fa6 100644 --- a/src/nvimage/Image.h +++ b/src/nvimage/Image.h @@ -34,6 +34,7 @@ namespace nv void allocate(uint w, uint h, uint d = 1); + void acquire(Color32 * data, uint w, uint h, uint d = 1); bool load(const char * name); void resize(uint w, uint h, uint d = 1); diff --git a/src/nvimage/ImageIO.cpp b/src/nvimage/ImageIO.cpp index 03c0410..047cf39 100644 --- a/src/nvimage/ImageIO.cpp +++ b/src/nvimage/ImageIO.cpp @@ -8,6 +8,8 @@ #include "DirectDrawSurface.h" #include "PixelFormat.h" +#include "nvthread/ParallelFor.h" 
+ #include "nvmath/Color.h" #include "nvmath/Half.h" @@ -19,31 +21,31 @@ #include "nvcore/TextWriter.h" // Extern -#if defined(HAVE_FREEIMAGE) +#if defined(NV_HAVE_FREEIMAGE) # include // If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms. -# undef HAVE_JPEG -# undef HAVE_PNG -# undef HAVE_TIFF -# undef HAVE_OPENEXR +# undef NV_HAVE_JPEG +# undef NV_HAVE_PNG +# undef NV_HAVE_TIFF +# undef NV_HAVE_OPENEXR #endif -#if defined(HAVE_JPEG) +#if defined(NV_HAVE_JPEG) extern "C" { # include } #endif -#if defined(HAVE_PNG) +#if defined(NV_HAVE_PNG) # include #endif -#if defined(HAVE_TIFF) +#if defined(NV_HAVE_TIFF) # define _TIFF_DATA_TYPEDEFS_ # include #endif -#if defined(HAVE_OPENEXR) +#if defined(NV_HAVE_OPENEXR) # include # include # include @@ -52,7 +54,7 @@ extern "C" { # include #endif -#if defined(HAVE_STBIMAGE) +#if defined(NV_HAVE_STBIMAGE) # define STBI_NO_STDIO # include #endif @@ -303,6 +305,51 @@ static bool saveTGA(Stream & s, const Image * img) return true; } +#pragma optimize("", off) + +// Save BMP image. 
+static bool saveBMP(Stream & s, const Image * img) +{ + int w = img->width(); + int h = img->height(); + int image_size = w * h * 3; + + BmpFileHeader header; + zero(header); + header.type = BM_TYPE; + header.size = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE + image_size; + header.offBits = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE; + + BmpInfoHeader info; + zero(info); + info.size = BITMAPINFOHEADER_SIZE; + info.width = w; + info.height = h; + info.planes = 1; + info.bitCount = 24; + info.sizeImage = image_size; + info.xPelsPerMeter = 2000; + info.yPelsPerMeter = 2000; + + s << header; + s << info; + + nv::Array data; + data.resize(3 * w); + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + data[x * 3 + 0] = img->pixel(x, h - y - 1).b; + data[x * 3 + 1] = img->pixel(x, h - y - 1).g; + data[x * 3 + 2] = img->pixel(x, h - y - 1).r; + } + + s.serialize(data.buffer(), data.size()); + } + + return true; +} + /*static Image * loadPPM(Stream & s) { // @@ @@ -324,7 +371,10 @@ static bool savePPM(Stream & s, const Image * img) writer.writeString("255\n"); for (uint i = 0; i < w * h; i++) { Color32 c = img->pixel(i); - s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b; + uint8 r = c.r; // current version of apple's llvm compiling for arm64 doesn't like taking the address of a bit-field. 
Workaround by using the stack + uint8 g = c.g; + uint8 b = c.b; + s << r << g << b; } return true; @@ -653,7 +703,7 @@ static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component } -#if defined(HAVE_PNG) +#if defined(NV_HAVE_PNG) static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) { @@ -902,9 +952,9 @@ static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/) return true; } -#endif // defined(HAVE_PNG) +#endif // defined(NV_HAVE_PNG) -#if defined(HAVE_JPEG) +#if defined(NV_HAVE_JPEG) static void init_source (j_decompress_ptr /*cinfo*/){ } @@ -1011,9 +1061,9 @@ static Image * loadJPG(Stream & s) return img.release(); } -#endif // defined(HAVE_JPEG) +#endif // defined(NV_HAVE_JPEG) -#if defined(HAVE_TIFF) +#if defined(NV_HAVE_TIFF) /* static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size) @@ -1207,9 +1257,9 @@ static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint return true; } -#endif // defined(HAVE_TIFF) +#endif // defined(NV_HAVE_TIFF) -#if defined(HAVE_OPENEXR) +#if defined(NV_HAVE_OPENEXR) namespace { @@ -1348,10 +1398,10 @@ static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint return true; } -#endif // defined(HAVE_OPENEXR) +#endif // defined(NV_HAVE_OPENEXR) -#if defined(HAVE_FREEIMAGE) +#if defined(NV_HAVE_FREEIMAGE) static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) { @@ -1688,10 +1738,10 @@ bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Fl return result; } -#endif // defined(HAVE_FREEIMAGE) +#endif // defined(NV_HAVE_FREEIMAGE) -#if defined(HAVE_STBIMAGE) +#if defined(NV_HAVE_STBIMAGE) static Image * loadSTB(Stream & s) { @@ -1704,28 +1754,22 @@ static Image * loadSTB(Stream & s) int w, h, n; uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4); + // @@ Hack: STB is returning n=4, because we request 4 components, even 
when input only has 3. + n = 3; + delete [] buffer; if (data != NULL) { Image * img = new Image; - img->allocate(w, h); + img->acquire((Color32 *)data, w, h); img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB); - for (int y = 0; y < h; ++y) - { - nv::Color32* dest = img->scanline(y); - uint8* src = data + y * w * 4; - - for (int x = 0; x < w; ++x) - { - dest[x].r = src[x * 4 + 0]; - dest[x].g = src[x * 4 + 1]; - dest[x].b = src[x * 4 + 2]; - dest[x].a = src[x * 4 + 3]; - } - } - - free(data); + int count = w * h; + for (int i = 0; i < count; ++i) { + //parallel_for(count, 128, [&](int i) { + Color32 & pixel = img->pixel(i); + swap(pixel.r, pixel.b); + }//); return img; } @@ -1766,7 +1810,7 @@ static FloatImage * loadFloatSTB(Stream & s) return NULL; } -#endif // defined(HAVE_STBIMAGE) +#endif // defined(NV_HAVE_STBIMAGE) @@ -1804,32 +1848,33 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s) return loadPPM(s); }*/ -#if defined(HAVE_JPEG) +#if defined(NV_HAVE_JPEG) if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) { return loadJPG(s); } #endif -#if defined(HAVE_PNG) +#if defined(NV_HAVE_PNG) if (strCaseDiff(extension, ".png") == 0) { return loadPNG(s); } #endif -#if defined(HAVE_FREEIMAGE) +#if defined(NV_HAVE_FREEIMAGE) FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { return loadFreeImage(fif, s); } #endif -#if defined(HAVE_STBIMAGE) +#if defined(NV_HAVE_STBIMAGE) return loadSTB(s); #endif return NULL; } + bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/) { nvDebugCheck(fileName != NULL); @@ -1838,6 +1883,10 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con const char * extension = Path::extension(fileName); + if (strCaseDiff(extension, ".bmp") == 0) { + return saveBMP(s, img); + } + if (strCaseDiff(extension, ".tga") == 0) { return saveTGA(s, 
img); } @@ -1846,13 +1895,13 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con return savePPM(s, img); } -#if defined(HAVE_PNG) +#if defined(NV_HAVE_PNG) if (strCaseDiff(extension, ".png") == 0) { return savePNG(s, img, tags); } #endif -#if defined(HAVE_FREEIMAGE) +#if defined(NV_HAVE_FREEIMAGE) FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { return saveFreeImage(fif, s, img, tags); @@ -1899,27 +1948,27 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s) return loadFloatPFM(s); }*/ -#if defined(HAVE_TIFF) +#if defined(NV_HAVE_TIFF) #pragma NV_MESSAGE("TODO: Load TIFF from stream.") if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { return loadFloatTIFF(fileName, s); } #endif -#if defined(HAVE_OPENEXR) +#if defined(NV_HAVE_OPENEXR) #pragma NV_MESSAGE("TODO: Load EXR from stream.") if (strCaseDiff(extension, ".exr") == 0) { return loadFloatEXR(fileName, s); } #endif -#if defined(HAVE_STBIMAGE) +#if defined(NV_HAVE_STBIMAGE) if (strCaseDiff(extension, ".hdr") == 0) { return loadFloatSTB(s); } #endif -#if defined(HAVE_FREEIMAGE) +#if defined(NV_HAVE_FREEIMAGE) FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { return loadFloatFreeImage(fif, s); @@ -1961,7 +2010,7 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage return saveFloatPFM(s, fimage, baseComponent, componentCount); }*/ -#if defined(HAVE_FREEIMAGE) +#if defined(NV_HAVE_FREEIMAGE) FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); @@ -2005,14 +2054,15 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui } const char * extension = Path::extension(fileName); + 
NV_UNUSED(extension); -#if defined(HAVE_OPENEXR) +#if defined(NV_HAVE_OPENEXR) if (strCaseDiff(extension, ".exr") == 0) { return saveFloatEXR(fileName, fimage, baseComponent, componentCount); } #endif -#if defined(HAVE_TIFF) +#if defined(NV_HAVE_TIFF) if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { return saveFloatTIFF(fileName, fimage, baseComponent, componentCount); } diff --git a/src/nvimage/KtxFile.cpp b/src/nvimage/KtxFile.cpp index de075bd..033ad75 100644 --- a/src/nvimage/KtxFile.cpp +++ b/src/nvimage/KtxFile.cpp @@ -1,6 +1,7 @@ // This code is in the public domain -- Ignacio Castańo #include "KtxFile.h" +#include "nvcore/StdStream.h" using namespace nv; @@ -10,6 +11,8 @@ static const uint8 fileIdentifier[12] = { 0x0D, 0x0A, 0x1A, 0x0A }; +namespace nv +{ KtxHeader::KtxHeader() { memcpy(identifier, fileIdentifier, 12); @@ -19,8 +22,8 @@ KtxHeader::KtxHeader() { glType = 0; glTypeSize = 1; glFormat = 0; - glInternalFormat = KTX_RGBA; - glBaseInternalFormat = KTX_RGBA; + glInternalFormat = KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1; + glBaseInternalFormat = KTX_BASE_INTERNAL_RGB; pixelWidth = 0; pixelHeight = 0; pixelDepth = 0; @@ -31,9 +34,9 @@ KtxHeader::KtxHeader() { } -Stream & operator<< (Stream & s, DDSHeader & header) { +Stream & operator<< (Stream & s, KtxHeader & header) { s.serialize(header.identifier, 12); - s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat; + s << header.endianness << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat; s << header.pixelWidth << header.pixelHeight << header.pixelDepth; s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels; s << header.bytesOfKeyValueData; @@ -41,7 +44,7 @@ Stream & operator<< (Stream & s, DDSHeader & header) { } -KtxFile::KtxFile() { +/*KtxFile::KtxFile() { } KtxFile::~KtxFile() { } @@ 
-49,7 +52,7 @@ KtxFile::~KtxFile() { void KtxFile::addKeyValue(const char * key, const char * value) { keyArray.append(key); valueArray.append(value); - bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1; + header.bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1; } @@ -77,7 +80,8 @@ Stream & operator<< (Stream & s, KtxFile & file) { } return s; -} +}*/ +} // nv diff --git a/src/nvimage/KtxFile.h b/src/nvimage/KtxFile.h index 9f89590..b1b3674 100644 --- a/src/nvimage/KtxFile.h +++ b/src/nvimage/KtxFile.h @@ -6,6 +6,7 @@ #include "nvimage.h" #include "nvcore/StrLib.h" +#include "nvcore/Array.h" // KTX File format specification: // http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key @@ -14,22 +15,99 @@ namespace nv { class Stream; - // GL types (Table 3.2) - const uint KTX_UNSIGNED_BYTE; - const uint KTX_UNSIGNED_SHORT_5_6_5; - // ... - - // GL formats (Table 3.3) - // ... - - // GL internal formats (Table 3.12, 3.13) - // ... - - // GL base internal format. (Table 3.11) - const uint KTX_RGB; - const uint KTX_RGBA; - const uint KTX_ALPHA; - // ... 
+ // GL types + const uint KTX_UNSIGNED_BYTE = 0x1401; + const uint KTX_BYTE = 0x1400; + const uint KTX_UNSIGNED_SHORT = 0x1403; + const uint KTX_SHORT = 0x1402; + const uint KTX_UNSIGNED_INT = 0x1405; + const uint KTX_INT = 0x1404; + const uint KTX_FLOAT = 0x1406; + const uint KTX_UNSIGNED_BYTE_3_3_2 = 0x8032; + const uint KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362; + const uint KTX_UNSIGNED_SHORT_5_6_5 = 0x8363; + const uint KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364; + const uint KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033; + const uint KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365; + const uint KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034; + const uint KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366; + const uint KTX_UNSIGNED_INT_8_8_8_8 = 0x8035; + const uint KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367; + const uint KTX_UNSIGNED_INT_10_10_10_2 = 0x8036; + const uint KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368; + + // GL formats + const uint KTX_FORMAT_RED = 0x1903; + const uint KTX_FORMAT_RG = 0x8227; + const uint KTX_FORMAT_RGB = 0x1907; + const uint KTX_FORMAT_BGR = 0x80E0; + const uint KTX_FORMAT_RGBA = 0x1908; + const uint KTX_FORMAT_BGRA = 0x80E1; + const uint KTX_FORMAT_RED_INTEGER = 0x8D94; + const uint KTX_FORMAT_RG_INTEGER = 0x8228; + const uint KTX_FORMAT_RGB_INTEGER = 0x8D98; + const uint KTX_FORMAT_BGR_INTEGER = 0x8D9A; + const uint KTX_FORMAT_RGBA_INTEGER = 0x8D99; + const uint KTX_FORMAT_BGRA_INTEGER = 0x8D9B; + const uint KTX_FORMAT_STENCIL_INDEX = 0x1901; + const uint KTX_FORMAT_DEPTH_COMPONENT = 0x1902; + const uint KTX_FORMAT_DEPTH_STENCIL = 0x84F9; + + // GL internal formats + // BC1 + const uint KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1 = 0x83F0; + const uint KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 = 0x8C4C; + // BC1a + const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1 = 0x83F1; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 = 0x8C4D; + // BC2 + const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3 = 0x83F2; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 = 0x8C4E; + // BC3 + 
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5 = 0x83F3; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 = 0x8C4F; + // BC4 + const uint KTX_INTERNAL_COMPRESSED_RED_RGTC1 = 0x8DBB; + const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 = 0x8DBC; + // BC5 + const uint KTX_INTERNAL_COMPRESSED_RG_RGTC2 = 0x8DBD; + const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 = 0x8DBE; + // BC6 + const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F; + const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E; + // BC7 + const uint KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D; + + // ETC + const uint KTX_INTERNAL_COMPRESSED_RGB_ETC1 = 0x8D64; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC1 = 0x8D64; // ??? + + // ETC2 + const uint KTX_INTERNAL_COMPRESSED_RED_EAC = 0x9270; + const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_EAC = 0x9271; + + const uint KTX_INTERNAL_COMPRESSED_RG_EAC = 0x9272; + const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_EAC = 0x9273; + + const uint KTX_INTERNAL_COMPRESSED_RGB_ETC2 = 0x9274; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC2 = 0x9275; + + const uint KTX_INTERNAL_COMPRESSED_RGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9276; + const uint KTX_INTERNAL_COMPRESSED_SRGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9277; + + const uint KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC = 0x9278; + const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC = 0x9279; + + + // GL base internal formats + const uint KTX_BASE_INTERNAL_DEPTH_COMPONENT = 0x1902; + const uint KTX_BASE_INTERNAL_DEPTH_STENCIL = 0x84F9; + const uint KTX_BASE_INTERNAL_RED = 0x1903; + const uint KTX_BASE_INTERNAL_RG = 0x8227; + const uint KTX_BASE_INTERNAL_RGB = 0x1907; + const uint KTX_BASE_INTERNAL_RGBA = 0x1908; + const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901; struct KtxHeader { @@ -52,10 +130,10 @@ namespace nv }; - NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); + NVIMAGE_API Stream & operator<< (Stream & 
s, KtxHeader & header); - struct KtxFile { +/* struct KtxFile { KtxFile(); ~KtxFile(); @@ -66,10 +144,9 @@ namespace nv Array keyArray; Array valueArray; - }; - NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file); + NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);*/ /* diff --git a/src/nvimage/NormalMap.cpp b/src/nvimage/NormalMap.cpp index 559e4f8..754166a 100644 --- a/src/nvimage/NormalMap.cpp +++ b/src/nvimage/NormalMap.cpp @@ -1,208 +1,208 @@ -// Copyright NVIDIA Corporation 2007 -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// -// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#include "NormalMap.h" -#include "Filter.h" -#include "FloatImage.h" -#include "Image.h" - -#include "nvmath/Color.inl" -#include "nvmath/Vector.h" - -#include "nvcore/Ptr.h" - -#include // memcpy - - -using namespace nv; - -// Create normal map using the given kernels. 
-static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv) -{ - nvDebugCheck(kdu != NULL); - nvDebugCheck(kdv != NULL); - nvDebugCheck(img != NULL); - - const uint w = img->width(); - const uint h = img->height(); - - AutoPtr fimage(new FloatImage()); - fimage->allocate(4, w, h); - - // Compute height and store in alpha channel: - float * alphaChannel = fimage->channel(3); - for(uint i = 0; i < w * h; i++) - { - Vector4 color = toVector4(img->pixel(i)); - alphaChannel[i] = dot(color, heightWeights); - } - - float heightScale = 1.0f / 16.0f; // @@ Use a user defined factor. - - for(uint y = 0; y < h; y++) - { - for(uint x = 0; x < w; x++) - { - const float du = fimage->applyKernelXY(kdu, x, y, 0, 3, wm); - const float dv = fimage->applyKernelXY(kdv, x, y, 0, 3, wm); - - Vector3 n = normalize(Vector3(du, dv, heightScale)); - - fimage->pixel(0, x, y, 0) = 0.5f * n.x + 0.5f; - fimage->pixel(1, x, y, 0) = 0.5f * n.y + 0.5f; - fimage->pixel(2, x, y, 0) = 0.5f * n.z + 0.5f; - } - } - - return fimage.release(); -} - - -// Create normal map using the given kernels. -static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv) -{ - nvDebugCheck(kdu != NULL); - nvDebugCheck(kdv != NULL); - nvDebugCheck(img != NULL); - -#pragma NV_MESSAGE("FIXME: Height scale parameter should go away. 
It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.") - const float heightScale = 1.0f / 16.0f; - - const uint w = img->width(); - const uint h = img->height(); - - AutoPtr img_out(new FloatImage()); - img_out->allocate(4, w, h); - - for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - const float du = img->applyKernelXY(kdu, x, y, 0, 3, wm); - const float dv = img->applyKernelXY(kdv, x, y, 0, 3, wm); - - Vector3 n = normalize(Vector3(du, dv, heightScale)); - - img_out->pixel(0, x, y, 0) = n.x; - img_out->pixel(1, x, y, 0) = n.y; - img_out->pixel(2, x, y, 0) = n.z; - } - } - - // Copy alpha channel. - /*for (uint y = 0; y < h; y++) - { - for (uint x = 0; x < w; x++) - { - - img_out->pixel(3, x, y, 0) = img->pixel(3, x, y, 0); - } - }*/ - memcpy(img_out->channel(3), img->channel(3), w * h * sizeof(float)); - - return img_out.release(); -} - - -/// Create normal map using the given filter. -FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/) -{ - nvDebugCheck(img != NULL); - - // Init the kernels. - Kernel2 * kdu = NULL; - Kernel2 * kdv = NULL; - - switch(filter) - { - case NormalMapFilter_Sobel3x3: - kdu = new Kernel2(3); - break; - case NormalMapFilter_Sobel5x5: - kdu = new Kernel2(5); - break; - case NormalMapFilter_Sobel7x7: - kdu = new Kernel2(7); - break; - case NormalMapFilter_Sobel9x9: - kdu = new Kernel2(9); - break; - default: - nvDebugCheck(false); - }; - - kdu->initSobel(); - kdu->normalize(); - - kdv = new Kernel2(*kdu); - kdv->transpose(); - - return ::createNormalMap(img, wm, heightWeights, kdu, kdv); -} - - -/// Create normal map combining multiple sobel filters. 
-FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights) -{ - nvDebugCheck(img != NULL); - - Kernel2 * kdu = NULL; - Kernel2 * kdv = NULL; - - kdu = new Kernel2(9); - kdu->initBlendedSobel(filterWeights); - kdu->normalize(); - - kdv = new Kernel2(*kdu); - kdv->transpose(); - - return ::createNormalMap(img, wm, heightWeights, kdu, kdv); -} - - -FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights) -{ - nvDebugCheck(img != NULL); - - Kernel2 * kdu = NULL; - Kernel2 * kdv = NULL; - - kdu = new Kernel2(9); - kdu->initBlendedSobel(filterWeights); - kdu->normalize(); - - kdv = new Kernel2(*kdu); - kdv->transpose(); - - return ::createNormalMap(img, wm, kdu, kdv); -} - - -/// Normalize the given image in place. -void nv::normalizeNormalMap(FloatImage * img) -{ - nvDebugCheck(img != NULL); - - img->normalize(0); -} - +// Copyright NVIDIA Corporation 2007 -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#include "NormalMap.h" +#include "Filter.h" +#include "FloatImage.h" +#include "Image.h" + +#include "nvmath/Color.inl" +#include "nvmath/Vector.h" + +#include "nvcore/Ptr.h" + +#include // memcpy + + +using namespace nv; + +// Create normal map using the given kernels. +static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv) +{ + nvDebugCheck(kdu != NULL); + nvDebugCheck(kdv != NULL); + nvDebugCheck(img != NULL); + + const uint w = img->width(); + const uint h = img->height(); + + AutoPtr fimage(new FloatImage()); + fimage->allocate(4, w, h); + + // Compute height and store in alpha channel: + float * alphaChannel = fimage->channel(3); + for(uint i = 0; i < w * h; i++) + { + Vector4 color = toVector4(img->pixel(i)); + alphaChannel[i] = dot(color, heightWeights); + } + + float heightScale = 1.0f / 16.0f; // @@ Use a user defined factor. + + for(uint y = 0; y < h; y++) + { + for(uint x = 0; x < w; x++) + { + const float du = fimage->applyKernelXY(kdu, x, y, 0, 3, wm); + const float dv = fimage->applyKernelXY(kdv, x, y, 0, 3, wm); + + Vector3 n = normalize(Vector3(du, dv, heightScale)); + + fimage->pixel(0, x, y, 0) = 0.5f * n.x + 0.5f; + fimage->pixel(1, x, y, 0) = 0.5f * n.y + 0.5f; + fimage->pixel(2, x, y, 0) = 0.5f * n.z + 0.5f; + } + } + + return fimage.release(); +} + + +// Create normal map using the given kernels. 
+static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv) +{ + nvDebugCheck(kdu != NULL); + nvDebugCheck(kdv != NULL); + nvDebugCheck(img != NULL); + +#pragma NV_MESSAGE("FIXME: Height scale parameter should go away. It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.") + const float heightScale = 1.0f / 16.0f; + + const uint w = img->width(); + const uint h = img->height(); + + AutoPtr img_out(new FloatImage()); + img_out->allocate(4, w, h); + + for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + const float du = img->applyKernelXY(kdu, x, y, 0, 3, wm); + const float dv = img->applyKernelXY(kdv, x, y, 0, 3, wm); + + Vector3 n = normalize(Vector3(du, dv, heightScale)); + + img_out->pixel(0, x, y, 0) = n.x; + img_out->pixel(1, x, y, 0) = n.y; + img_out->pixel(2, x, y, 0) = n.z; + } + } + + // Copy alpha channel. + /*for (uint y = 0; y < h; y++) + { + for (uint x = 0; x < w; x++) + { + + img_out->pixel(3, x, y, 0) = img->pixel(3, x, y, 0); + } + }*/ + memcpy(img_out->channel(3), img->channel(3), w * h * sizeof(float)); + + return img_out.release(); +} + + +/// Create normal map using the given filter. +FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/) +{ + nvDebugCheck(img != NULL); + + // Init the kernels. 
+ Kernel2 * kdu = NULL; + Kernel2 * kdv = NULL; + + switch(filter) + { + case NormalMapFilter_Sobel3x3: + kdu = new Kernel2(3); + break; + case NormalMapFilter_Sobel5x5: + kdu = new Kernel2(5); + break; + case NormalMapFilter_Sobel7x7: + kdu = new Kernel2(7); + break; + case NormalMapFilter_Sobel9x9: + kdu = new Kernel2(9); + break; + default: + nvDebugCheck(false); + }; + + kdu->initSobel(); + kdu->normalize(); + + kdv = new Kernel2(*kdu); + kdv->transpose(); + + return ::createNormalMap(img, wm, heightWeights, kdu, kdv); +} + + +/// Create normal map combining multiple sobel filters. +FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights) +{ + nvDebugCheck(img != NULL); + + Kernel2 * kdu = NULL; + Kernel2 * kdv = NULL; + + kdu = new Kernel2(9); + kdu->initBlendedSobel(filterWeights); + kdu->normalize(); + + kdv = new Kernel2(*kdu); + kdv->transpose(); + + return ::createNormalMap(img, wm, heightWeights, kdu, kdv); +} + + +FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights) +{ + nvDebugCheck(img != NULL); + + Kernel2 * kdu = NULL; + Kernel2 * kdv = NULL; + + kdu = new Kernel2(9); + kdu->initBlendedSobel(filterWeights); + kdu->normalize(); + + kdv = new Kernel2(*kdu); + kdv->transpose(); + + return ::createNormalMap(img, wm, kdu, kdv); +} + + +/// Normalize the given image in place. +void nv::normalizeNormalMap(FloatImage * img) +{ + nvDebugCheck(img != NULL); + + img->normalize(0); +} + diff --git a/src/nvimage/TgaFile.h b/src/nvimage/TgaFile.h index bce2fc1..22310a7 100644 --- a/src/nvimage/TgaFile.h +++ b/src/nvimage/TgaFile.h @@ -101,6 +101,48 @@ inline Stream & operator<< (Stream & s, TgaFile & tga) return s; } + + +// @@ Move to BMP file? + +const int BITMAPFILEHEADER_SIZE = 14; +const int BITMAPINFOHEADER_SIZE = 40; +const int BM_TYPE = ((unsigned int)'M') << 8 | ((unsigned int)'B'); + +// BMP Header. 
+struct BmpFileHeader { + uint16 type; + uint32 size; + uint16 reserved1; + uint16 reserved2; + uint32 offBits; +}; + +struct BmpInfoHeader { + uint32 size; + uint32 width; + uint32 height; + uint16 planes; + uint16 bitCount; + uint32 compression; + uint32 sizeImage; + uint32 xPelsPerMeter; + uint32 yPelsPerMeter; + uint32 clrUsed; + uint32 clrImportant; +}; + +inline Stream & operator<< (Stream & s, BmpFileHeader & bmp) { + return s << bmp.type << bmp.size << bmp.reserved1 << bmp.reserved2 << bmp.offBits; +} + +inline Stream & operator<< (Stream & s, BmpInfoHeader & bmp) { + s << bmp.size << bmp.width << bmp.height << bmp.planes << bmp.bitCount << bmp.compression << bmp.sizeImage; + s << bmp.xPelsPerMeter << bmp.yPelsPerMeter << bmp.clrUsed << bmp.clrImportant; + return s; +} + + } // nv namespace #endif // NV_IMAGE_TGAFILE_H diff --git a/src/nvmath/CMakeLists.txt b/src/nvmath/CMakeLists.txt index abeb05f..c59cfeb 100644 --- a/src/nvmath/CMakeLists.txt +++ b/src/nvmath/CMakeLists.txt @@ -7,7 +7,7 @@ SET(MATH_SRCS Fitting.h Fitting.cpp Gamma.h Gamma.cpp Half.h Half.cpp - Matrix.h + Matrix.h Matrix.inl Matrix.cpp Plane.h Plane.inl Plane.cpp SphericalHarmonic.h SphericalHarmonic.cpp SimdVector.h SimdVector_SSE.h SimdVector_VE.h diff --git a/src/nvmath/Color.inl b/src/nvmath/Color.inl index 2b87ee4..d871704 100644 --- a/src/nvmath/Color.inl +++ b/src/nvmath/Color.inl @@ -157,6 +157,12 @@ namespace nv return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale); } + inline Vector3 toVector3(Color32 c) + { + const float scale = 1.0f / 255.0f; + return Vector3(c.r * scale, c.g * scale, c.b * scale); + } + inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1) { diff --git a/src/nvmath/Matrix.cpp b/src/nvmath/Matrix.cpp index 29bd19f..d171d13 100644 --- a/src/nvmath/Matrix.cpp +++ b/src/nvmath/Matrix.cpp @@ -1,441 +1,487 @@ -// This code is in the public domain -- castanyo@yahoo.es - -#include "Matrix.inl" -#include "Vector.inl" - -#include 
"nvcore/Array.inl" - -#include - -#if !NV_CC_MSVC && !NV_OS_ORBIS -#include -#endif - -using namespace nv; - - -// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise -// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above; -// indx[1..n] is an output vector that records the row permutation effected by the partial -// pivoting; d is output as -1 depending on whether the number of row interchanges was even -// or odd, respectively. This routine is used in combination with lubksb to solve linear equations -// or invert a matrix. -static bool ludcmp(float **a, int n, int *indx, float *d) -{ - const float TINY = 1.0e-20f; - - float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row. - - *d = 1.0; // No row interchanges yet. - for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information. - - float big = 0.0; - for (int j = 0; j < n; j++) { - big = max(big, fabsf(a[i][j])); - } - if (big == 0) { - return false; // Singular matrix - } - - // No nonzero largest element. - vv[i] = 1.0f / big; // Save the scaling. - } - - for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method. - for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j. - float sum = a[i][j]; - for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j]; - a[i][j] = sum; - } - - int imax = -1; - float big = 0.0; // Initialize for the search for largest pivot element. - for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N - float sum = a[i][j]; // of equation (2.3.13). - for (int k = 0; k < j; k++) { - sum -= a[i][k]*a[k][j]; - } - a[i][j]=sum; - - float dum = vv[i]*fabs(sum); - if (dum >= big) { - // Is the figure of merit for the pivot better than the best so far? - big = dum; - imax = i; - } - } - nvDebugCheck(imax != -1); - - if (j != imax) { // Do we need to interchange rows? 
- for (int k = 0; k < n; k++) { // Yes, do so... - swap(a[imax][k], a[j][k]); - } - *d = -(*d); // ...and change the parity of d. - vv[imax]=vv[j]; // Also interchange the scale factor. - } - - indx[j]=imax; - if (a[j][j] == 0.0) a[j][j] = TINY; - - // If the pivot element is zero the matrix is singular (at least to the precision of the - // algorithm). For some applications on singular matrices, it is desirable to substitute - // TINY for zero. - if (j != n-1) { // Now, finally, divide by the pivot element. - float dum = 1.0f / a[j][j]; - for (int i = j+1; i < n; i++) a[i][j] *= dum; - } - } // Go back for the next column in the reduction. - - return true; -} - - -// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix -// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input -// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector -// B, and returns with the solution vector X. a, n, and indx are not modified by this routine -// and can be left in place for successive calls with different right-hand sides b. This routine takes -// into account the possibility that b will begin with many zero elements, so it is efficient for use -// in matrix inversion. -static void lubksb(float **a, int n, int *indx, float b[]) -{ - int ii = 0; - for (int i=0; i=0; i--) { // Now we do the backsubstitution, equation (2.3.7). - float sum = b[i]; - for (int j = i+1; j < n; j++) { - sum -= a[i][j]*b[j]; - } - b[i] = sum/a[i][i]; // Store a component of the solution vector X. - } // All done! -} - - -bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x) -{ - nvDebugCheck(x != NULL); - - float m[4][4]; - float *a[4] = {m[0], m[1], m[2], m[3]}; - int idx[4]; - float d; - - for (int y = 0; y < 4; y++) { - for (int x = 0; x < 4; x++) { - a[x][y] = A(x, y); - } - } - - // Create LU decomposition. - if (!ludcmp(a, 4, idx, &d)) { - // Singular matrix. 
- return false; - } - - // Init solution. - *x = b; - - // Do back substitution. - lubksb(a, 4, idx, x->component); - - return true; -} - -// @@ Not tested. -Matrix nv::inverseLU(const Matrix & A) -{ - Vector4 Ai[4]; - - solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]); - solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]); - solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]); - solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]); - - return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]); -} - - - -bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x) -{ - nvDebugCheck(x != NULL); - - float m[3][3]; - float *a[3] = {m[0], m[1], m[2]}; - int idx[3]; - float d; - - for (int y = 0; y < 3; y++) { - for (int x = 0; x < 3; x++) { - a[x][y] = A(x, y); - } - } - - // Create LU decomposition. - if (!ludcmp(a, 3, idx, &d)) { - // Singular matrix. - return false; - } - - // Init solution. - *x = b; - - // Do back substitution. - lubksb(a, 3, idx, x->component); - - return true; -} - - -bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x) -{ - nvDebugCheck(x != NULL); - - *x = transform(inverseCramer(A), b); - - return true; // @@ Return false if determinant(A) == 0 ! -} - -bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x) -{ - nvDebugCheck(x != NULL); - - const float det = A.determinant(); - if (equal(det, 0.0f)) { // @@ Use input epsilon. - return false; - } - - Matrix3 Ai = inverseCramer(A); - - *x = transform(Ai, b); - - return true; -} - - - -// Inverse using gaussian elimination. From Jon's code. -Matrix nv::inverse(const Matrix & m) { - - Matrix A = m; - Matrix B(identity); - - int i, j, k; - float max, t, det, pivot; - - det = 1.0; - for (i=0; i<4; i++) { /* eliminate in column i, below diag */ - max = -1.; - for (k=i; k<4; k++) /* find pivot for column i */ - if (fabs(A(k, i)) > max) { - max = fabs(A(k, i)); - j = k; - } - if (max<=0.) 
return B; /* if no nonzero pivot, PUNT */ - if (j!=i) { /* swap rows i and j */ - for (k=i; k<4; k++) - swap(A(i, k), A(j, k)); - for (k=0; k<4; k++) - swap(B(i, k), B(j, k)); - det = -det; - } - pivot = A(i, i); - det *= pivot; - for (k=i+1; k<4; k++) /* only do elems to right of pivot */ - A(i, k) /= pivot; - for (k=0; k<4; k++) - B(i, k) /= pivot; - /* we know that A(i, i) will be set to 1, so don't bother to do it */ - - for (j=i+1; j<4; j++) { /* eliminate in rows below i */ - t = A(j, i); /* we're gonna zero this guy */ - for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ - A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ - for (k=0; k<4; k++) - B(j, k) -= B(i, k)*t; - } - } - - /*---------- backward elimination ----------*/ - - for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ - for (j=0; j max) { - max = fabs(A(k, i)); - j = k; - } - if (max<=0.) return B; /* if no nonzero pivot, PUNT */ - if (j!=i) { /* swap rows i and j */ - for (k=i; k<3; k++) - swap(A(i, k), A(j, k)); - for (k=0; k<3; k++) - swap(B(i, k), B(j, k)); - det = -det; - } - pivot = A(i, i); - det *= pivot; - for (k=i+1; k<3; k++) /* only do elems to right of pivot */ - A(i, k) /= pivot; - for (k=0; k<3; k++) - B(i, k) /= pivot; - /* we know that A(i, i) will be set to 1, so don't bother to do it */ - - for (j=i+1; j<3; j++) { /* eliminate in rows below i */ - t = A(j, i); /* we're gonna zero this guy */ - for (k=i+1; k<3; k++) /* subtract scaled row i from row j */ - A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ - for (k=0; k<3; k++) - B(j, k) -= B(i, k)*t; - } - } - - /*---------- backward elimination ----------*/ - - for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */ - for (j=0; j. -// -// Returns determinant of A, and B=inverse(A) -// If matrix A is singular, returns 0 and leaves trash in B. 
-// -#define SWAP(a, b, t) {t = a; a = b; b = t;} -double invert(Mat4& B, const Mat4& m) -{ - Mat4 A = m; - int i, j, k; - double max, t, det, pivot; - - /*---------- forward elimination ----------*/ - - for (i=0; i<4; i++) /* put identity matrix in B */ - for (j=0; j<4; j++) - B(i, j) = (double)(i==j); - - det = 1.0; - for (i=0; i<4; i++) { /* eliminate in column i, below diag */ - max = -1.; - for (k=i; k<4; k++) /* find pivot for column i */ - if (fabs(A(k, i)) > max) { - max = fabs(A(k, i)); - j = k; - } - if (max<=0.) return 0.; /* if no nonzero pivot, PUNT */ - if (j!=i) { /* swap rows i and j */ - for (k=i; k<4; k++) - SWAP(A(i, k), A(j, k), t); - for (k=0; k<4; k++) - SWAP(B(i, k), B(j, k), t); - det = -det; - } - pivot = A(i, i); - det *= pivot; - for (k=i+1; k<4; k++) /* only do elems to right of pivot */ - A(i, k) /= pivot; - for (k=0; k<4; k++) - B(i, k) /= pivot; - /* we know that A(i, i) will be set to 1, so don't bother to do it */ - - for (j=i+1; j<4; j++) { /* eliminate in rows below i */ - t = A(j, i); /* we're gonna zero this guy */ - for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ - A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ - for (k=0; k<4; k++) - B(j, k) -= B(i, k)*t; - } - } - - /*---------- backward elimination ----------*/ - - for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ - for (j=0; j + +#if !NV_CC_MSVC && !NV_OS_ORBIS +#include +#endif + +using namespace nv; + + +// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise +// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above; +// indx[1..n] is an output vector that records the row permutation effected by the partial +// pivoting; d is output as -1 depending on whether the number of row interchanges was even +// or odd, respectively. This routine is used in combination with lubksb to solve linear equations +// or invert a matrix. 
+static bool ludcmp(float **a, int n, int *indx, float *d) +{ + const float TINY = 1.0e-20f; + + float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row. + + *d = 1.0; // No row interchanges yet. + for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information. + + float big = 0.0; + for (int j = 0; j < n; j++) { + big = max(big, fabsf(a[i][j])); + } + if (big == 0) { + return false; // Singular matrix + } + + // No nonzero largest element. + vv[i] = 1.0f / big; // Save the scaling. + } + + for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method. + for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j. + float sum = a[i][j]; + for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j]; + a[i][j] = sum; + } + + int imax = -1; + float big = 0.0; // Initialize for the search for largest pivot element. + for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N + float sum = a[i][j]; // of equation (2.3.13). + for (int k = 0; k < j; k++) { + sum -= a[i][k]*a[k][j]; + } + a[i][j]=sum; + + float dum = vv[i]*fabs(sum); + if (dum >= big) { + // Is the figure of merit for the pivot better than the best so far? + big = dum; + imax = i; + } + } + nvDebugCheck(imax != -1); + + if (j != imax) { // Do we need to interchange rows? + for (int k = 0; k < n; k++) { // Yes, do so... + swap(a[imax][k], a[j][k]); + } + *d = -(*d); // ...and change the parity of d. + vv[imax]=vv[j]; // Also interchange the scale factor. + } + + indx[j]=imax; + if (a[j][j] == 0.0) a[j][j] = TINY; + + // If the pivot element is zero the matrix is singular (at least to the precision of the + // algorithm). For some applications on singular matrices, it is desirable to substitute + // TINY for zero. + if (j != n-1) { // Now, finally, divide by the pivot element. 
+ float dum = 1.0f / a[j][j]; + for (int i = j+1; i < n; i++) a[i][j] *= dum; + } + } // Go back for the next column in the reduction. + + return true; +} + + +// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix +// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input +// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector +// B, and returns with the solution vector X. a, n, and indx are not modified by this routine +// and can be left in place for successive calls with different right-hand sides b. This routine takes +// into account the possibility that b will begin with many zero elements, so it is efficient for use +// in matrix inversion. +static void lubksb(float **a, int n, int *indx, float b[]) +{ + int ii = 0; + for (int i=0; i=0; i--) { // Now we do the backsubstitution, equation (2.3.7). + float sum = b[i]; + for (int j = i+1; j < n; j++) { + sum -= a[i][j]*b[j]; + } + b[i] = sum/a[i][i]; // Store a component of the solution vector X. + } // All done! +} + + +bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x) +{ + nvDebugCheck(x != NULL); + + float m[4][4]; + float *a[4] = {m[0], m[1], m[2], m[3]}; + int idx[4]; + float d; + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 4, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 4, idx, x->component); + + return true; +} + +// @@ Not tested. 
+Matrix nv::inverseLU(const Matrix & A) +{ + Vector4 Ai[4]; + + solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]); + solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]); + solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]); + solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]); + + return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]); +} + + + +bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x) +{ + nvDebugCheck(x != NULL); + + float m[3][3]; + float *a[3] = {m[0], m[1], m[2]}; + int idx[3]; + float d; + + for (int y = 0; y < 3; y++) { + for (int x = 0; x < 3; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 3, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 3, idx, x->component); + + return true; +} + +bool nv::solveLU(const Matrix2 & A, const Vector2 & b, Vector2 * x) +{ + nvDebugCheck(x != NULL); + + float m[2][2]; + float *a[2] = {m[0], m[1]}; + int idx[2]; + float d; + + for (int y = 0; y < 2; y++) { + for (int x = 0; x < 2; x++) { + a[x][y] = A(x, y); + } + } + + // Create LU decomposition. + if (!ludcmp(a, 2, idx, &d)) { + // Singular matrix. + return false; + } + + // Init solution. + *x = b; + + // Do back substitution. + lubksb(a, 2, idx, x->component); + + return true; +} + + +bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x) +{ + nvDebugCheck(x != NULL); + + *x = transform(inverseCramer(A), b); + + return true; // @@ Return false if determinant(A) == 0 ! +} + +bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x) +{ + nvDebugCheck(x != NULL); + + const float det = A.determinant(); + if (equal(det, 0.0f)) { // @@ Use input epsilon. + return false; + } + + Matrix3 Ai = inverseCramer(A); + + *x = transform(Ai, b); + + return true; +} + +bool nv::solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x) +{ + nvDebugCheck(x != NULL); + + const float det = A.determinant(); + if (equal(det, 0.0f)) { // @@ Use input epsilon. 
+ return false; + } + + Matrix2 Ai = inverseCramer(A); + + *x = transform(Ai, b); + + return true; +} + + + +// Inverse using gaussian elimination. From Jon's code. +Matrix nv::inverse(const Matrix & m) { + + Matrix A = m; + Matrix B(identity); + + int i, j, k; + float max, t, det, pivot; + + det = 1.0; + for (i=0; i<4; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<4; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) return B; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<4; k++) + swap(A(i, k), A(j, k)); + for (k=0; k<4; k++) + swap(B(i, k), B(j, k)); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<4; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<4; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<4; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<4; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) 
return B; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<3; k++) + swap(A(i, k), A(j, k)); + for (k=0; k<3; k++) + swap(B(i, k), B(j, k)); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<3; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<3; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<3; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<3; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<3; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j. +// +// Returns determinant of A, and B=inverse(A) +// If matrix A is singular, returns 0 and leaves trash in B. +// +#define SWAP(a, b, t) {t = a; a = b; b = t;} +double invert(Mat4& B, const Mat4& m) +{ + Mat4 A = m; + int i, j, k; + double max, t, det, pivot; + + /*---------- forward elimination ----------*/ + + for (i=0; i<4; i++) /* put identity matrix in B */ + for (j=0; j<4; j++) + B(i, j) = (double)(i==j); + + det = 1.0; + for (i=0; i<4; i++) { /* eliminate in column i, below diag */ + max = -1.; + for (k=i; k<4; k++) /* find pivot for column i */ + if (fabs(A(k, i)) > max) { + max = fabs(A(k, i)); + j = k; + } + if (max<=0.) 
return 0.; /* if no nonzero pivot, PUNT */ + if (j!=i) { /* swap rows i and j */ + for (k=i; k<4; k++) + SWAP(A(i, k), A(j, k), t); + for (k=0; k<4; k++) + SWAP(B(i, k), B(j, k), t); + det = -det; + } + pivot = A(i, i); + det *= pivot; + for (k=i+1; k<4; k++) /* only do elems to right of pivot */ + A(i, k) /= pivot; + for (k=0; k<4; k++) + B(i, k) /= pivot; + /* we know that A(i, i) will be set to 1, so don't bother to do it */ + + for (j=i+1; j<4; j++) { /* eliminate in rows below i */ + t = A(j, i); /* we're gonna zero this guy */ + for (k=i+1; k<4; k++) /* subtract scaled row i from row j */ + A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */ + for (k=0; k<4; k++) + B(j, k) -= B(i, k)*t; + } + } + + /*---------- backward elimination ----------*/ + + for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */ + for (j=0; j T to(Vector2::Arg v) { return T(v.x, v.y); } - - // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. - //template T to(Vector3::Arg v) { return T(v.x, v.y, v.z); } - - // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. 
- //template T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); } - - - // Vector2 - inline Vector2::Vector2() {} - inline Vector2::Vector2(float f) : x(f), y(f) {} - inline Vector2::Vector2(float x, float y) : x(x), y(y) {} - inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {} - - inline const Vector2 & Vector2::operator=(Vector2::Arg v) - { - x = v.x; - y = v.y; - return *this; - } - - inline const float * Vector2::ptr() const - { - return &x; - } - - inline void Vector2::set(float x, float y) - { - this->x = x; - this->y = y; - } - - inline Vector2 Vector2::operator-() const - { - return Vector2(-x, -y); - } - - inline void Vector2::operator+=(Vector2::Arg v) - { - x += v.x; - y += v.y; - } - - inline void Vector2::operator-=(Vector2::Arg v) - { - x -= v.x; - y -= v.y; - } - - inline void Vector2::operator*=(float s) - { - x *= s; - y *= s; - } - - inline void Vector2::operator*=(Vector2::Arg v) - { - x *= v.x; - y *= v.y; - } - - inline bool operator==(Vector2::Arg a, Vector2::Arg b) - { - return a.x == b.x && a.y == b.y; - } - inline bool operator!=(Vector2::Arg a, Vector2::Arg b) - { - return a.x != b.x || a.y != b.y; - } - - - // Vector3 - inline Vector3::Vector3() {} - inline Vector3::Vector3(float f) : x(f), y(f), z(f) {} - inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {} - inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {} - inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {} - - inline const Vector3 & Vector3::operator=(Vector3::Arg v) - { - x = v.x; - y = v.y; - z = v.z; - return *this; - } - - - inline Vector2 Vector3::xy() const - { - return Vector2(x, y); - } - - inline const float * Vector3::ptr() const - { - return &x; - } - - inline void Vector3::set(float x, float y, float z) - { - this->x = x; - this->y = y; - this->z = z; - } - - inline Vector3 Vector3::operator-() const - { - return Vector3(-x, -y, -z); - } - - inline void Vector3::operator+=(Vector3::Arg v) - { - x += 
v.x; - y += v.y; - z += v.z; - } - - inline void Vector3::operator-=(Vector3::Arg v) - { - x -= v.x; - y -= v.y; - z -= v.z; - } - - inline void Vector3::operator*=(float s) - { - x *= s; - y *= s; - z *= s; - } - - inline void Vector3::operator/=(float s) - { - float is = 1.0f / s; - x *= is; - y *= is; - z *= is; - } - - inline void Vector3::operator*=(Vector3::Arg v) - { - x *= v.x; - y *= v.y; - z *= v.z; - } - - inline void Vector3::operator/=(Vector3::Arg v) - { - x /= v.x; - y /= v.y; - z /= v.z; - } - - inline bool operator==(Vector3::Arg a, Vector3::Arg b) - { - return a.x == b.x && a.y == b.y && a.z == b.z; - } - inline bool operator!=(Vector3::Arg a, Vector3::Arg b) - { - return a.x != b.x || a.y != b.y || a.z != b.z; - } - - - // Vector4 - inline Vector4::Vector4() {} - inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {} - inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} - inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {} - inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {} - inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {} - inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} - - inline const Vector4 & Vector4::operator=(const Vector4 & v) - { - x = v.x; - y = v.y; - z = v.z; - w = v.w; - return *this; - } - - inline Vector2 Vector4::xy() const - { - return Vector2(x, y); - } - - inline Vector2 Vector4::zw() const - { - return Vector2(z, w); - } - - inline Vector3 Vector4::xyz() const - { - return Vector3(x, y, z); - } - - inline const float * Vector4::ptr() const - { - return &x; - } - - inline void Vector4::set(float x, float y, float z, float w) - { - this->x = x; - this->y = y; - this->z = z; - this->w = w; - } - - inline Vector4 Vector4::operator-() const - { - return Vector4(-x, -y, -z, -w); - } - - inline void Vector4::operator+=(Vector4::Arg v) - { - x += v.x; 
- y += v.y; - z += v.z; - w += v.w; - } - - inline void Vector4::operator-=(Vector4::Arg v) - { - x -= v.x; - y -= v.y; - z -= v.z; - w -= v.w; - } - - inline void Vector4::operator*=(float s) - { - x *= s; - y *= s; - z *= s; - w *= s; - } - - inline void Vector4::operator/=(float s) - { - x /= s; - y /= s; - z /= s; - w /= s; - } - - inline void Vector4::operator*=(Vector4::Arg v) - { - x *= v.x; - y *= v.y; - z *= v.z; - w *= v.w; - } - - inline void Vector4::operator/=(Vector4::Arg v) - { - x /= v.x; - y /= v.y; - z /= v.z; - w /= v.w; - } - - inline bool operator==(Vector4::Arg a, Vector4::Arg b) - { - return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; - } - inline bool operator!=(Vector4::Arg a, Vector4::Arg b) - { - return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; - } - - - - // Functions - - - // Vector2 - - inline Vector2 add(Vector2::Arg a, Vector2::Arg b) - { - return Vector2(a.x + b.x, a.y + b.y); - } - inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b) - { - return add(a, b); - } - - inline Vector2 sub(Vector2::Arg a, Vector2::Arg b) - { - return Vector2(a.x - b.x, a.y - b.y); - } - inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b) - { - return sub(a, b); - } - - inline Vector2 scale(Vector2::Arg v, float s) - { - return Vector2(v.x * s, v.y * s); - } - - inline Vector2 scale(Vector2::Arg v, Vector2::Arg s) - { - return Vector2(v.x * s.x, v.y * s.y); - } - - inline Vector2 operator*(Vector2::Arg v, float s) - { - return scale(v, s); - } - - inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2) - { - return Vector2(v1.x*v2.x, v1.y*v2.y); - } - - inline Vector2 operator*(float s, Vector2::Arg v) - { - return scale(v, s); - } - - inline Vector2 operator/(Vector2::Arg v, float s) - { - return scale(v, 1.0f/s); - } - - inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t) - { - const float s = 1.0f - t; - return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y); - } - - inline float dot(Vector2::Arg a, 
Vector2::Arg b) - { - return a.x * b.x + a.y * b.y; - } - - inline float lengthSquared(Vector2::Arg v) - { - return v.x * v.x + v.y * v.y; - } - - inline float length(Vector2::Arg v) - { - return sqrtf(lengthSquared(v)); - } - - inline float distance(Vector2::Arg a, Vector2::Arg b) - { - return length(a - b); - } - - inline float inverseLength(Vector2::Arg v) - { - return 1.0f / sqrtf(lengthSquared(v)); - } - - inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) - { - return equal(length(v), 1, epsilon); - } - - inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) - { - float l = length(v); - nvDebugCheck(!isZero(l, epsilon)); - Vector2 n = scale(v, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; - } - - inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) - { - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; - } - return scale(v, 1.0f / l); - } - - // Safe, branchless normalization from Andy Firth. All error checking ommitted. 
- // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ - inline Vector2 normalizeFast(Vector2::Arg v) - { - const float very_small_float = 1.0e-037f; - float l = very_small_float + length(v); - return scale(v, 1.0f / l); - } - - inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) - { - return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon); - } - - inline Vector2 min(Vector2::Arg a, Vector2::Arg b) - { - return Vector2(min(a.x, b.x), min(a.y, b.y)); - } - - inline Vector2 max(Vector2::Arg a, Vector2::Arg b) - { - return Vector2(max(a.x, b.x), max(a.y, b.y)); - } - - inline Vector2 clamp(Vector2::Arg v, float min, float max) - { - return Vector2(clamp(v.x, min, max), clamp(v.y, min, max)); - } - - inline Vector2 saturate(Vector2::Arg v) - { - return Vector2(saturate(v.x), saturate(v.y)); - } - - inline bool isFinite(Vector2::Arg v) - { - return isFinite(v.x) && isFinite(v.y); - } - - inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f)) - { - if (!isFinite(v)) return fallback; - Vector2 vf = v; - nv::floatCleanup(vf.component, 2); - return vf; - } - - // Note, this is the area scaled by 2! - inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1) - { - return (v0.x * v1.y - v0.y * v1.x); // * 0.5f; - } - inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c) - { - // IC: While it may be appealing to use the following expression: - //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f; - - // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point - // numbers and the results becomes very unstable and dependent on the order of the factors. - - // Instead, it's preferable to substract the vertices first, and multiply the resulting small values together. 
The result - // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of - // the triangle. - - //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f; - return triangleArea(a-c, b-c); - } - - - template <> - inline uint hash(const Vector2 & v, uint h) - { - return sdbmFloatHash(v.component, 2, h); - } - - - - // Vector3 - - inline Vector3 add(Vector3::Arg a, Vector3::Arg b) - { - return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); - } - inline Vector3 add(Vector3::Arg a, float b) - { - return Vector3(a.x + b, a.y + b, a.z + b); - } - inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b) - { - return add(a, b); - } - inline Vector3 operator+(Vector3::Arg a, float b) - { - return add(a, b); - } - - inline Vector3 sub(Vector3::Arg a, Vector3::Arg b) - { - return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); - } - inline Vector3 sub(Vector3::Arg a, float b) - { - return Vector3(a.x - b, a.y - b, a.z - b); - } - inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b) - { - return sub(a, b); - } - inline Vector3 operator-(Vector3::Arg a, float b) - { - return sub(a, b); - } - - inline Vector3 cross(Vector3::Arg a, Vector3::Arg b) - { - return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); - } - - inline Vector3 scale(Vector3::Arg v, float s) - { - return Vector3(v.x * s, v.y * s, v.z * s); - } - - inline Vector3 scale(Vector3::Arg v, Vector3::Arg s) - { - return Vector3(v.x * s.x, v.y * s.y, v.z * s.z); - } - - inline Vector3 operator*(Vector3::Arg v, float s) - { - return scale(v, s); - } - - inline Vector3 operator*(float s, Vector3::Arg v) - { - return scale(v, s); - } - - inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s) - { - return scale(v, s); - } - - inline Vector3 operator/(Vector3::Arg v, float s) - { - return scale(v, 1.0f/s); - } - - /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s) - { - return Vector3(a.x + b.x * s, a.y + b.y 
* s, a.z + b.z * s); - }*/ - - inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t) - { - const float s = 1.0f - t; - return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z); - } - - inline float dot(Vector3::Arg a, Vector3::Arg b) - { - return a.x * b.x + a.y * b.y + a.z * b.z; - } - - inline float lengthSquared(Vector3::Arg v) - { - return v.x * v.x + v.y * v.y + v.z * v.z; - } - - inline float length(Vector3::Arg v) - { - return sqrtf(lengthSquared(v)); - } - - inline float distance(Vector3::Arg a, Vector3::Arg b) - { - return length(a - b); - } - - inline float distanceSquared(Vector3::Arg a, Vector3::Arg b) - { - return lengthSquared(a - b); - } - - inline float inverseLength(Vector3::Arg v) - { - return 1.0f / sqrtf(lengthSquared(v)); - } - - inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) - { - return equal(length(v), 1, epsilon); - } - - inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON) - { - float l = length(v); - nvDebugCheck(!isZero(l, epsilon)); - Vector3 n = scale(v, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; - } - - inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON) - { - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; - } - return scale(v, 1.0f / l); - } - - // Safe, branchless normalization from Andy Firth. All error checking ommitted. 
- // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ - inline Vector3 normalizeFast(Vector3::Arg v) - { - const float very_small_float = 1.0e-037f; - float l = very_small_float + length(v); - return scale(v, 1.0f / l); - } - - inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) - { - return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon); - } - - inline Vector3 min(Vector3::Arg a, Vector3::Arg b) - { - return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); - } - - inline Vector3 max(Vector3::Arg a, Vector3::Arg b) - { - return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); - } - - inline Vector3 clamp(Vector3::Arg v, float min, float max) - { - return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max)); - } - - inline Vector3 saturate(Vector3::Arg v) - { - return Vector3(saturate(v.x), saturate(v.y), saturate(v.z)); - } - - inline Vector3 floor(Vector3::Arg v) - { - return Vector3(floorf(v.x), floorf(v.y), floorf(v.z)); - } - - inline Vector3 ceil(Vector3::Arg v) - { - return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z)); - } - - inline bool isFinite(Vector3::Arg v) - { - return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); - } - - inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f)) - { - if (!isFinite(v)) return fallback; - Vector3 vf = v; - nv::floatCleanup(vf.component, 3); - return vf; - } - - inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n) - { - return v - (2 * dot(v, n)) * n; - } - - template <> - inline uint hash(const Vector3 & v, uint h) - { - return sdbmFloatHash(v.component, 3, h); - } - - - // Vector4 - - inline Vector4 add(Vector4::Arg a, Vector4::Arg b) - { - return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); - } - inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b) - { - return add(a, b); - } - - inline Vector4 sub(Vector4::Arg a, Vector4::Arg b) - { - return Vector4(a.x - b.x, a.y - 
b.y, a.z - b.z, a.w - b.w); - } - inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b) - { - return sub(a, b); - } - - inline Vector4 scale(Vector4::Arg v, float s) - { - return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); - } - - inline Vector4 scale(Vector4::Arg v, Vector4::Arg s) - { - return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w); - } - - inline Vector4 operator*(Vector4::Arg v, float s) - { - return scale(v, s); - } - - inline Vector4 operator*(float s, Vector4::Arg v) - { - return scale(v, s); - } - - inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s) - { - return scale(v, s); - } - - inline Vector4 operator/(Vector4::Arg v, float s) - { - return scale(v, 1.0f/s); - } - - /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s) - { - return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s); - }*/ - - inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t) - { - const float s = 1.0f - t; - return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w); - } - - inline float dot(Vector4::Arg a, Vector4::Arg b) - { - return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; - } - - inline float lengthSquared(Vector4::Arg v) - { - return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; - } - - inline float length(Vector4::Arg v) - { - return sqrtf(lengthSquared(v)); - } - - inline float inverseLength(Vector4::Arg v) - { - return 1.0f / sqrtf(lengthSquared(v)); - } - - inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) - { - return equal(length(v), 1, epsilon); - } - - inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON) - { - float l = length(v); - nvDebugCheck(!isZero(l, epsilon)); - Vector4 n = scale(v, 1.0f / l); - nvDebugCheck(isNormalized(n)); - return n; - } - - inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON) - { - float l = length(v); - if (isZero(l, epsilon)) { - return fallback; 
- } - return scale(v, 1.0f / l); - } - - // Safe, branchless normalization from Andy Firth. All error checking ommitted. - // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ - inline Vector4 normalizeFast(Vector4::Arg v) - { - const float very_small_float = 1.0e-037f; - float l = very_small_float + length(v); - return scale(v, 1.0f / l); - } - - inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) - { - return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon); - } - - inline Vector4 min(Vector4::Arg a, Vector4::Arg b) - { - return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); - } - - inline Vector4 max(Vector4::Arg a, Vector4::Arg b) - { - return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); - } - - inline Vector4 clamp(Vector4::Arg v, float min, float max) - { - return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max)); - } - - inline Vector4 saturate(Vector4::Arg v) - { - return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w)); - } - - inline bool isFinite(Vector4::Arg v) - { - return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w); - } - - inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f)) - { - if (!isFinite(v)) return fallback; - Vector4 vf = v; - nv::floatCleanup(vf.component, 4); - return vf; - } - - template <> - inline uint hash(const Vector4 & v, uint h) - { - return sdbmFloatHash(v.component, 4, h); - } - - -#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float - - //int: - - inline Vector2 scale(Vector2::Arg v, int s) - { - return Vector2(v.x * s, v.y * s); - } - - inline Vector2 operator*(Vector2::Arg v, int s) - { - return scale(v, s); - } - - inline Vector2 operator*(int s, Vector2::Arg v) - { - return scale(v, s); - } - - inline Vector2 
operator/(Vector2::Arg v, int s) - { - return scale(v, 1.0f/s); - } - - inline Vector3 scale(Vector3::Arg v, int s) - { - return Vector3(v.x * s, v.y * s, v.z * s); - } - - inline Vector3 operator*(Vector3::Arg v, int s) - { - return scale(v, s); - } - - inline Vector3 operator*(int s, Vector3::Arg v) - { - return scale(v, s); - } - - inline Vector3 operator/(Vector3::Arg v, int s) - { - return scale(v, 1.0f/s); - } - - inline Vector4 scale(Vector4::Arg v, int s) - { - return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); - } - - inline Vector4 operator*(Vector4::Arg v, int s) - { - return scale(v, s); - } - - inline Vector4 operator*(int s, Vector4::Arg v) - { - return scale(v, s); - } - - inline Vector4 operator/(Vector4::Arg v, int s) - { - return scale(v, 1.0f/s); - } - - //double: - - inline Vector3 operator*(Vector3::Arg v, double s) - { - return scale(v, (float)s); - } - - inline Vector3 operator*(double s, Vector3::Arg v) - { - return scale(v, (float)s); - } - - inline Vector3 operator/(Vector3::Arg v, double s) - { - return scale(v, 1.f/((float)s)); - } - -#endif //NV_OS_IOS - -} // nv namespace - -#endif // NV_MATH_VECTOR_INL +// This code is in the public domain -- castanyo@yahoo.es + +#pragma once +#ifndef NV_MATH_VECTOR_INL +#define NV_MATH_VECTOR_INL + +#include "Vector.h" +#include "nvcore/Utils.h" // min, max +#include "nvcore/Hash.h" // hash + +namespace nv +{ + + // Helpers to convert vector types. Assume T has x,y members and 2 argument constructor. + //template T to(Vector2::Arg v) { return T(v.x, v.y); } + + // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. + //template T to(Vector3::Arg v) { return T(v.x, v.y, v.z); } + + // Helpers to convert vector types. Assume T has x,y,z members and 3 argument constructor. 
+ //template T to(Vector4::Arg v) { return T(v.x, v.y, v.z, v.w); } + + + // Vector2 + inline Vector2::Vector2() {} + inline Vector2::Vector2(float f) : x(f), y(f) {} + inline Vector2::Vector2(float x, float y) : x(x), y(y) {} + inline Vector2::Vector2(Vector2::Arg v) : x(v.x), y(v.y) {} + + inline const Vector2 & Vector2::operator=(Vector2::Arg v) + { + x = v.x; + y = v.y; + return *this; + } + + inline const float * Vector2::ptr() const + { + return &x; + } + + inline void Vector2::set(float x, float y) + { + this->x = x; + this->y = y; + } + + inline Vector2 Vector2::operator-() const + { + return Vector2(-x, -y); + } + + inline void Vector2::operator+=(Vector2::Arg v) + { + x += v.x; + y += v.y; + } + + inline void Vector2::operator-=(Vector2::Arg v) + { + x -= v.x; + y -= v.y; + } + + inline void Vector2::operator*=(float s) + { + x *= s; + y *= s; + } + + inline void Vector2::operator*=(Vector2::Arg v) + { + x *= v.x; + y *= v.y; + } + + inline bool operator==(Vector2::Arg a, Vector2::Arg b) + { + return a.x == b.x && a.y == b.y; + } + inline bool operator!=(Vector2::Arg a, Vector2::Arg b) + { + return a.x != b.x || a.y != b.y; + } + + + // Vector3 + inline Vector3::Vector3() {} + inline Vector3::Vector3(float f) : x(f), y(f), z(f) {} + inline Vector3::Vector3(float x, float y, float z) : x(x), y(y), z(z) {} + inline Vector3::Vector3(Vector2::Arg v, float z) : x(v.x), y(v.y), z(z) {} + inline Vector3::Vector3(Vector3::Arg v) : x(v.x), y(v.y), z(v.z) {} + + inline const Vector3 & Vector3::operator=(Vector3::Arg v) + { + x = v.x; + y = v.y; + z = v.z; + return *this; + } + + + inline Vector2 Vector3::xy() const + { + return Vector2(x, y); + } + + inline const float * Vector3::ptr() const + { + return &x; + } + + inline void Vector3::set(float x, float y, float z) + { + this->x = x; + this->y = y; + this->z = z; + } + + inline Vector3 Vector3::operator-() const + { + return Vector3(-x, -y, -z); + } + + inline void Vector3::operator+=(Vector3::Arg v) + { + x += 
v.x; + y += v.y; + z += v.z; + } + + inline void Vector3::operator-=(Vector3::Arg v) + { + x -= v.x; + y -= v.y; + z -= v.z; + } + + inline void Vector3::operator*=(float s) + { + x *= s; + y *= s; + z *= s; + } + + inline void Vector3::operator/=(float s) + { + float is = 1.0f / s; + x *= is; + y *= is; + z *= is; + } + + inline void Vector3::operator*=(Vector3::Arg v) + { + x *= v.x; + y *= v.y; + z *= v.z; + } + + inline void Vector3::operator/=(Vector3::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + } + + inline bool operator==(Vector3::Arg a, Vector3::Arg b) + { + return a.x == b.x && a.y == b.y && a.z == b.z; + } + inline bool operator!=(Vector3::Arg a, Vector3::Arg b) + { + return a.x != b.x || a.y != b.y || a.z != b.z; + } + + + // Vector4 + inline Vector4::Vector4() {} + inline Vector4::Vector4(float f) : x(f), y(f), z(f), w(f) {} + inline Vector4::Vector4(float x, float y, float z, float w) : x(x), y(y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, float z, float w) : x(v.x), y(v.y), z(z), w(w) {} + inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {} + inline Vector4::Vector4(Vector3::Arg v, float w) : x(v.x), y(v.y), z(v.z), w(w) {} + inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + + inline const Vector4 & Vector4::operator=(const Vector4 & v) + { + x = v.x; + y = v.y; + z = v.z; + w = v.w; + return *this; + } + + inline Vector2 Vector4::xy() const + { + return Vector2(x, y); + } + + inline Vector2 Vector4::zw() const + { + return Vector2(z, w); + } + + inline Vector3 Vector4::xyz() const + { + return Vector3(x, y, z); + } + + inline const float * Vector4::ptr() const + { + return &x; + } + + inline void Vector4::set(float x, float y, float z, float w) + { + this->x = x; + this->y = y; + this->z = z; + this->w = w; + } + + inline Vector4 Vector4::operator-() const + { + return Vector4(-x, -y, -z, -w); + } + + inline void Vector4::operator+=(Vector4::Arg v) + { + x += v.x; 
+ y += v.y; + z += v.z; + w += v.w; + } + + inline void Vector4::operator-=(Vector4::Arg v) + { + x -= v.x; + y -= v.y; + z -= v.z; + w -= v.w; + } + + inline void Vector4::operator*=(float s) + { + x *= s; + y *= s; + z *= s; + w *= s; + } + + inline void Vector4::operator/=(float s) + { + x /= s; + y /= s; + z /= s; + w /= s; + } + + inline void Vector4::operator*=(Vector4::Arg v) + { + x *= v.x; + y *= v.y; + z *= v.z; + w *= v.w; + } + + inline void Vector4::operator/=(Vector4::Arg v) + { + x /= v.x; + y /= v.y; + z /= v.z; + w /= v.w; + } + + inline bool operator==(Vector4::Arg a, Vector4::Arg b) + { + return a.x == b.x && a.y == b.y && a.z == b.z && a.w == b.w; + } + inline bool operator!=(Vector4::Arg a, Vector4::Arg b) + { + return a.x != b.x || a.y != b.y || a.z != b.z || a.w != b.w; + } + + + + // Functions + + + // Vector2 + + inline Vector2 add(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(a.x + b.x, a.y + b.y); + } + inline Vector2 operator+(Vector2::Arg a, Vector2::Arg b) + { + return add(a, b); + } + + inline Vector2 sub(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(a.x - b.x, a.y - b.y); + } + inline Vector2 operator-(Vector2::Arg a, Vector2::Arg b) + { + return sub(a, b); + } + + inline Vector2 scale(Vector2::Arg v, float s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 scale(Vector2::Arg v, Vector2::Arg s) + { + return Vector2(v.x * s.x, v.y * s.y); + } + + inline Vector2 operator*(Vector2::Arg v, float s) + { + return scale(v, s); + } + + inline Vector2 operator*(Vector2::Arg v1, Vector2::Arg v2) + { + return Vector2(v1.x*v2.x, v1.y*v2.y); + } + + inline Vector2 operator*(float s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2 operator/(Vector2::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + inline Vector2 lerp(Vector2::Arg v1, Vector2::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector2(v1.x * s + t * v2.x, v1.y * s + t * v2.y); + } + + inline float dot(Vector2::Arg a, 
Vector2::Arg b) + { + return a.x * b.x + a.y * b.y; + } + + inline float lengthSquared(Vector2::Arg v) + { + return v.x * v.x + v.y * v.y; + } + + inline float length(Vector2::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float distance(Vector2::Arg a, Vector2::Arg b) + { + return length(a - b); + } + + inline float inverseLength(Vector2::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector2 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking ommitted. 
+ // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector2 normalizeFast(Vector2::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon); + } + + inline Vector2 min(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(min(a.x, b.x), min(a.y, b.y)); + } + + inline Vector2 max(Vector2::Arg a, Vector2::Arg b) + { + return Vector2(max(a.x, b.x), max(a.y, b.y)); + } + + inline Vector2 clamp(Vector2::Arg v, float min, float max) + { + return Vector2(clamp(v.x, min, max), clamp(v.y, min, max)); + } + + inline Vector2 saturate(Vector2::Arg v) + { + return Vector2(saturate(v.x), saturate(v.y)); + } + + inline bool isFinite(Vector2::Arg v) + { + return isFinite(v.x) && isFinite(v.y); + } + + inline Vector2 validate(Vector2::Arg v, Vector2::Arg fallback = Vector2(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector2 vf = v; + nv::floatCleanup(vf.component, 2); + return vf; + } + + // Note, this is the area scaled by 2! + inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1) + { + return (v0.x * v1.y - v0.y * v1.x); // * 0.5f; + } + inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c) + { + // IC: While it may be appealing to use the following expression: + //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f; + + // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point + // numbers and the results becomes very unstable and dependent on the order of the factors. + + // Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. 
The result + // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of + // the triangle. + + //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f; + return triangleArea(a-c, b-c); + } + + + template <> + inline uint hash(const Vector2 & v, uint h) + { + return sdbmFloatHash(v.component, 2, h); + } + + + + // Vector3 + + inline Vector3 add(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.x + b.x, a.y + b.y, a.z + b.z); + } + inline Vector3 add(Vector3::Arg a, float b) + { + return Vector3(a.x + b, a.y + b, a.z + b); + } + inline Vector3 operator+(Vector3::Arg a, Vector3::Arg b) + { + return add(a, b); + } + inline Vector3 operator+(Vector3::Arg a, float b) + { + return add(a, b); + } + + inline Vector3 sub(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.x - b.x, a.y - b.y, a.z - b.z); + } + inline Vector3 sub(Vector3::Arg a, float b) + { + return Vector3(a.x - b, a.y - b, a.z - b); + } + inline Vector3 operator-(Vector3::Arg a, Vector3::Arg b) + { + return sub(a, b); + } + inline Vector3 operator-(Vector3::Arg a, float b) + { + return sub(a, b); + } + + inline Vector3 cross(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(a.y * b.z - a.z * b.y, a.z * b.x - a.x * b.z, a.x * b.y - a.y * b.x); + } + + inline Vector3 scale(Vector3::Arg v, float s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 scale(Vector3::Arg v, Vector3::Arg s) + { + return Vector3(v.x * s.x, v.y * s.y, v.z * s.z); + } + + inline Vector3 operator*(Vector3::Arg v, float s) + { + return scale(v, s); + } + + inline Vector3 operator*(float s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator*(Vector3::Arg v, Vector3::Arg s) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, float s) + { + return Vector3(a.x + b.x * s, a.y + b.y 
* s, a.z + b.z * s); + }*/ + + inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector3(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z); + } + + inline float dot(Vector3::Arg a, Vector3::Arg b) + { + return a.x * b.x + a.y * b.y + a.z * b.z; + } + + inline float lengthSquared(Vector3::Arg v) + { + return v.x * v.x + v.y * v.y + v.z * v.z; + } + + inline float length(Vector3::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float distance(Vector3::Arg a, Vector3::Arg b) + { + return length(a - b); + } + + inline float distanceSquared(Vector3::Arg a, Vector3::Arg b) + { + return lengthSquared(a - b); + } + + inline float inverseLength(Vector3::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector3 normalize(Vector3::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector3 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector3 normalizeSafe(Vector3::Arg v, Vector3::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; + } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking ommitted. 
+ // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector3 normalizeFast(Vector3::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon); + } + + inline Vector3 min(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z)); + } + + inline Vector3 max(Vector3::Arg a, Vector3::Arg b) + { + return Vector3(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z)); + } + + inline Vector3 clamp(Vector3::Arg v, float min, float max) + { + return Vector3(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max)); + } + + inline Vector3 saturate(Vector3::Arg v) + { + return Vector3(saturate(v.x), saturate(v.y), saturate(v.z)); + } + + inline Vector3 floor(Vector3::Arg v) + { + return Vector3(floorf(v.x), floorf(v.y), floorf(v.z)); + } + + inline Vector3 ceil(Vector3::Arg v) + { + return Vector3(ceilf(v.x), ceilf(v.y), ceilf(v.z)); + } + + inline bool isFinite(Vector3::Arg v) + { + return isFinite(v.x) && isFinite(v.y) && isFinite(v.z); + } + + inline Vector3 validate(Vector3::Arg v, Vector3::Arg fallback = Vector3(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector3 vf = v; + nv::floatCleanup(vf.component, 3); + return vf; + } + + inline Vector3 reflect(Vector3::Arg v, Vector3::Arg n) + { + return v - (2 * dot(v, n)) * n; + } + + template <> + inline uint hash(const Vector3 & v, uint h) + { + return sdbmFloatHash(v.component, 3, h); + } + + + // Vector4 + + inline Vector4 add(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w); + } + inline Vector4 operator+(Vector4::Arg a, Vector4::Arg b) + { + return add(a, b); + } + + inline Vector4 sub(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(a.x - b.x, a.y - 
b.y, a.z - b.z, a.w - b.w); + } + inline Vector4 operator-(Vector4::Arg a, Vector4::Arg b) + { + return sub(a, b); + } + + inline Vector4 scale(Vector4::Arg v, float s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 scale(Vector4::Arg v, Vector4::Arg s) + { + return Vector4(v.x * s.x, v.y * s.y, v.z * s.z, v.w * s.w); + } + + inline Vector4 operator*(Vector4::Arg v, float s) + { + return scale(v, s); + } + + inline Vector4 operator*(float s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 operator*(Vector4::Arg v, Vector4::Arg s) + { + return scale(v, s); + } + + inline Vector4 operator/(Vector4::Arg v, float s) + { + return scale(v, 1.0f/s); + } + + /*inline Vector4 add_scaled(Vector4::Arg a, Vector4::Arg b, float s) + { + return Vector4(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s, a.w + b.w * s); + }*/ + + inline Vector4 lerp(Vector4::Arg v1, Vector4::Arg v2, float t) + { + const float s = 1.0f - t; + return Vector4(v1.x * s + t * v2.x, v1.y * s + t * v2.y, v1.z * s + t * v2.z, v1.w * s + t * v2.w); + } + + inline float dot(Vector4::Arg a, Vector4::Arg b) + { + return a.x * b.x + a.y * b.y + a.z * b.z + a.w * b.w; + } + + inline float lengthSquared(Vector4::Arg v) + { + return v.x * v.x + v.y * v.y + v.z * v.z + v.w * v.w; + } + + inline float length(Vector4::Arg v) + { + return sqrtf(lengthSquared(v)); + } + + inline float inverseLength(Vector4::Arg v) + { + return 1.0f / sqrtf(lengthSquared(v)); + } + + inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) + { + return equal(length(v), 1, epsilon); + } + + inline Vector4 normalize(Vector4::Arg v, float epsilon = NV_EPSILON) + { + float l = length(v); + nvDebugCheck(!isZero(l, epsilon)); + Vector4 n = scale(v, 1.0f / l); + nvDebugCheck(isNormalized(n)); + return n; + } + + inline Vector4 normalizeSafe(Vector4::Arg v, Vector4::Arg fallback, float epsilon = NV_EPSILON) + { + float l = length(v); + if (isZero(l, epsilon)) { + return fallback; 
+ } + return scale(v, 1.0f / l); + } + + // Safe, branchless normalization from Andy Firth. All error checking ommitted. + // http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/ + inline Vector4 normalizeFast(Vector4::Arg v) + { + const float very_small_float = 1.0e-037f; + float l = very_small_float + length(v); + return scale(v, 1.0f / l); + } + + inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) + { + return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon); + } + + inline Vector4 min(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(min(a.x, b.x), min(a.y, b.y), min(a.z, b.z), min(a.w, b.w)); + } + + inline Vector4 max(Vector4::Arg a, Vector4::Arg b) + { + return Vector4(max(a.x, b.x), max(a.y, b.y), max(a.z, b.z), max(a.w, b.w)); + } + + inline Vector4 clamp(Vector4::Arg v, float min, float max) + { + return Vector4(clamp(v.x, min, max), clamp(v.y, min, max), clamp(v.z, min, max), clamp(v.w, min, max)); + } + + inline Vector4 saturate(Vector4::Arg v) + { + return Vector4(saturate(v.x), saturate(v.y), saturate(v.z), saturate(v.w)); + } + + inline bool isFinite(Vector4::Arg v) + { + return isFinite(v.x) && isFinite(v.y) && isFinite(v.z) && isFinite(v.w); + } + + inline Vector4 validate(Vector4::Arg v, Vector4::Arg fallback = Vector4(0.0f)) + { + if (!isFinite(v)) return fallback; + Vector4 vf = v; + nv::floatCleanup(vf.component, 4); + return vf; + } + + template <> + inline uint hash(const Vector4 & v, uint h) + { + return sdbmFloatHash(v.component, 4, h); + } + + +#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float + + //int: + + inline Vector2 scale(Vector2::Arg v, int s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 operator*(Vector2::Arg v, int s) + { + return scale(v, s); + } + + inline Vector2 operator*(int s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2 
operator/(Vector2::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector3 scale(Vector3::Arg v, int s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 operator*(Vector3::Arg v, int s) + { + return scale(v, s); + } + + inline Vector3 operator*(int s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector4 scale(Vector4::Arg v, int s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 operator*(Vector4::Arg v, int s) + { + return scale(v, s); + } + + inline Vector4 operator*(int s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 operator/(Vector4::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + //double: + + inline Vector3 operator*(Vector3::Arg v, double s) + { + return scale(v, (float)s); + } + + inline Vector3 operator*(double s, Vector3::Arg v) + { + return scale(v, (float)s); + } + + inline Vector3 operator/(Vector3::Arg v, double s) + { + return scale(v, 1.f/((float)s)); + } + +#endif //NV_OS_IOS + +} // nv namespace + +#endif // NV_MATH_VECTOR_INL diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 6016f28..d15a506 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -194,15 +194,20 @@ namespace nv #endif } - inline uint log2(uint i) + inline uint log2(uint32 i) { - uint value = 0; - while( i >>= 1 ) { - value++; - } + uint32 value = 0; + while( i >>= 1 ) value++; return value; } + inline uint log2(uint64 i) + { + uint64 value = 0; + while (i >>= 1) value++; + return U32(value); + } + inline float lerp(float f0, float f1, float t) { const float s = 1.0f - t; diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h index b8eaedb..970fec7 100644 --- a/src/nvthread/Atomic.h +++ b/src/nvthread/Atomic.h @@ -106,6 +106,11 @@ namespace nv { #error "Atomics not implemented." 
#endif } + + inline void storeRelease(volatile float * ptr, float value) + { + storeRelease((uint32 *)ptr, *(uint32 *)&value); + } template diff --git a/src/nvthread/Event.cpp b/src/nvthread/Event.cpp index 92903a8..b6d8c15 100644 --- a/src/nvthread/Event.cpp +++ b/src/nvthread/Event.cpp @@ -17,7 +17,7 @@ struct Event::Private { }; Event::Event() : m(new Private) { - m->handle = CreateEvent(NULL, FALSE, FALSE, NULL); + m->handle = CreateEvent(/*lpEventAttributes=*/NULL, /*bManualReset=*/FALSE, /*bInitialState=*/FALSE, /*lpName=*/NULL); } Event::~Event() { diff --git a/src/nvthread/Mutex.cpp b/src/nvthread/Mutex.cpp index 9d4aa66..fe7fe68 100644 --- a/src/nvthread/Mutex.cpp +++ b/src/nvthread/Mutex.cpp @@ -13,7 +13,9 @@ #endif // NV_OS -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 +#include +#elif NV_USE_TELEMETRY #include extern HTELEMETRY tmContext; #endif @@ -45,14 +47,19 @@ Mutex::~Mutex () void Mutex::lock() { -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 + tmStartWaitForLock(0, 0, this, m->name); +#elif NV_USE_TELEMETRY TmU64 matcher; tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); #endif EnterCriticalSection(&m->mutex); -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 + tmEndWaitForLock(0); + tmAcquiredLock(0, 0, this, m->name); +#elif NV_USE_TELEMETRY tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); #endif @@ -60,7 +67,18 @@ void Mutex::lock() bool Mutex::tryLock() { -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 + tmStartWaitForLock(0, 0, this, m->name); + if (TryEnterCriticalSection(&m->mutex) != 0) { + tmEndWaitForLock(0); + tmAcquiredLock(0, 0, this, m->name); + return true; + } + else { + tmEndWaitForLock(0); + return false; + } +#elif NV_USE_TELEMETRY TmU64 matcher; tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); if (TryEnterCriticalSection(&m->mutex) != 0) { @@ -79,7 +97,9 @@ bool Mutex::tryLock() 
void Mutex::unlock() { -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 + tmReleasedLock(0, this); +#elif NV_USE_TELEMETRY tmSetLockState(tmContext, this, TMLS_RELEASED, "released"); #endif @@ -90,13 +110,17 @@ void Mutex::unlock() struct Mutex::Private { pthread_mutex_t mutex; + pthread_mutexattr_t attr; const char * name; }; Mutex::Mutex (const char * name) : m(new Private) { - int result = pthread_mutex_init(&m->mutex, NULL); + pthread_mutexattr_init(&m->attr); + pthread_mutexattr_settype(&m->attr, PTHREAD_MUTEX_RECURSIVE); + int result = pthread_mutex_init(&m->mutex, &m->attr); + //m->mutex = PTHREAD_MUTEX_INITIALIZER; m->name = name; nvDebugCheck(result == 0); } @@ -105,6 +129,8 @@ Mutex::~Mutex () { int result = pthread_mutex_destroy(&m->mutex); nvDebugCheck(result == 0); + result = pthread_mutexattr_destroy(&m->attr); + nvDebugCheck(result == 0); } void Mutex::lock() diff --git a/src/nvthread/Thread.cpp b/src/nvthread/Thread.cpp index 869d7e1..b72ba5a 100644 --- a/src/nvthread/Thread.cpp +++ b/src/nvthread/Thread.cpp @@ -9,7 +9,11 @@ #include // usleep #endif -#if NV_USE_TELEMETRY +#include "nvcore/StrLib.h" + +#if NV_USE_TELEMETRY3 +#include +#elif NV_USE_TELEMETRY #include extern HTELEMETRY tmContext; #endif @@ -118,16 +122,12 @@ void Thread::start(ThreadFunc * func, void * arg) nvDebugCheck(p->thread != NULL); if (p->name != NULL) { setThreadName(threadId, p->name); - #if NV_USE_TELEMETRY + #if NV_USE_TELEMETRY3 + tmThreadName(0, threadId, p->name); + #elif NV_USE_TELEMETRY tmThreadName(tmContext, threadId, p->name); #endif } -#elif NV_OS_ORBIS - int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? 
p->name : "nv::Thread"); - nvDebugCheck(ret == 0); - // use any non-system core - scePthreadSetaffinity(p->thread, 0x3F); - scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2); #elif NV_OS_USE_PTHREAD int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr()); nvDebugCheck(result == 0); diff --git a/src/nvthread/ThreadPool.cpp b/src/nvthread/ThreadPool.cpp index 8373870..40f39bb 100644 --- a/src/nvthread/ThreadPool.cpp +++ b/src/nvthread/ThreadPool.cpp @@ -8,7 +8,9 @@ #include "nvcore/Utils.h" #include "nvcore/StrLib.h" -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 +#include +#elif NV_USE_TELEMETRY #include extern HTELEMETRY tmContext; #endif @@ -84,7 +86,9 @@ AutoPtr s_pool; } { -#if NV_USE_TELEMETRY +#if NV_USE_TELEMETRY3 + tmZone(0, TMZF_NONE, "worker"); +#elif NV_USE_TELEMETRY tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker"); #endif func(s_pool->arg, s_pool->useCallingThread + i); @@ -116,11 +120,11 @@ ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffi lockThreadToProcessor(0); // Calling thread always locked to processor 0. } + StringBuilder name; for (uint i = 0; i < threadCount; i++) { - StringBuilder name; name.format("worker %d", i); workers[i].setName(name.release()); // @Leak - workers[i].start(workerFunc, (void *)i); + workers[i].start(workerFunc, (void *)(uintptr_t)i); } allIdle = true; @@ -141,9 +145,6 @@ ThreadPool::~ThreadPool() void ThreadPool::run(ThreadTask * func, void * arg) { - // Wait until threads are idle. - wait(); - start(func, arg); if (useCallingThread) { diff --git a/src/nvthread/nvthread.cpp b/src/nvthread/nvthread.cpp index 38b5a86..b727f2e 100644 --- a/src/nvthread/nvthread.cpp +++ b/src/nvthread/nvthread.cpp @@ -85,7 +85,9 @@ uint nv::processorCount() { return count; #elif NV_OS_ORBIS - return 6; + return 6; +#elif NV_OS_DURANGO + return 6; #elif NV_OS_XBOX return 3; // or 6? 
#elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX diff --git a/src/nvtt/BlockCompressor.cpp b/src/nvtt/BlockCompressor.cpp index 89e15fb..5b703ac 100644 --- a/src/nvtt/BlockCompressor.cpp +++ b/src/nvtt/BlockCompressor.cpp @@ -25,6 +25,7 @@ #include "BlockCompressor.h" #include "OutputOptions.h" #include "TaskDispatcher.h" +#include "CompressionOptions.h" #include "nvimage/Image.h" #include "nvimage/ColorBlock.h" @@ -33,6 +34,7 @@ #include "nvmath/Vector.inl" #include "nvcore/Memory.h" +#include "nvcore/Array.inl" #include // placement new @@ -40,85 +42,13 @@ using namespace nv; using namespace nvtt; -/* -// OpenMP -#if defined(HAVE_OPENMP) -#include -#endif - -void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - const uint bs = blockSize(); - const uint bw = (w + 3) / 4; - const uint bh = (h + 3) / 4; - -#if defined(HAVE_OPENMP) - bool singleThreaded = false; -#else - bool singleThreaded = true; -#endif - - // Use a single thread to compress small textures. - if (bw * bh < 16) singleThreaded = true; - - if (singleThreaded) - { - nvDebugCheck(bs <= 16); - uint8 mem[16]; // @@ Output one row at a time! 
- - for (int y = 0; y < int(h); y += 4) { - for (uint x = 0; x < w; x += 4) { - - ColorBlock rgba; - rgba.init(w, h, data, x, y); - - compressBlock(rgba, alphaMode, compressionOptions, mem); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, bs); - } - } - } - } -#if defined(HAVE_OPENMP) - else - { - const uint size = bs * bw * bh; - uint8 * mem = new uint8[size]; - - #pragma omp parallel - { - #pragma omp for - for (int i = 0; i < int(bw*bh); i++) - { - const uint x = i % bw; - const uint y = i / bw; - - ColorBlock rgba; - rgba.init(w, h, data, 4*x, 4*y); - - uint8 * ptr = mem + (y * bw + x) * bs; - compressBlock(rgba, alphaMode, compressionOptions, ptr); - } // omp for - } // omp parallel - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(mem, size); - } - - delete [] mem; - } -#endif -} -*/ - struct CompressorContext { - nvtt::AlphaMode alphaMode; + AlphaMode alphaMode; uint w, h, d; const float * data; - const nvtt::CompressionOptions::Private * compressionOptions; + const CompressionOptions::Private * compressionOptions; uint bw, bh, bs; uint8 * mem; @@ -144,7 +74,7 @@ void ColorBlockCompressorTask(void * data, int i) } } -void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) { nvDebugCheck(d == 1); @@ -182,66 +112,6 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u delete [] context.mem; } - -#if 0 -// Each task compresses one block. 
-void ColorSetCompressorTask(void * data, int i) -{ - CompressorContext * d = (CompressorContext *) data; - - uint x = i % d->bw; - uint y = i / d->bw; - - //for (uint x = 0; x < d->bw; x++) - { - ColorSet set; - set.setColors(d->data, d->w, d->h, x * 4, y * 4); - - uint8 * ptr = d->mem + (y * d->bw + x) * d->bs; - ((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr); - } -} - - -void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) -{ - nvDebugCheck(d == 1); - - CompressorContext context; - context.alphaMode = alphaMode; - context.w = w; - context.h = h; - context.data = data; - context.compressionOptions = &compressionOptions; - - context.bs = blockSize(); - context.bw = (w + 3) / 4; - context.bh = (h + 3) / 4; - - context.compressor = this; - - SequentialTaskDispatcher sequential; - - // Use a single thread to compress small textures. - if (context.bh < 4) dispatcher = &sequential; - -#if _DEBUG - dispatcher = &sequential; -#endif - - const uint count = context.bw * context.bh; - const uint size = context.bs * count; - context.mem = new uint8[size]; - - dispatcher->dispatch(ColorSetCompressorTask, &context, count); - - outputOptions.writeData(context.mem, size); - - delete [] context.mem; -} -#endif // 0 - - // Each task compresses one block. 
void FloatColorCompressorTask(void * data, int i) { @@ -262,8 +132,8 @@ void FloatColorCompressorTask(void * data, int i) Vector4 colors[16]; float weights[16]; - const uint block_w = min(d->w - block_x * 4U, 4U); - const uint block_h = min(d->h - block_y * 4U, 4U); + const uint block_w = min(d->w - block_x * 4, 4U); + const uint block_h = min(d->h - block_y * 4, 4U); uint x, y; for (y = 0; y < block_h; y++) { @@ -274,7 +144,7 @@ void FloatColorCompressorTask(void * data, int i) colors[dst_idx].y = g[src_idx]; colors[dst_idx].z = b[src_idx]; colors[dst_idx].w = a[src_idx]; - weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f; + weights[dst_idx] = (d->alphaMode == AlphaMode_Transparency) ? saturate(a[src_idx]) : 1.0f; } for (; x < 4; x++) { uint dst_idx = 4 * y + x; @@ -289,14 +159,14 @@ void FloatColorCompressorTask(void * data, int i) weights[dst_idx] = 0.0f; } } - + // Compress block. uint8 * output = d->mem + (block_y * d->bw + block_x) * d->bs; ((FloatColorCompressor *)d->compressor)->compressBlock(colors, weights, *d->compressionOptions, output); } -void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) { nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures. 
@@ -308,7 +178,7 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, context.data = data; context.compressionOptions = &compressionOptions; - context.bs = blockSize(); + context.bs = blockSize(compressionOptions); context.bw = (w + 3) / 4; context.bh = (h + 3) / 4; @@ -333,3 +203,466 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, delete [] context.mem; } + + +// BC1 +#include "CompressorDXT1.h" + +void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), (BlockDXT1 *)output); +} +void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output); +} + + +// @@ BC1a + +// @@ BC2 + +// @@ BC3 + + +// BC3_RGBM +#include "CompressorDXT5_RGBM.h" + +void CompressorBC3_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + compress_dxt5_rgbm(colors, weights, compressionOptions.rgbmThreshold, (BlockDXT5 *)output); +} + + +// ETC +#include "CompressorETC.h" + +void CompressorETC1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + compress_etc1(colors, weights, compressionOptions.colorWeight.xyz(), output); +} +void CompressorETC2_R::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + // @@ Change radius based on quality. 
+ compress_eac(colors, weights, /*input_channel=*/1, /*search_radius=*/1, /*use_11bit_mode=*/true, output); +} +void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + //compress_eac_rg(colors, weights, 1, 2, output); +} +void CompressorETC2_RGB::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + // @@ Tweak quality options. + compress_etc2(colors, weights, compressionOptions.colorWeight.xyz(), output); +} +void CompressorETC2_RGBA::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + // @@ Tweak quality options. + // @@ Change radius based on quality. + compress_etc2_eac(colors, weights, compressionOptions.colorWeight.xyz(), output); +} +/*void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + // @@ Change radius based on quality. + compress_eac_rg(colors, weights, compressionOptions.colorWeight.xyz(), output); +}*/ +void CompressorETC2_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +{ + compress_etc2_rgbm(colors, weights, compressionOptions.rgbmThreshold, output); +} + + + +// External compressors. 
+ +#if defined(HAVE_ATITC) + +typedef int BOOL; +typedef _W64 unsigned long ULONG_PTR; +typedef ULONG_PTR DWORD_PTR; +#include "atitc/ATI_Compress.h" + +void AtiCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = w; + srcTexture.dwHeight = h; + if (inputFormat == InputFormat_BGRA_8UB) + { + srcTexture.dwPitch = w * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + } + else + { + // @@ Floating point input is not swizzled. + srcTexture.dwPitch = w * 16; + srcTexture.format = ATI_TC_FORMAT_ARGB_32F; + } + srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) data; + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = w; + destTexture.dwHeight = h; + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT1; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); + + ATI_TC_CompressOptions options; + options.dwSize = sizeof(options); + options.bUseChannelWeighting = false; + options.bUseAdaptiveWeighting = false; + options.bDXT1UseAlpha = false; + options.nCompressionSpeed = ATI_TC_Speed_Normal; + options.bDisableMultiThreading = false; + //options.bDisableMultiThreading = true; + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } + + mem::free(destTexture.pData); +} + +void AtiCompressorDXT5::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * 
data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + // Init source texture + ATI_TC_Texture srcTexture; + srcTexture.dwSize = sizeof(srcTexture); + srcTexture.dwWidth = w; + srcTexture.dwHeight = h; + if (inputFormat == InputFormat_BGRA_8UB) + { + srcTexture.dwPitch = w * 4; + srcTexture.format = ATI_TC_FORMAT_ARGB_8888; + } + else + { + srcTexture.dwPitch = w * 16; + srcTexture.format = ATI_TC_FORMAT_ARGB_32F; + } + srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); + srcTexture.pData = (ATI_TC_BYTE*) data; + + // Init dest texture + ATI_TC_Texture destTexture; + destTexture.dwSize = sizeof(destTexture); + destTexture.dwWidth = w; + destTexture.dwHeight = h; + destTexture.dwPitch = 0; + destTexture.format = ATI_TC_FORMAT_DXT5; + destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); + destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); + + // Compress + ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); + } + + mem::free(destTexture.pData); +} + +#endif // defined(HAVE_ATITC) + +#if defined(HAVE_SQUISH) + +//#include "squish/squish.h" +#include "squish-1.10/squish.h" + +void SquishCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + nvDebugCheck(false); + +#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB") + /* + Image img(*image); + int count = img.width() * img.height(); + for (int i = 0; i < count; i++) + { + Color32 c = img.pixel(i); + img.pixel(i) = Color32(c.b, c.g, c.r, c.a); + } + + int size = squish::GetStorageRequirements(img.width(), 
img.height(), squish::kDxt1); + void * blocks = mem::malloc(size); + + squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(blocks, size); + } + + mem::free(blocks); + */ +} + +#endif // defined(HAVE_SQUISH) + + +#if defined(HAVE_D3DX) + +void D3DXCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + nvDebugCheck(d == 1); + + IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION); + + D3DPRESENT_PARAMETERS presentParams; + ZeroMemory(&presentParams, sizeof(presentParams)); + presentParams.Windowed = TRUE; + presentParams.SwapEffect = D3DSWAPEFFECT_COPY; + presentParams.BackBufferWidth = 8; + presentParams.BackBufferHeight = 8; + presentParams.BackBufferFormat = D3DFMT_UNKNOWN; + + HRESULT err; + + IDirect3DDevice9 * device = NULL; + err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device); + + IDirect3DTexture9 * texture = NULL; + err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture); + + IDirect3DSurface9 * surface = NULL; + err = texture->GetSurfaceLevel(0, &surface); + + RECT rect; + rect.left = 0; + rect.top = 0; + rect.bottom = h; + rect.right = w; + + if (inputFormat == InputFormat_BGRA_8UB) + { + err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0); + } + else + { + err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0); + } + + if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA) + { + D3DLOCKED_RECT rect; + ZeroMemory(&rect, sizeof(rect)); + + err = surface->LockRect(&rect, NULL, 
D3DLOCK_READONLY); + + if (outputOptions.outputHandler != NULL) { + int size = rect.Pitch * ((h + 3) / 4); + outputOptions.outputHandler->writeData(rect.pBits, size); + } + + err = surface->UnlockRect(); + } + + surface->Release(); + device->Release(); + d3d->Release(); +} + +#endif // defined(HAVE_D3DX) + + +#if defined(HAVE_STB) + +#define STB_DEFINE +#include "stb/stb_dxt.h" + +void StbCompressorDXT1::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) +{ + rgba.swizzle(2, 1, 0, 3); // Swap R and B + stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0); +} + +#endif // defined(HAVE_STB) + + +#if defined(HAVE_ETCLIB) +#include "Etc.h" + +void EtcLibCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + //nvCheck(d == 1); // Encode one layer at a time? + + Etc::Image::Format format; + if (compressionOptions.format == Format_ETC1) { + format = Etc::Image::Format::ETC1; + } + else if (compressionOptions.format == Format_ETC2_R) { + format = Etc::Image::Format::R11; + } + else if (compressionOptions.format == Format_ETC2_RG) { + format = Etc::Image::Format::RG11; + } + else if (compressionOptions.format == Format_ETC2_RGB) { + format = Etc::Image::Format::RGB8; + //format = Etc::Image::Format::SRGB8; + } + else if (compressionOptions.format == Format_ETC2_RGBA) { + format = Etc::Image::Format::RGBA8; + //format = Etc::Image::Format::SRGBA8; + } + else if (compressionOptions.format == Format_ETC2_RGB_A1) { + format = Etc::Image::Format::RGB8A1; + //format = Etc::Image::Format::SRGB8A1; + } + else { + nvCheck(false); + return; + } + + Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBA; + + // @@ Use normal compression metric for normals? + //if (compressionOptions.) 
+ + // @@ Adjust based on quality. + int effort = ETCCOMP_DEFAULT_EFFORT_LEVEL; + + // @@ What are the defaults? + uint jobs = 4; + uint max_jobs = 4; + + uint8 * out_data = NULL; + uint out_size = 0; + uint out_width = 0; + uint out_height = 0; + int out_time = 0; + + // Swizzle color data. + nv::Array tmp; + uint count = w * h; + tmp.resize(4 * count); + for (uint i = 0; i < count; i++) { + tmp[4*i+0] = data[count*0 + i]; + tmp[4*i+1] = data[count*1 + i]; + tmp[4*i+2] = data[count*2 + i]; + tmp[4*i+3] = data[count*3 + i]; + } + + Etc::Encode(tmp.buffer(), w, h, format, error_metric, effort, jobs, max_jobs, &out_data, &out_size, &out_width, &out_height, &out_time); + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(out_data, I32(out_size)); + } +} + +#endif + +#if defined(HAVE_RGETC) +#include "rg_etc1.h" + +NV_AT_STARTUP(rg_etc1::pack_etc1_block_init()); + +void RgEtcCompressor::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) +{ + rg_etc1::etc1_pack_params pack_params; + + pack_params.m_quality = rg_etc1::cMediumQuality; + if (compressionOptions.quality == Quality_Fastest) pack_params.m_quality = rg_etc1::cLowQuality; + else if (compressionOptions.quality == Quality_Production) pack_params.m_quality = rg_etc1::cHighQuality; + else if (compressionOptions.quality == Quality_Highest) pack_params.m_quality = rg_etc1::cHighQuality; + else if (compressionOptions.quality == Quality_Normal) pack_params.m_quality = rg_etc1::cMediumQuality; + + rgba.swizzle(2, 1, 0, 3); + rg_etc1::pack_etc1_block(output, (uint *)rgba.colors(), pack_params); + + //Vector4 result[16]; + //nv::decompress_etc(output, result); + +} + +#endif + + +#if defined(HAVE_PVRTEXTOOL) + +#include // for CPVRTexture, CPVRTextureHeader, PixelType, Transcode + +#include "nvmath/Color.inl" + +void CompressorPVR::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, 
TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) +{ + EPVRTColourSpace color_space = ePVRTCSpacelRGB; + + //pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('b','g','r','a',8,8,8,8); + pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('r','g','b',0,8,8,8,0); + pvrtexture::CPVRTextureHeader header(src_pixel_type.PixelTypeID, w, h, d, 1/*num mips*/, 1/*num array*/, 1/*num faces*/, color_space, ePVRTVarTypeUnsignedByteNorm); + + /* + uint count = w * h * d; + Array tmp; + tmp.resize(count); + + for (uint i = 0; i < count; i++) { + tmp[i] = toColor32(Vector4(data[0*count + i], data[1*count + i], data[2*count + i], data[3*count + i])); + } + */ + + uint count = w * h * d; + Array tmp; + tmp.resize(3 * count); + + for (uint i = 0; i < count; i++) { + tmp[3*i+0] = data[0*count + i] * 255.0f; + tmp[3*i+1] = data[1*count + i] * 255.0f; + tmp[3*i+2] = data[2*count + i] * 255.0f; + } + + pvrtexture::CPVRTexture texture(header, tmp.buffer()); + + pvrtexture::PixelType dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB); + + if (compressionOptions.format == Format_PVR_2BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB); + else if (compressionOptions.format == Format_PVR_4BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGB); + else if (compressionOptions.format == Format_PVR_2BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGBA); + else if (compressionOptions.format == Format_PVR_4BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGBA); + + bool success = pvrtexture::Transcode(texture, dst_pixel_type, ePVRTVarTypeUnsignedByteNorm, color_space, pvrtexture::ePVRTCNormal, false); + + if (success) { + uint size = 0; + if (compressionOptions.format == Format_PVR_2BPP_RGB || compressionOptions.format == Format_PVR_2BPP_RGBA) { + // 2 bpp + const uint bpp = 2u; + const uint block_size = 8u 
* 4u; + const uint size_factor=(block_size*bpp)>>3u; + const uint block_width=nv::max((w>>3u), 2u); + const uint block_height=nv::max((h>>2u), 2u); + size = d * block_width * block_height * size_factor; + } + else { + // 4 bpp + const uint bpp = 4u; + const uint block_size = 4u * 4u; + const uint size_factor = (block_size*bpp) >> 3u; + const uint block_width = max((w>>2u), 2u); + const uint block_height = max((h>>2u), 2u); + size = d * block_width * block_height * size_factor; + } + + if (outputOptions.outputHandler != NULL) { + outputOptions.outputHandler->writeData(texture.getDataPtr(), I32(size)); + } + } +} + +#endif + + diff --git a/src/nvtt/BlockCompressor.h b/src/nvtt/BlockCompressor.h index 7514bde..63a9b7c 100644 --- a/src/nvtt/BlockCompressor.h +++ b/src/nvtt/BlockCompressor.h @@ -27,7 +27,6 @@ #include "Compressor.h" - namespace nv { struct ColorBlock; @@ -45,9 +44,149 @@ namespace nv { virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; - virtual uint blockSize() const = 0; + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; + virtual uint blockSize(const nvtt::CompressionOptions::Private & compressionOptions) const = 0; + }; + + + // BC1 + struct FastCompressorDXT1 : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; } + }; + struct CompressorDXT1 : public FloatColorCompressor + { + virtual void 
compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; } + }; + + // BC3 + struct CompressorBC3_RGBM : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; } + }; + + + // ETC + struct CompressorETC1 : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; } + }; + struct CompressorETC2_R : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; } + }; + struct CompressorETC2_RG : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; } + }; + struct CompressorETC2_RGB : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; } + }; + struct CompressorETC2_RGBA : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const 
nvtt::CompressionOptions::Private & ) const { return 16; } + }; + struct CompressorETC2_RGBM : public FloatColorCompressor + { + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; } + }; + + + // External compressors. +#if defined(HAVE_ATITC) + struct AtiCompressorDXT1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; + + struct AtiCompressorDXT5 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_SQUISH) + struct SquishCompressorDXT1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_D3DX) + struct D3DXCompressorDXT1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_STB) + struct StbCompressorDXT1 : public ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; 
+#endif + +#if NV_USE_CRUNCH + struct CrunchCompressorETC1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if NV_USE_INTEL_ISPC_TC + struct IspcCompressorBC1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; + + struct IspcCompressorBC3 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; + + struct IspcCompressorBC7 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; + + struct IspcCompressorETC1 : public CompressorInterface + { + virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_ETCLIB) + struct EtcLibCompressor : public CompressorInterface + { + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); + }; +#endif + +#if defined(HAVE_RGETC) + struct RgEtcCompressor : public 
ColorBlockCompressor + { + virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize() const { return 8; } + }; +#endif + +#if defined(HAVE_PVRTEXTOOL) + struct CompressorPVR : public CompressorInterface + { + virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); }; +#endif } // nv namespace diff --git a/src/nvtt/CMakeLists.txt b/src/nvtt/CMakeLists.txt index 9f1e394..445aa1e 100644 --- a/src/nvtt/CMakeLists.txt +++ b/src/nvtt/CMakeLists.txt @@ -13,6 +13,7 @@ SET(NVTT_SRCS CompressorDX11.h CompressorDX11.cpp CompressorDXT1.h CompressorDXT1.cpp CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp + CompressorETC.h CompressorETC.cpp CompressorRGB.h CompressorRGB.cpp Context.h Context.cpp QuickCompressDXT.h QuickCompressDXT.cpp @@ -38,6 +39,7 @@ IF (CUDA_FOUND) ENDIF (CUDA_FOUND) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) +INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104) ADD_DEFINITIONS(-DNVTT_EXPORTS) @@ -47,7 +49,7 @@ ELSE(NVTT_SHARED) ADD_LIBRARY(nvtt ${NVTT_SRCS}) ENDIF(NVTT_SHARED) -TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath) +TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath rg_etc1) INSTALL(TARGETS nvtt RUNTIME DESTINATION bin diff --git a/src/nvtt/ClusterFit.cpp b/src/nvtt/ClusterFit.cpp index b3b2f1a..39bbbc4 100644 --- a/src/nvtt/ClusterFit.cpp +++ b/src/nvtt/ClusterFit.cpp @@ -38,79 +38,6 @@ ClusterFit::ClusterFit() { } -#if 0 // @@ Deprecate. Do not use color set directly. 
-void ClusterFit::setColorSet(const ColorSet * set) -{ - // initialise the best error -#if NVTT_USE_SIMD - m_besterror = SimdVector( FLT_MAX ); - Vector3 metric = m_metric.toVector3(); -#else - m_besterror = FLT_MAX; - Vector3 metric = m_metric; -#endif - - // cache some values - m_count = set->colorCount; - - Vector3 values[16]; - for (uint i = 0; i < m_count; i++) - { - values[i] = set->colors[i].xyz(); - } - - Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric); - //Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric); - - // build the list of values - int order[16]; - float dps[16]; - for (uint i = 0; i < m_count; ++i) - { - dps[i] = dot(values[i], principal); - order[i] = i; - } - - // stable sort - for (uint i = 0; i < m_count; ++i) - { - for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j) - { - swap(dps[j], dps[j - 1]); - swap(order[j], order[j - 1]); - } - } - - // weight all the points -#if NVTT_USE_SIMD - m_xxsum = SimdVector( 0.0f ); - m_xsum = SimdVector( 0.0f ); -#else - m_xxsum = Vector3(0.0f); - m_xsum = Vector3(0.0f); - m_wsum = 0.0f; -#endif - - for (uint i = 0; i < m_count; ++i) - { - int p = order[i]; -#if NVTT_USE_SIMD - NV_ALIGN_16 Vector4 tmp(values[p], 1); - m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]); - m_xxsum += m_weighted[i] * m_weighted[i]; - m_xsum += m_weighted[i]; -#else - m_weighted[i] = values[p] * set->weights[p]; - m_xxsum += m_weighted[i] * m_weighted[i]; - m_xsum += m_weighted[i]; - m_weights[i] = set->weights[p]; - m_wsum += m_weights[i]; -#endif - } -} -#endif // 0 - - void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) { // initialise the best error @@ -412,13 +339,13 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end ) #else inline Vector3 round565(const Vector3 & v) { - uint r = ftoi_trunc(v.x * 31.0f); + uint r = ftoi_trunc(v.x * 31.0f); float r0 = 
float(((r+0) << 3) | ((r+0) >> 2)); float r1 = float(((r+1) << 3) | ((r+1) >> 2)); if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U); - r = (r << 3) | (r >> 2); + r = (r << 3) | (r >> 2); - uint g = ftoi_trunc(v.y * 63.0f); + uint g = ftoi_trunc(v.y * 63.0f); float g0 = float(((g+0) << 2) | ((g+0) >> 4)); float g1 = float(((g+1) << 2) | ((g+1) >> 4)); if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U); @@ -428,8 +355,8 @@ inline Vector3 round565(const Vector3 & v) { float b0 = float(((b+0) << 3) | ((b+0) >> 2)); float b1 = float(((b+1) << 3) | ((b+1) >> 2)); if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U); - - b = (b << 3) | (b >> 2); + + b = (b << 3) | (b >> 2); return Vector3(float(r)/255, float(g)/255, float(b)/255); } diff --git a/src/nvtt/CompressionOptions.cpp b/src/nvtt/CompressionOptions.cpp index a899a67..502d1e3 100644 --- a/src/nvtt/CompressionOptions.cpp +++ b/src/nvtt/CompressionOptions.cpp @@ -50,7 +50,8 @@ void CompressionOptions::reset() m.format = Format_DXT1; m.quality = Quality_Normal; m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f); - + m.rgbmThreshold = 0.15f; + m.bitcount = 32; m.bmask = 0x000000FF; m.gmask = 0x0000FF00; @@ -102,6 +103,11 @@ void CompressionOptions::setColorWeights(float red, float green, float blue, flo m.colorWeight.set(red, green, blue, alpha); } +void CompressionOptions::setRGBMThreshold(float min_m) +{ + m.rgbmThreshold = min_m; +} + /// Set color mask to describe the RGB/RGBA format. void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask) @@ -162,7 +168,7 @@ void CompressionOptions::setPixelType(PixelType pixelType) /// Set pitch alignment in bytes. 
void CompressionOptions::setPitchAlignment(int pitchAlignment) { - nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(pitchAlignment)); + nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(U32(pitchAlignment))); m.pitchAlignment = pitchAlignment; } @@ -194,6 +200,10 @@ void CompressionOptions::setTargetDecoder(Decoder decoder) } +Format CompressionOptions::format() const +{ + return m.format; +} // Translate to and from D3D formats. unsigned int CompressionOptions::d3d9Format() const @@ -246,10 +256,20 @@ unsigned int CompressionOptions::d3d9Format() const FOURCC_ATI2, // Format_BC5 FOURCC_DXT1, // Format_DXT1n 0, // Format_CTX1 - MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6 - MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7 - //FOURCC_ATI2, // Format_BC5_Luma - FOURCC_DXT5, // Format_BC3_RGBM + FOURCC_BC6H, // Format_BC6 + FOURCC_BC7L, // Format_BC7 + FOURCC_DXT5, // Format_BC3_RGBM + NV_MAKEFOURCC('E', 'T', 'C', '1'), // Format_ETC1 + 0, // Format_ETC2_R + 0, // Format_ETC2_RG + NV_MAKEFOURCC('E', 'T', 'C', '2'), // Format_ETC2_RGB + 0, // Format_ETC2_RGBA + 0, // Format_ETC2_RGB_A1 + 0, // Format_ETC2_RGBM + FOURCC_PVR0, + FOURCC_PVR1, + FOURCC_PVR2, + FOURCC_PVR3, }; NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count); @@ -258,12 +278,80 @@ unsigned int CompressionOptions::d3d9Format() const } } -/* -bool CompressionOptions::setDirect3D9Format(unsigned int format) +unsigned int CompressionOptions::dxgiFormat() const // @@ Add srgb flag. 
{ + if (m.format == Format_RGB) { + if (m.pixelType == PixelType_UnsignedNorm) { + + uint bitcount = m.bitcount; + uint rmask = m.rmask; + uint gmask = m.gmask; + uint bmask = m.bmask; + uint amask = m.amask; + + if (bitcount == 0) { + bitcount = m.rsize + m.gsize + m.bsize + m.asize; + rmask = ((1 << m.rsize) - 1) << (m.asize + m.bsize + m.gsize); + gmask = ((1 << m.gsize) - 1) << (m.asize + m.bsize); + bmask = ((1 << m.bsize) - 1) << m.asize; + amask = ((1 << m.asize) - 1) << 0; + } + + if (bitcount <= 32) { + return nv::findDXGIFormat(bitcount, rmask, gmask, bmask, amask); + } + else { + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_UNORM; + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_UNORM; + } + } + else if (m.pixelType == PixelType_Float) { + if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16_FLOAT; + if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32_FLOAT; + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_FLOAT; + if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32G32_FLOAT; + if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_FLOAT; + if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return DXGI_FORMAT_R32G32B32A32_FLOAT; + } + + return 0; + } + else { + uint dxgi_formats[] = { + 0, // Format_RGB, + DXGI_FORMAT_BC1_UNORM, // Format_DXT1 + DXGI_FORMAT_BC1_UNORM, // Format_DXT1a + DXGI_FORMAT_BC2_UNORM, // Format_DXT3 + DXGI_FORMAT_BC3_UNORM, // Format_DXT5 + DXGI_FORMAT_BC3_UNORM, // Format_DXT5n + DXGI_FORMAT_BC4_UNORM, // Format_BC4 + DXGI_FORMAT_BC5_UNORM, // Format_BC5 + DXGI_FORMAT_BC1_UNORM, // Format_DXT1n + 0, // Format_CTX1 + DXGI_FORMAT_BC6H_UF16, // Format_BC6 + DXGI_FORMAT_BC7_UNORM, // Format_BC7 + 
DXGI_FORMAT_BC5_UNORM, // Format_BC3_RGBM + 0, // Format_ETC1 + 0, // Format_ETC2_R + 0, // Format_ETC2_RG + 0, // Format_ETC2_RGB + 0, // Format_ETC2_RGBA + 0, // Format_ETC2_RGB_A1 + 0, // Format_ETC2_RGBM + 0, // Format_PVR_2BPP_RGB + 0, // Format_PVR_4BPP_RGB + 0, // Format_PVR_2BPP_RGBA + 0, // Format_PVR_4BPP_RGB + }; + + NV_COMPILER_CHECK(NV_ARRAY_SIZE(dxgi_formats) == Format_Count); + + return dxgi_formats[m.format]; + } } -unsigned int CompressionOptions::dxgiFormat() const +/* +bool CompressionOptions::setDirect3D9Format(unsigned int format) { } diff --git a/src/nvtt/CompressionOptions.h b/src/nvtt/CompressionOptions.h index 7612f8f..98d801a 100644 --- a/src/nvtt/CompressionOptions.h +++ b/src/nvtt/CompressionOptions.h @@ -39,7 +39,8 @@ namespace nvtt Quality quality; nv::Vector4 colorWeight; - + float rgbmThreshold; + // Pixel format description. uint bitcount; uint rmask; diff --git a/src/nvtt/Compressor.h b/src/nvtt/Compressor.h index f55d94c..54b9cc9 100644 --- a/src/nvtt/Compressor.h +++ b/src/nvtt/Compressor.h @@ -30,6 +30,7 @@ namespace nv { + struct CompressorInterface { virtual ~CompressorInterface() {} diff --git a/src/nvtt/CompressorDX11.cpp b/src/nvtt/CompressorDX11.cpp index a349ffa..d64e662 100644 --- a/src/nvtt/CompressorDX11.cpp +++ b/src/nvtt/CompressorDX11.cpp @@ -39,7 +39,7 @@ using namespace nv; using namespace nvtt; -void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +void CompressorBC6::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) { // !!!UNDONE: support channel weights // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) 
@@ -77,7 +77,7 @@ void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[ ZOH::compress(zohTile, (char *)output); } -void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) +void CompressorBC7::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output) { // !!!UNDONE: support channel weights // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) diff --git a/src/nvtt/CompressorDX11.h b/src/nvtt/CompressorDX11.h index 7afaacb..39247f3 100644 --- a/src/nvtt/CompressorDX11.h +++ b/src/nvtt/CompressorDX11.h @@ -30,14 +30,14 @@ namespace nv { struct CompressorBC6 : public FloatColorCompressor { - virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; } }; struct CompressorBC7 : public FloatColorCompressor { - virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } + virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); + virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; } }; } // nv namespace diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp index 9cfd7da..c05c762 100644 --- a/src/nvtt/CompressorDX9.cpp +++ b/src/nvtt/CompressorDX9.cpp @@ -28,7 +28,7 @@ 
#include "CompressionOptions.h" #include "OutputOptions.h" #include "ClusterFit.h" -#include "CompressorDXT1.h" +//#include "CompressorDXT1.h" #include "CompressorDXT5_RGBM.h" // squish @@ -48,45 +48,11 @@ #include // placement new -// s3_quant -#if defined(HAVE_S3QUANT) -#include "s3tc/s3_quant.h" -#endif - -// ati tc -#if defined(HAVE_ATITC) -typedef int BOOL; -typedef _W64 unsigned long ULONG_PTR; -typedef ULONG_PTR DWORD_PTR; -#include "atitc/ATI_Compress.h" -#endif - -// squish -#if defined(HAVE_SQUISH) -//#include "squish/squish.h" -#include "squish-1.10/squish.h" -#endif - -// d3dx -#if defined(HAVE_D3DX) -#include -#endif - -// stb -#if defined(HAVE_STB) -#define STB_DEFINE -#include "stb/stb_dxt.h" -#endif using namespace nv; using namespace nvtt; -void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - BlockDXT1 * block = new(output) BlockDXT1; - QuickCompress::compressDXT1(rgba, block); -} void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { @@ -115,39 +81,13 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha } -#if 1 - -void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output); -} - -#else -void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - if (rgba.isSingleColor()) - { - BlockDXT1 * block = new(output) BlockDXT1; - 
OptimalCompress::compressDXT1(rgba.color(0), block); - } - else - { - nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0); - fit.SetColourSet(&colours, nvsquish::kDxt1); - fit.Compress(output); - } -} -#endif void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { uint alphaMask = 0; for (uint i = 0; i < 16; i++) { - if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color. + if (rgba.color(i).a == 0) alphaMask |= (3U << (i * 2U)); // Set two bits for each color. } const bool isSingleColor = rgba.isSingleColor(); @@ -284,216 +224,6 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode } -void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - float min_m = 0.25f; // @@ Get from compression options. - compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output); -} - - -#if defined(HAVE_ATITC) - -void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(d == 1); - - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = w; - srcTexture.dwHeight = h; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - srcTexture.dwPitch = w * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - } - else - { - // @@ Floating point input is not swizzled. 
- srcTexture.dwPitch = w * 16; - srcTexture.format = ATI_TC_FORMAT_ARGB_32F; - } - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) data; - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = sizeof(destTexture); - destTexture.dwWidth = w; - destTexture.dwHeight = h; - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT1; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); - - ATI_TC_CompressOptions options; - options.dwSize = sizeof(options); - options.bUseChannelWeighting = false; - options.bUseAdaptiveWeighting = false; - options.bDXT1UseAlpha = false; - options.nCompressionSpeed = ATI_TC_Speed_Normal; - options.bDisableMultiThreading = false; - //options.bDisableMultiThreading = true; - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } - mem::free(destTexture.pData); -} - -void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(d == 1); - - // Init source texture - ATI_TC_Texture srcTexture; - srcTexture.dwSize = sizeof(srcTexture); - srcTexture.dwWidth = w; - srcTexture.dwHeight = h; - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - srcTexture.dwPitch = w * 4; - srcTexture.format = ATI_TC_FORMAT_ARGB_8888; - } - else - { - srcTexture.dwPitch = w * 16; - srcTexture.format = ATI_TC_FORMAT_ARGB_32F; - } - srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture); - srcTexture.pData = (ATI_TC_BYTE*) data; - - // Init dest texture - ATI_TC_Texture destTexture; - destTexture.dwSize = 
sizeof(destTexture); - destTexture.dwWidth = w; - destTexture.dwHeight = h; - destTexture.dwPitch = 0; - destTexture.format = ATI_TC_FORMAT_DXT5; - destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture); - destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize); - - // Compress - ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize); - } - - mem::free(destTexture.pData); -} - -#endif // defined(HAVE_ATITC) - -#if defined(HAVE_SQUISH) - -void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(d == 1); - nvDebugCheck(false); - -#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB") - /* - Image img(*image); - int count = img.width() * img.height(); - for (int i = 0; i < count; i++) - { - Color32 c = img.pixel(i); - img.pixel(i) = Color32(c.b, c.g, c.r, c.a); - } - - int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1); - void * blocks = mem::malloc(size); - - squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit); - - if (outputOptions.outputHandler != NULL) { - outputOptions.outputHandler->writeData(blocks, size); - } - - mem::free(blocks); - */ -} - -#endif // defined(HAVE_SQUISH) - - -#if defined(HAVE_D3DX) - -void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) -{ - nvDebugCheck(d == 1); - - IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION); - - 
D3DPRESENT_PARAMETERS presentParams; - ZeroMemory(&presentParams, sizeof(presentParams)); - presentParams.Windowed = TRUE; - presentParams.SwapEffect = D3DSWAPEFFECT_COPY; - presentParams.BackBufferWidth = 8; - presentParams.BackBufferHeight = 8; - presentParams.BackBufferFormat = D3DFMT_UNKNOWN; - - HRESULT err; - - IDirect3DDevice9 * device = NULL; - err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device); - - IDirect3DTexture9 * texture = NULL; - err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture); - - IDirect3DSurface9 * surface = NULL; - err = texture->GetSurfaceLevel(0, &surface); - - RECT rect; - rect.left = 0; - rect.top = 0; - rect.bottom = h; - rect.right = w; - - if (inputFormat == nvtt::InputFormat_BGRA_8UB) - { - err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0); - } - else - { - err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0); - } - - if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA) - { - D3DLOCKED_RECT rect; - ZeroMemory(&rect, sizeof(rect)); - - err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY); - - if (outputOptions.outputHandler != NULL) { - int size = rect.Pitch * ((h + 3) / 4); - outputOptions.outputHandler->writeData(rect.pBits, size); - } - - err = surface->UnlockRect(); - } - - surface->Release(); - device->Release(); - d3d->Release(); -} - -#endif // defined(HAVE_D3DX) - - -#if defined(HAVE_STB) - -void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) -{ - rgba.swizzle(2, 1, 0, 3); // Swap R and B - stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0); -} -#endif // defined(HAVE_STB) diff --git a/src/nvtt/CompressorDX9.h 
b/src/nvtt/CompressorDX9.h index 8a298c6..7cb13a3 100644 --- a/src/nvtt/CompressorDX9.h +++ b/src/nvtt/CompressorDX9.h @@ -32,12 +32,6 @@ namespace nv struct ColorBlock; // Fast CPU compressors. - struct FastCompressorDXT1 : public ColorBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; - struct FastCompressorDXT1a : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); @@ -64,19 +58,6 @@ namespace nv // Normal CPU compressors. -#if 1 - struct CompressorDXT1 : public FloatColorCompressor - { - virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; -#else - struct CompressorDXT1 : public ColorBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; -#endif struct CompressorDXT1a : public ColorBlockCompressor { @@ -108,47 +89,9 @@ namespace nv virtual uint blockSize() const { return 16; } }; - struct CompressorBC3_RGBM : public FloatColorCompressor - { - virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 16; } - }; - - - // External compressors. 
-#if defined(HAVE_ATITC) - struct AtiCompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; - struct AtiCompressorDXT5 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; -#endif -#if defined(HAVE_SQUISH) - struct SquishCompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; -#endif -#if defined(HAVE_D3DX) - struct D3DXCompressorDXT1 : public CompressorInterface - { - virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); - }; -#endif - -#if defined(HAVE_STB) - struct StbCompressorDXT1 : public ColorBlockCompressor - { - virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); - virtual uint blockSize() const { return 8; } - }; -#endif } // nv namespace diff --git a/src/nvtt/CompressorDXT1.cpp b/src/nvtt/CompressorDXT1.cpp index 08134f8..8a09669 100644 --- a/src/nvtt/CompressorDXT1.cpp +++ b/src/nvtt/CompressorDXT1.cpp @@ -218,13 +218,13 @@ static int evaluate_mse(const Color32 & p, const Color32 & c) { return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b)); } -static float evaluate_mse(const Vector3 
palette[4], const Vector3 & c, const Vector3 & w) { +/*static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) { float e0 = evaluate_mse(palette[0], c, w); float e1 = evaluate_mse(palette[1], c, w); float e2 = evaluate_mse(palette[2], c, w); float e3 = evaluate_mse(palette[3], c, w); return min(min(e0, e1), min(e2, e3)); -} +}*/ static int evaluate_mse(const Color32 palette[4], const Color32 & c) { int e0 = evaluate_mse(palette[0], c); @@ -245,12 +245,12 @@ static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) { // Returns weighted MSE error in [0-255] range. static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) { - float total = 0.0f; - for (int i = 0; i < count; i++) { + float total = 0.0f; + for (int i = 0; i < count; i++) { total += weights[i] * evaluate_mse(palette, colors[i]); - } + } - return total; + return total; } #if 0 @@ -337,7 +337,7 @@ static void evaluate_palette(Color16 c0, Color16 c1, Vector3 palette[4]) { } } -static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { +/*static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { nvDebugCheck(c0.u > c1.u); Color32 palette32[4]; @@ -346,7 +346,7 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { for (int i = 0; i < 4; i++) { palette[i] = color_to_vector3(palette32[i]); } -} +}*/ @@ -355,38 +355,38 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { uint indices = 0; - for (int i = 0; i < 16; i++) { - float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); - float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); - float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); - float d3 = evaluate_mse(palette[3], 
input_colors[i].xyz(), color_weights); - - uint b0 = d0 > d3; - uint b1 = d1 > d2; - uint b2 = d0 > d2; - uint b3 = d1 > d3; - uint b4 = d2 > d3; - - uint x0 = b1 & b2; - uint x1 = b0 & b3; - uint x2 = b0 & b4; - - indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); - } + for (int i = 0; i < 16; i++) { + float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); + float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); + float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); + float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); + + uint b0 = d0 > d3; + uint b1 = d1 > d2; + uint b2 = d0 > d2; + uint b3 = d1 > d3; + uint b4 = d2 > d3; + + uint x0 = b1 & b2; + uint x1 = b0 & b3; + uint x2 = b0 & b4; + + indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); + } - return indices; + return indices; } static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { uint indices = 0; - for (int i = 0; i < 16; i++) { - float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); - float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); - float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); - float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); - + for (int i = 0; i < 16; i++) { + float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); + float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); + float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); + float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); + uint index; if (d0 < d1 && d0 < d2 && d0 < d3) index = 0; else if (d1 < d2 && d1 < d3) index = 1; @@ -491,7 +491,8 @@ float nv::compress_dxt1_single_color(const Vector3 * colors, const float * weigh // Decompress block color. 
Color32 palette[4]; - output->evaluatePalette(palette, /*d3d9=*/false); + evaluate_palette(output->col0, output->col1, palette); + //output->evaluatePalette(palette, /*d3d9=*/false); Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]); @@ -668,7 +669,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight // This is too expensive, even with a low threshold. // If high quality: - if (0) { + if (/* DISABLES CODE */ (0)) { BlockDXT1 exhaustive_output; float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output); @@ -720,7 +721,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight // Least squares fitting of color end points for the given indices. @@ Take weights into account. -static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) +static bool optimize_end_points4(uint indices, const Vector4 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b) { float alpha2_sum = 0.0f; float beta2_sum = 0.0f; @@ -739,8 +740,8 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec alpha2_sum += alpha * alpha; beta2_sum += beta * beta; alphabeta_sum += alpha * beta; - alphax_sum += alpha * colors[i]; - betax_sum += beta * colors[i]; + alphax_sum += alpha * colors[i].xyz(); + betax_sum += beta * colors[i].xyz(); } float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; @@ -756,7 +757,7 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec // Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account. 
-static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) +static bool optimize_end_points3(uint indices, const Vector3 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b) { float alpha2_sum = 0.0f; float beta2_sum = 0.0f; @@ -794,6 +795,90 @@ static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vec +// find minimum and maximum colors based on bounding box in color space +inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1) +{ + *c0 = Vector3(0); + *c1 = Vector3(255); + + for (int i = 0; i < count; i++) { + *c0 = max(*c0, colors[i]); + *c1 = min(*c1, colors[i]); + } +} + +inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1) +{ + Vector3 center = (*c0 + *c1) * 0.5f; + + Vector2 covariance = Vector2(0); + for (int i = 0; i < count; i++) { + Vector3 t = colors[i] - center; + covariance += t.xy() * t.z; + } + + float x0 = c0->x; + float y0 = c0->y; + float x1 = c1->x; + float y1 = c1->y; + + if (covariance.x < 0) { + swap(x0, x1); + } + if (covariance.y < 0) { + swap(y0, y1); + } + + c0->set(x0, y0, c0->z); + c1->set(x1, y1, c1->z); +} + +inline static void inset_bbox(Vector3 * restrict c0, Vector3 * restrict c1) +{ + Vector3 inset = (*c0 - *c1) / 16.0f - (8.0f / 255.0f) / 16.0f; + *c0 = clamp(*c0 - inset, 0.0f, 255.0f); + *c1 = clamp(*c1 + inset, 0.0f, 255.0f); +} + + + +float nv::compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output) +{ + Vector3 colors[16]; + float weights[16]; + int count = reduce_colors(input_colors, input_weights, colors, weights); + + if (count == 0) { + // Output trivial block. 
+ output->col0.u = 0; + output->col1.u = 0; + output->indices = 0; + return 0; + } + + + float error = FLT_MAX; + error = compress_dxt1_single_color(colors, weights, count, color_weights, output); + + if (error == 0.0f || count == 1) { + // Early out. + return error; + } + + // Quick end point selection. + Vector3 c0, c1; + fit_colors_bbox(colors, count, &c0, &c1); + select_diagonal(colors, count, &c0, &c1); + inset_bbox(&c0, &c1); + output_block4(input_colors, color_weights, c0, c1, output); + + // Refine color for the selected indices. + if (optimize_end_points4(output->indices, input_colors, 16, &c0, &c1)) { + output_block4(input_colors, color_weights, c0, c1, output); + } + + return evaluate_mse(input_colors, input_weights, color_weights, output); +} diff --git a/src/nvtt/CompressorDXT1.h b/src/nvtt/CompressorDXT1.h index c7e51d7..ac5bdb5 100644 --- a/src/nvtt/CompressorDXT1.h +++ b/src/nvtt/CompressorDXT1.h @@ -13,11 +13,14 @@ namespace nv { float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output); float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); - float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); + //float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output); void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); - + // Cluster fit 
end point selection. float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); + // Quick end point selection followed by least squares refinement. + float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output); + } diff --git a/src/nvtt/CompressorDXT5_RGBM.cpp b/src/nvtt/CompressorDXT5_RGBM.cpp index 99dd412..3274f2c 100644 --- a/src/nvtt/CompressorDXT5_RGBM.cpp +++ b/src/nvtt/CompressorDXT5_RGBM.cpp @@ -3,6 +3,7 @@ #include "OptimalCompressDXT.h" #include "QuickCompressDXT.h" +#include "CompressorETC.h" #include "nvimage/ColorBlock.h" #include "nvimage/BlockDXT.h" @@ -17,38 +18,45 @@ using namespace nv; -//static uint atomic_counter = 0; - - -float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) { - // Convert to RGBM. - Vector4 input_colors_rgbm[16]; // @@ Write over input_colors? - float rgb_weights[16]; +static void convert_to_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, Vector4 rgbm_colors[16], float rgb_weights[16]) { + float weight_sum = 0; - + for (uint i = 0; i < 16; i++) { const Vector4 & c = input_colors[i]; - + float R = saturate(c.x); float G = saturate(c.y); float B = saturate(c.z); - + float M = max(max(R, G), max(B, min_m)); float r = R / M; float g = G / M; float b = B / M; float a = (M - min_m) / (1 - min_m); - - input_colors_rgbm[i] = Vector4(r, g, b, a); + + rgbm_colors[i] = Vector4(r, g, b, a); rgb_weights[i] = input_weights[i] * M; weight_sum += input_weights[i]; } - + if (weight_sum == 0) { for (uint i = 0; i < 16; i++) rgb_weights[i] = 1; } + +} + + +//static uint atomic_counter = 0; + +float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) { + + // Convert to RGBM. 
+ Vector4 input_colors_rgbm[16]; // @@ Write over input_colors? + float rgb_weights[16]; + convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights); // Compress RGB. compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color); @@ -138,291 +146,61 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w } - - -#if 0 - - BlockDXT5 * block = new(output)BlockDXT5; - - // Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors. - - // Compress the resulting M values optimally. - - // Repeat this several times until compression error does not improve? - - //Vector3 rgb_block[16]; - //float m_block[16]; - - - // Init RGB/M block. -#if 0 - nvsquish::WeightedClusterFit fit; - - ColorBlock rgba; - for (int i = 0; i < 16; i++) { - const Vector4 & c = src.color(i); - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float M = max(max(R, G), max(B, min_m)); - float r = R / M; - float g = G / M; - float b = B / M; - float a = c.w; - - rgba.color(i) = toColor32(Vector4(r, g, b, a)); - } - - if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - } -#endif -#if 1 - ColorSet rgb; - rgb.allocate(4, 4); - - for (uint i = 0; i < 16; i++) { - const Vector4 & c = colors[i]; - - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float M = max(max(R, G), max(B, min_m)); - float r = R / M; - float g = G / M; - 
float b = B / M; - float a = c.w; - - rgb.colors[i] = Vector4(r, g, b, a); - rgb.indices[i] = i; - rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. - } - - rgb.createMinimalSet(/*ignoreTransparent=*/true); - - if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { - OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); - } - else { - ClusterFit fit; - fit.setColorWeights(compressionOptions.colorWeight); - fit.setColorSet(&rgb); - - Vector3 start, end; - fit.compress4(&start, &end); - - QuickCompress::outputBlock4(rgb, start, end, &block->color); - } -#endif - +float nv::compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output) { + + // Convert to RGBM. + Vector4 rgbm_colors[16]; + float rgb_weights[16]; + convert_to_rgbm(input_colors, input_weights, min_m, rgbm_colors, rgb_weights); + + void * etc_output = (uint8 *)output + 8; + void * eac_output = output; + + // Compress RGB. + compress_etc2(rgbm_colors, rgb_weights, Vector3(1), etc_output); + // Decompress RGB/M block. - nv::ColorBlock RGB; - block->color.decodeBlock(&RGB); + decompress_etc(etc_output, rgbm_colors); -#if 1 - AlphaBlock4x4 M; + // Compute M values to compensate for RGB's error. 
for (int i = 0; i < 16; i++) { - const Vector4 & c = colors[i]; + const Vector4 & c = input_colors[i]; + float R = saturate(c.x); float G = saturate(c.y); float B = saturate(c.z); - - float r = RGB.color(i).r / 255.0f; - float g = RGB.color(i).g / 255.0f; - float b = RGB.color(i).b / 255.0f; - - float m = (R / r + G / g + B / b) / 3.0f; - //float m = max((R / r + G / g + B / b) / 3.0f, min_m); - //float m = max(max(R / r, G / g), max(B / b, min_m)); - //float m = max(max(R, G), max(B, min_m)); + + float rm = rgbm_colors[i].x; + float gm = rgbm_colors[i].y; + float bm = rgbm_colors[i].z; + + // compute m such that m * (r/M, g/M, b/M) == RGB + + // Three equations, one unknown: + // m * r/M == R + // m * g/M == G + // m * b/M == B + + // Solve in the least squares sense! + + // m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T + + // m == dot(rgb, RGB) / dot(rgb, rgb) + + float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm)); + if (!isFinite(m)) { + m = 1; + } m = (m - min_m) / (1 - min_m); - - M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); - M.weights[i] = weights[i]; - } - - // Compress M. - if (compressionOptions.quality == Quality_Fastest) { - QuickCompress::compressDXT5A(M, &block->alpha); - } - else { - OptimalCompress::compressDXT5A(M, &block->alpha); - } -#else - OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha); -#endif - -#if 0 - // Decompress M. - block->alpha.decodeBlock(&M); - - rgb.allocate(src.w, src.h); // @@ Handle smaller blocks. 
- - for (uint i = 0; i < src.colorCount; i++) { - const Vector4 & c = src.color(i); - - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - //float m = max(max(R, G), max(B, min_m)); - float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m; - float r = R / m; - float g = G / m; - float b = B / m; - float a = c.w; - - rgb.colors[i] = Vector4(r, g, b, a); - rgb.indices[i] = i; - rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set. - } - - rgb.createMinimalSet(/*ignoreTransparent=*/true); - - if (rgb.isSingleColor(/*ignoreAlpha=*/true)) { - OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color); - } - else { - ClusterFit fit; - fit.setMetric(compressionOptions.colorWeight); - fit.setColourSet(&rgb); - - Vector3 start, end; - fit.compress4(&start, &end); - - QuickCompress::outputBlock4(rgb, start, end, &block->color); + + // Store M in alpha channel. + rgbm_colors[i].w = saturate(m); // @@ What it we don't saturate? } -#endif - -#if 0 - block->color.decodeBlock(&RGB); - - //AlphaBlock4x4 M; - //M.initWeights(src); - for (int i = 0; i < 16; i++) { - const Vector4 & c = src.color(i); - float R = saturate(c.x); - float G = saturate(c.y); - float B = saturate(c.z); - - float r = RGB.color(i).r / 255.0f; - float g = RGB.color(i).g / 255.0f; - float b = RGB.color(i).b / 255.0f; - - float m = (R / r + G / g + B / b) / 3.0f; - //float m = max((R / r + G / g + B / b) / 3.0f, min_m); - //float m = max(max(R / r, G / g), max(B / b, min_m)); - //float m = max(max(R, G), max(B, min_m)); - - m = (m - min_m) / (1 - min_m); - - M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); - M.weights[i] = src.weights[i]; - } - // Compress M. 
- if (compressionOptions.quality == Quality_Fastest) { - QuickCompress::compressDXT5A(M, &block->alpha); - } - else { - OptimalCompress::compressDXT5A(M, &block->alpha); - } -#endif - - - -#if 0 - src.fromRGBM(M, min_m); + compress_eac(rgbm_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*11bit_mode*/false, eac_output); - src.createMinimalSet(/*ignoreTransparent=*/true); - - if (src.isSingleColor(/*ignoreAlpha=*/true)) { - OptimalCompress::compressDXT1(src.color(0), &block->color); - } - else { - // @@ Use our improved compressor. - ClusterFit fit; - fit.setMetric(compressionOptions.colorWeight); - fit.setColourSet(&src); - - Vector3 start, end; - fit.compress4(&start, &end); - - if (fit.compress3(&start, &end)) { - QuickCompress::outputBlock3(src, start, end, block->color); - } - else { - QuickCompress::outputBlock4(src, start, end, block->color); - } - } -#endif // 0 - - // @@ Decompress color and compute M that best approximates src with these colors? Then compress M again? - - - - // RGBM encoding. - // Maximize precision. - // - Number of possible grey levels: - // - Naive: 2^3 = 8 - // - Better: 2^3 + 2^2 = 12 - // - How to choose min_m? - // - Ideal = Adaptive per block, don't know where to store. - // - Adaptive per lightmap. How to compute optimal? - // - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range. - - // - Optimal compressor: Interpolation artifacts. - - // - Color transform. - // - Measure error in post-tone-mapping color space. - // - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game. - // - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space. - - // - Enhanced DXT compressor. - // - Typical RGBM encoding as follows: - // rgb -> M = max(rgb), RGB=rgb/M -> RGBM - // - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1. - // - We could ensure that M' >= M during compression. 
- // - We could clamp RGB anyway. - // - We could add a fixed scale value to take into account compression errors and avoid clamping. - - - - - - // Compress color. - /*if (rgba.isSingleColor()) - { - OptimalCompress::compressDXT1(rgba.color(0), &block->color); - } - else - { - nvsquish::WeightedClusterFit fit; - fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z); - - int flags = 0; - if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha; - - nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags); - fit.SetColourSet(&colours, 0); - fit.Compress(&block->color); - }*/ + return 0; // @@ Compute error. +} -#endif // 0 \ No newline at end of file diff --git a/src/nvtt/CompressorDXT5_RGBM.h b/src/nvtt/CompressorDXT5_RGBM.h index 88cf646..3fdffe5 100644 --- a/src/nvtt/CompressorDXT5_RGBM.h +++ b/src/nvtt/CompressorDXT5_RGBM.h @@ -5,5 +5,5 @@ namespace nv { class Vector4; float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output); - + float compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output); } diff --git a/src/nvtt/CompressorETC.cpp b/src/nvtt/CompressorETC.cpp new file mode 100644 index 0000000..4fb8e1c --- /dev/null +++ b/src/nvtt/CompressorETC.cpp @@ -0,0 +1,2307 @@ + +#include "CompressorETC.h" + +#include "nvmath/Vector.inl" +#include "nvmath/Matrix.inl" +#include "nvmath/Color.inl" +#include "nvcore/Utils.h" // clamp + +#define HAVE_RGETC NV_OS_OSX +#define HAVE_ETCPACK 0 // Only enable in OSX for debugging. 
+ +#if HAVE_RGETC +#include "rg_etc1.h" +#endif + +#if HAVE_ETCPACK +// From etcpack.cxx +extern void decompressBlockETC2(unsigned int block_part1, unsigned int block_part2, uint8 *img, int width, int height, int startx, int starty); +extern void decompressBlockAlpha(uint8* data, uint8* img, int width, int height, int ix, int iy); +extern void decompressBlockAlpha16bit(uint8* data, uint8* img, int width, int height, int ix, int iy); +extern int formatSigned; +#endif + +#define assert nvCheck + +using namespace nv; + +// TODO: +// - Accurate rounding of signed 3-bit components. +// - Range based table selection. +// - Slower try all options table selection? +// - Trivial selector assignment. +// * Base point optimization. +// * Brute force base point optimization. +// - Enumerate and evaluate all clusters. +// - Brute force planar mode endpoint refinement. For each color try two rounding directions (8 tests). +// - T & H modes decompression. + +union BlockETC { + // Definitions from EtcLib/EtcBlock4x4EncodingBits.h + struct Individual { + uint red2 : 4; // byte 0 + uint red1 : 4; + uint green2 : 4; // byte 1 + uint green1 : 4; + uint blue2 : 4; // byte 2 + uint blue1 : 4; + uint flip : 1; // byte 3 + uint diff : 1; + uint cw2 : 3; + uint cw1 : 3; + uint selectors; // bytes 4-7 + }; + NV_COMPILER_CHECK(sizeof(BlockETC::Individual) == 64/8); + + struct Differential { + uint dred2 : 3; // byte 0 + uint red1 : 5; + uint dgreen2 : 3; // byte 1 + uint green1 : 5; + uint dblue2 : 3; // byte 2 + uint blue1 : 5; + uint flip : 1; // byte 3 + uint diff : 1; + uint cw2 : 3; + uint cw1 : 3; + uint selectors; // bytes 4-7 + }; + NV_COMPILER_CHECK(sizeof(Differential) == 64/8); + + struct T { + uint red1b : 2; // byte 0 + uint detect2 : 1; + uint red1a : 2; + uint detect1 : 3; + uint blue1 : 4; // byte 1 + uint green1 : 4; + uint green2 : 4; // byte 2 + uint red2 : 4; + uint db : 1; // byte 3 + uint diff : 1; + uint da : 2; + uint blue2 : 4; + uint selectors; // bytes 4-7 + }; + 
NV_COMPILER_CHECK(sizeof(T) == 64/8); + + struct H { + uint green1a : 3; // byte 0 + uint red1 : 4; + uint detect1 : 1; + uint blue1b : 2; // byte 1 + uint detect3 : 1; + uint blue1a : 1; + uint green1b : 1; + uint detect2 : 3; + uint green2a : 3; // byte 2 + uint red2 : 4; + uint blue1c : 1; + uint db : 1; // byte 3 + uint diff : 1; + uint da : 1; + uint blue2 : 4; + uint green2b : 1; + uint selectors; // bytes 4-7 + }; + NV_COMPILER_CHECK(sizeof(H) == 64/8); + + struct Planar { + uint originGreen1 : 1; // byte 0 + uint originRed : 6; + uint detect1 : 1; + uint originBlue1 : 1; // byte 1 + uint originGreen2 : 6; + uint detect2 : 1; + uint originBlue3 : 2; // byte 2 + uint detect4 : 1; + uint originBlue2 : 2; + uint detect3 : 3; + uint horizRed2 : 1; // byte 3 + uint diff : 1; + uint horizRed1 : 5; + uint originBlue4 : 1; + uint horizBlue1: 1; // byte 4 + uint horizGreen : 7; + uint vertRed1 : 3; // byte 5 + uint horizBlue2 : 5; + uint vertGreen1 : 5; // byte 6 + uint vertRed2 : 3; + uint vertBlue : 6; // byte 7 + uint vertGreen2 : 2; + }; + NV_COMPILER_CHECK(sizeof(Planar) == 64/8); + + + uint64 data64; + uint32 data32[2]; + uint8 data8[8]; + Individual individual; + Differential differential; + T t; + H h; + Planar planar; +}; +NV_COMPILER_CHECK(sizeof(BlockETC) == 64/8); + + + +static const int etc_intensity_modifiers[8][4] = { + { -8, -2, 2, 8 }, + { -17, -5, 5, 17 }, + { -29, -9, 9, 29 }, + { -42, -13, 13, 42 }, + { -60, -18, 18, 60 }, + { -80, -24, 24, 80 }, + { -106, -33, 33, 106 }, + { -183, -47, 47, 183 } +}; + +static const int etc_intensity_range[8] = { + 16, 34, 58, 84, 120, 160, 212, 366 +}; + +static const int etc_th_distances[8] = { 3, 6, 11, 16, 23, 32, 41, 64 }; + +static const uint8 etc_selector_scramble[] = { 3, 2, 0, 1 }; +static const uint8 etc_selector_unscramble[] = { 2, 3, 1, 0 }; + + +static float midpoints4[16]; +NV_AT_STARTUP( + for (int i = 0; i < 15; i++) { + float f0 = float(((i+0) << 4) | ((i+0) >> 4)) / 255.0f; + float f1 = 
float(((i+1) << 4) | ((i+1) >> 4)) / 255.0f; + midpoints4[i] = (f0 + f1) * 0.5f; + } + midpoints4[15] = 1.0f; +); + +static const float midpoints5[32] = { + 0.015686f, 0.047059f, 0.078431f, 0.111765f, 0.145098f, 0.176471f, 0.207843f, 0.241176f, 0.274510f, 0.305882f, 0.337255f, 0.370588f, 0.403922f, 0.435294f, 0.466667f, 0.5f, + 0.533333f, 0.564706f, 0.596078f, 0.629412f, 0.662745f, 0.694118f, 0.725490f, 0.758824f, 0.792157f, 0.823529f, 0.854902f, 0.888235f, 0.921569f, 0.952941f, 0.984314f, 1.0f +}; + +//static const float midpoints6[64]; +//static const float midpoints7[128]; + + + +// ETC2 Modes: +// - ETC1: +// - two partitions (flip modes): 2*(4x2, 2x4) +// - two base colors sotred as 444+444 or 555+333 +// - two 3 bit intensity modifiers +// - T Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices. +// - H Mode. 2 colors 444, 3 bit intensity modifiers, 2 bit indices. +// - Planar mode: 3 colors 676 + +struct ETC_Data { + enum Mode { + Mode_ETC1, + Mode_T, + Mode_H, + Mode_Planar, + } mode; + + // @@ It may make more sense to store bit-expanded or even float colors here. 
+ union { + struct { + uint16 color0; // 444 or 555 + uint16 color1; // 444 or 333 + uint8 table0; // 3 bits + uint8 table1; // 3 bits + bool flip; // partition mode + bool diff; // color encoding + } etc; + struct { + uint16 color0; // 444 + uint16 color1; // 444 + uint8 table; // 3 bits + } t, h; + struct { + uint8 ro, go, bo; // 676 + uint8 rh, gh, bh; // 676 + uint8 rv, gv, bv; // 676 + } planar; + }; + + uint8 selector[16]; // 2 bit indices (32 bits) +}; + +struct ETC_Solution { + float error = NV_FLOAT_MAX; + ETC_Data data; +}; + + +struct ETC_Options { + //bool fast_flip_mode_selection = false; + bool use_rg_etc = true; + bool enable_etc2 = true; + bool use_planar = true; + bool use_t_mode = true; + bool use_h_mode = true; + bool onebit_alpha = false; + Vector3 color_weights = Vector3(1); + + //int8 eac_search_radius = 1; // [0-3] + //bool eac_11bit_mode = false; +}; + +/*static*/ float compress_etc(Vector4 input_colors[16], float input_weights[16], const ETC_Options & options, BlockETC * output); + + +struct BlockEAC { + uint base : 8; + uint table : 4; + uint multiplier : 4; + uint selectors0 : 8; + uint selectors1 : 8; + + uint selectors2 : 8; + uint selectors3 : 8; + uint selectors4 : 8; + uint selectors5 : 8; +}; +NV_COMPILER_CHECK(sizeof(BlockEAC) == 64/8); + +struct BlockETC_EAC { + BlockEAC eac; + BlockETC etc; +}; +NV_COMPILER_CHECK(sizeof(BlockETC_EAC) == 128/8); + +// EAC: +// 8 bit base code word +// 4 bit multiplier +// 4 bit table index +// 16 * 3 bit indices. 
+ +struct EAC_Data { + uint8 alpha; // 8 bits + uint8 multiplier; // 4 bits + uint8 table_index; // 4 bits + uint8 selector[16]; // 3 bit indices +}; + +struct EAC_Solution { + float error = NV_FLOAT_MAX; + EAC_Data data; +}; + +struct EAC_Options { + int search_radius = 1; // 0 = fast, 1 = medium, 2 = slow + bool use_11bit_mode = false; +}; + + +static const int eac_intensity_modifiers[16][8] = { + {-3, -6, -9, -15, 2, 5, 8, 14}, // 0 + {-3, -7,-10, -13, 2, 6, 9, 12}, // 1 + {-2, -5, -8, -13, 1, 4, 7, 12}, // 2 + {-2, -4, -6, -13, 1, 3, 5, 12}, // 3 + {-3, -6, -8, -12, 2, 5, 7, 11}, // 4 + {-3, -7, -9, -11, 2, 6, 8, 10}, // 5 + {-4, -7, -8, -11, 3, 6, 7, 10}, // 6 + {-3, -5, -8, -11, 2, 4, 7, 10}, // 7 + {-2, -6, -8, -10, 1, 5, 7, 9 }, // 8 + {-2, -5, -8, -10, 1, 4, 7, 9 }, // 9 + {-2, -4, -8, -10, 1, 3, 7, 9 }, // 10 + {-2, -5, -7, -10, 1, 4, 6, 9 }, // 11 + {-3, -4, -7, -10, 2, 3, 6, 9 }, // 12 + {-1, -2, -3, -10, 0, 1, 2, 9 }, // 13 + {-4, -6, -8, -9, 3, 5, 7, 8 }, // 14 + {-3, -5, -7, -9, 2, 4, 6, 8 } // 15 +}; + + + + +static void pack_etc2_block(const ETC_Data & data, BlockETC * output_block) { + BlockETC block; + + bool swap_colors = false; + + if (data.mode == ETC_Data::Mode_ETC1) { + // These are the same for individual and differential blocks. 
+ block.individual.diff = data.etc.diff; + block.individual.flip = data.etc.flip; + block.individual.cw1 = data.etc.table0; + block.individual.cw2 = data.etc.table1; + + if (data.etc.diff) { + block.differential.red1 = data.etc.color0 >> 10; + block.differential.dred2 = data.etc.color1 >> 6; + block.differential.green1 = (data.etc.color0 >> 5) & 0x1F; + block.differential.dgreen2 = (data.etc.color1 >> 3) & 0x7; + block.differential.blue1 = data.etc.color0 & 0x1F; + block.differential.dblue2 = data.etc.color1 & 0x7; + } + else { + block.individual.red1 = data.etc.color0 >> 8; + block.individual.red2 = data.etc.color1 >> 8; + block.individual.green1 = (data.etc.color0 >> 4) & 0xF; + block.individual.green2 = (data.etc.color1 >> 4) & 0xF; + block.individual.blue1 = data.etc.color0 & 0xF; + block.individual.blue2 = data.etc.color1 & 0xF; + } + } + else if (data.mode == ETC_Data::Mode_T) { + block.t.red1a = (data.t.color0 >> 8) >> 2; + block.t.red1b = (data.t.color0 >> 8); + block.t.green1 = (data.t.color0 >> 4); + block.t.blue1 = data.t.color0; + + block.t.red2 = (data.t.color1 >> 8); + block.t.green2 = (data.t.color1 >> 4); + block.t.blue2 = data.t.color1; + + block.t.da = data.t.table >> 1; + block.t.db = data.t.table; + + block.t.diff = 1; + + // create an invalid R differential to trigger T mode + int dr = block.differential.dred2; + if (dr >= 4) dr -= 8; + int r = block.differential.red1 + dr; + + block.t.detect1 = 0; + block.t.detect2 = 1; + if (r >= 4) { + block.t.detect1 = 7; + block.t.detect2 = 0; + } + } + else if (data.mode == ETC_Data::Mode_H) { + bool table_lsb = data.h.table & 1; + swap_colors = (data.h.color0 < data.h.color1) ^ !table_lsb; + + uint16 color0 = data.h.color0; + uint16 color1 = data.h.color1; + + if (swap_colors) { + swap(color0, color1); + } + + block.h.red1 = (data.h.color0 >> 8); + block.h.green1a = (data.h.color0 >> 4) >> 1; + block.h.green1b = (data.h.color0 >> 4); + block.h.blue1a = data.h.color0 >> 3; + block.h.blue1b = data.h.color0 
>> 1; + block.h.blue1c = data.h.color0; + + block.h.red2 = (data.h.color1 >> 8); + block.h.green2a = (data.h.color1 >> 4) >> 1; + block.h.green2b = (data.h.color1 >> 4); + block.h.blue2 = (data.h.color1 >> 8); + + block.h.da = data.h.table >> 2; + block.h.db = data.h.table >> 1; + + block.h.diff = 1; + + // create an invalid R differential to trigger T mode + block.h.detect1 = 0; + block.h.detect2 = 0; + block.h.detect3 = 0; + + int dr = block.differential.dred2; + int dg = block.differential.dgreen2; + if (dr >= 4) dr -= 8; + if (dg >= 4) dg -= 8; + int r = block.differential.red1 + dr; + int g = block.differential.green1 + dg; + + if (r < 0 || r > 31) { + block.h.detect1 = 1; + } + if (g >= 4) { + block.h.detect2 = 7; + block.h.detect3 = 0; + } + else { + block.h.detect2 = 0; + block.h.detect3 = 1; + } + } + + if (data.mode == ETC_Data::Mode_Planar) { + // From ETCLib: + block.planar.originRed = data.planar.ro; + block.planar.originGreen1 = data.planar.go >> 6; + block.planar.originGreen2 = data.planar.go; + block.planar.originBlue1 = data.planar.bo >> 5; + block.planar.originBlue2 = data.planar.bo >> 3; + block.planar.originBlue3 = data.planar.bo >> 1; + block.planar.originBlue4 = data.planar.bo; + + block.planar.horizRed1 = data.planar.rh >> 1; + block.planar.horizRed2 = data.planar.rh; + block.planar.horizGreen = data.planar.gh; + block.planar.horizBlue1 = data.planar.bh >> 5; + block.planar.horizBlue2 = data.planar.bh; + + block.planar.vertRed1 = data.planar.rv >> 3; + block.planar.vertRed2 = data.planar.rv; + block.planar.vertGreen1 = data.planar.gv >> 2; + block.planar.vertGreen2 = data.planar.gv; + block.planar.vertBlue = data.planar.bv; + + block.planar.diff = 1; + + // create valid RG differentials and an invalid B differential to trigger planar mode + block.planar.detect1 = 0; + block.planar.detect2 = 0; + block.planar.detect3 = 0; + block.planar.detect4 = 0; + + // @@ Clean this up. 
+ int dr = block.differential.dred2; + int dg = block.differential.dgreen2; + int db = block.differential.dblue2; + if (dr >= 4) dr -= 8; + if (dg >= 4) dg -= 8; + if (db >= 4) db -= 8; + int r = block.differential.red1 + dr; + int g = block.differential.green1 + dg; + int b = block.differential.blue1 + db; + + if (r < 0 || r > 31) { + block.planar.detect1 = 1; + } + if (g < 0 || g > 31) { + block.planar.detect2 = 1; + } + if (b >= 4) { + block.planar.detect3 = 7; + block.planar.detect4 = 0; + } + else { + block.planar.detect3 = 0; + block.planar.detect4 = 1; + } + } + else { + block.individual.selectors = 0; + for (int i = 0; i < 16; i++) { + uint selector = data.selector[i]; + selector = etc_selector_scramble[selector]; + block.individual.selectors |= (selector >> 1) << (i ^ 8); + block.individual.selectors |= (selector & 1) << ((16 + i) ^ 8); + } + + if (swap_colors) { + block.individual.selectors ^= 0x0000FFFF; + } + } + + // @@ output_block is big endian, byte swap: + *output_block = block; +} + +static void unpack_etc2_block(const BlockETC * input_block, ETC_Data * data) { + + // @@ input_block is big endian, byte swap first: + BlockETC block = *input_block; + + // Assume ETC1 for now. + data->mode = ETC_Data::Mode_ETC1; + + // These are the same for individual and differential blocks. + data->etc.diff = block.individual.diff != 0; + data->etc.flip = block.individual.flip != 0; + data->etc.table0 = block.individual.cw1; + data->etc.table1 = block.individual.cw2; + + // Decode colors. + if (data->etc.diff) { + data->etc.color0 = U16((block.differential.red1 << 10) | (block.differential.green1 << 5) | block.differential.blue1); + data->etc.color1 = U16((block.differential.dred2 << 6) | (block.differential.dgreen2 << 3) | block.differential.dblue2); + + // @@ Clean this up. 
+ int dr = block.differential.dred2; + int dg = block.differential.dgreen2; + int db = block.differential.dblue2; + if (dr >= 4) dr -= 8; + if (dg >= 4) dg -= 8; + if (db >= 4) db -= 8; + int r = block.differential.red1 + dr; + int g = block.differential.green1 + dg; + int b = block.differential.blue1 + db; + + // Detect ETC2 modes (invalid combinations). + if (r < 0 || r > 31) { + data->mode = ETC_Data::Mode_T; + } + else if (g < 0 || g > 31) { + data->mode = ETC_Data::Mode_H; + } + else if (b < 0 || b > 31) { + data->mode = ETC_Data::Mode_Planar; + } + } + else { + data->etc.color0 = U16((block.individual.red1 << 8) | (block.individual.green1 << 4) | block.individual.blue1); + data->etc.color1 = U16((block.individual.red2 << 8) | (block.individual.green2 << 4) | block.individual.blue2); + } + + if (data->mode == ETC_Data::Mode_T) { + uint16 r0 = U16((block.t.red1a << 2) | block.t.red1b); + uint16 g0 = U16(block.t.green1); + uint16 b0 = U16(block.t.blue1); + data->t.color0 = U16(r0 << 8) | U16(g0 << 4) | b0; + + uint16 r1 = U16(block.t.red2); + uint16 g1 = U16(block.t.green2); + uint16 b1 = U16(block.t.blue2); + data->t.color1 = U16(r1 << 8) | U16(g1 << 4) | b1; + + data->t.table = U8((block.t.da << 1) | block.t.db); + } + else if (data->mode == ETC_Data::Mode_H) { + uint16 r0 = U16(block.h.red1); + uint16 g0 = U16((block.h.green1a << 1) | block.h.green1b); + uint16 b0 = U16((block.h.blue1a << 3) | (block.h.blue1b << 1) | block.h.blue1c); + data->h.color0 = U16(r0 << 8) | U16(g0 << 4) | b0; + + uint16 r1 = U16(block.h.red2); + uint16 g1 = U16((block.h.green2a << 1) | block.h.green2b); + uint16 b1 = U16(block.h.blue2); + data->h.color1 = U16(r1 << 8) | U16(g1 << 4) | b1; + + data->h.table = U8((block.h.da << 2) | (block.h.db << 1)); + + if (data->h.color0 >= data->h.color1) { + data->h.table++; + } + } + + if (data->mode == ETC_Data::Mode_Planar) { + data->planar.ro = U8(block.planar.originRed); + data->planar.go = U8((block.planar.originGreen1 << 6) + 
block.planar.originGreen2); + data->planar.bo = U8((block.planar.originBlue1 << 5) + (block.planar.originBlue2 << 3) + (block.planar.originBlue3 << 1) + block.planar.originBlue4); + + data->planar.rh = U8((block.planar.horizRed1 << 1) + block.planar.horizRed2); + data->planar.gh = U8(block.planar.horizGreen); + data->planar.bh = U8((block.planar.horizBlue1 << 5) + block.planar.horizBlue2); + + data->planar.rv = U8((block.planar.vertRed1 << 3) + block.planar.vertRed2); + data->planar.gv = U8((block.planar.vertGreen1 << 2) + block.planar.vertGreen2); + data->planar.bv = U8(block.planar.vertBlue); + } + else { + // Note, selectors are arranged in columns, keep that order. + unsigned char * selectors = (uint8 *)&block.individual.selectors; + for (int i = 0; i < 16; i++) { + int byte_msb = (1 - (i / 8)); + int byte_lsb = (3 - (i / 8)); + int shift = (i & 7); + + uint msb = (selectors[byte_msb] >> shift) & 1; + uint lsb = (selectors[byte_lsb] >> shift) & 1; + + uint index = (msb << 1) | lsb; + + if (data->mode == ETC_Data::Mode_ETC1) { + data->selector[i] = etc_selector_unscramble[index]; + } + else { + // No scrambling in T & H modes. 
+ data->selector[i] = index; + } + } + } +} + +static void pack_eac_block(const EAC_Data & data, BlockEAC * output_block) { + + output_block->base = data.alpha; + output_block->table = data.table_index; + output_block->multiplier = data.multiplier; + + uint64 selector_bits = 0; + for (uint i = 0; i < 16; i++) { + uint shift = 45 - (3 * i); + selector_bits |= uint64(data.selector[i]) << shift; + } + + output_block->selectors0 = selector_bits >> 40; + output_block->selectors1 = selector_bits >> 32; + output_block->selectors2 = selector_bits >> 24; + output_block->selectors3 = selector_bits >> 16; + output_block->selectors4 = selector_bits >> 8; + output_block->selectors5 = selector_bits >> 0; +} + +static void unpack_eac_block(const BlockEAC * input_block, EAC_Data * data) { + + data->alpha = input_block->base; + data->table_index = input_block->table; + data->multiplier = input_block->multiplier; + + uint64 selector_bits = 0; + selector_bits |= uint64(input_block->selectors0) << 40; + selector_bits |= uint64(input_block->selectors1) << 32; + selector_bits |= uint64(input_block->selectors2) << 24; + selector_bits |= uint64(input_block->selectors3) << 16; + selector_bits |= uint64(input_block->selectors4) << 8; + selector_bits |= uint64(input_block->selectors5) << 0; + for (uint i = 0; i < 16; i++) { + uint shift = 45 - (3 * i); + data->selector[i] = (selector_bits >> shift) & 0x7; + } +} + + + +// This assumes nin > nout-nin +inline int bitexpand(uint32 bits, uint nin, uint nout) { + assert(nout > nin); + //assert(nout - nin > nin); + return (bits << uint(nout - nin)) | (bits >> uint(2U * nin - nout)); +} + +// Integer color unpacking for decompressor. 
+static void unpack_color_444(uint32 packed_color, int * r, int * g, int * b) { + int r4 = (packed_color >> 8) & 0xF; + int g4 = (packed_color >> 4) & 0xF; + int b4 = packed_color & 0xF; + *r = r4 << 4 | r4; // bitexpand(r4, 4, 8); + *g = g4 << 4 | g4; // bitexpand(g4, 4, 8); + *b = b4 << 4 | b4; // bitexpand(b4, 4, 8); +} + +static Vector3 unpack_color_444(uint32 packed_color) { + int r, g, b; + unpack_color_444(packed_color, &r, &g, &b); + return Vector3(float(r), float(g), float(b)) * 1.0f / 255.0f; +} + +static void unpack_color_555(uint32 packed_color, int * r, int * g, int * b) { + int r5 = (packed_color >> 10) & 0x1F; + int g5 = (packed_color >> 5) & 0x1F; + int b5 = packed_color & 0x1F; + *r = (r5 << 3) | (r5 >> 2); // bitexpand(r5, 5, 8); + *g = (g5 << 3) | (g5 >> 2); // bitexpand(g5, 5, 8); + *b = (b5 << 3) | (b5 >> 2); // bitexpand(b5, 5, 8); +} + +static Vector3 unpack_color_555(uint32 packed_color) { + int r, g, b; + unpack_color_555(packed_color, &r, &g, &b); + return Vector3(float(r), float(g), float(b)) * 1.0f / 255.0f; +} + +// Returns signed r,g,b without bit expansion. 
+static void unpack_delta_333(uint32 packed_delta, int * r, int * g, int * b) { + *r = (packed_delta >> 6) & 7; + *g = (packed_delta >> 3) & 7; + *b = packed_delta & 7; + if (*r >= 4) *r -= 8; + if (*g >= 4) *g -= 8; + if (*b >= 4) *b -= 8; +} + +static bool unpack_color_555(uint32 packed_color, uint32 packed_delta, int * r, int * g, int * b) { + int dc_r, dc_g, dc_b; + unpack_delta_333(packed_delta, &dc_r, &dc_g, &dc_b); + + int r5 = int((packed_color >> 10U) & 0x1F) + dc_r; + int g5 = int((packed_color >> 5U) & 0x1F) + dc_g; + int b5 = int(packed_color & 0x1F) + dc_b; + + bool success = true; + if (static_cast(r5 | g5 | b5) > 31U) + { + success = false; + r5 = clamp(r5, 0, 31); + g5 = clamp(g5, 0, 31); + b5 = clamp(b5, 0, 31); + } + + *r = (r5 << 3) | (r5 >> 2); // bitexpand(r5, 5, 8); + *g = (g5 << 3) | (g5 >> 2); // bitexpand(g5, 5, 8); + *b = (b5 << 3) | (b5 >> 2); // bitexpand(b5, 5, 8); + + return success; +} + +static Vector3 unpack_color_555(uint32 packed_color, uint32 packed_delta) { + int r, g, b; + bool success = unpack_color_555(packed_color, packed_delta, &r, &g, &b); + assert(success); + return Vector3(float(r), float(g), float(b)) * 1.0f / 255.0f; +} + + +static void unpack_color_676(uint32 packed_color, int * r, int * g, int * b) { + int r6 = (packed_color >> 13) & 0x3F; + int g7 = (packed_color >> 6) & 0x7F; + int b6 = packed_color & 0x3F; + + *r = bitexpand(r6, 6, 8); // r << 2 | r >> 4 + *g = bitexpand(g7, 7, 8); // g << 1 | g >> 6 + *b = bitexpand(b6, 6, 8); // b << 2 | b >> 4 +} + + +static uint32 pack_color_444(Vector3 color) { + + // Truncate. + uint r = U32(ftoi_trunc(clamp(color.x * 15.0f, 0.0f, 15.0f))); + uint g = U32(ftoi_trunc(clamp(color.y * 15.0f, 0.0f, 15.0f))); + uint b = U32(ftoi_trunc(clamp(color.z * 15.0f, 0.0f, 15.0f))); + + // Round exactly according to 444 bit-expansion. 
+ r += (color.x > midpoints4[r]); + g += (color.y > midpoints4[g]); + b += (color.z > midpoints4[b]); + + return (r << 8) | (g << 4) | b; +} + +static uint32 pack_color_555(Vector3 color) { + + // Truncate. + uint r = U32(ftoi_trunc(clamp(color.x * 31.0f, 0.0f, 31.0f))); + uint g = U32(ftoi_trunc(clamp(color.y * 31.0f, 0.0f, 31.0f))); + uint b = U32(ftoi_trunc(clamp(color.z * 31.0f, 0.0f, 31.0f))); + + // Round exactly according to 555 bit-expansion. + r += (color.x > midpoints5[r]); + g += (color.y > midpoints5[g]); + b += (color.z > midpoints5[b]); + + return (r << 10) | (g << 5) | b; +} + +static uint32 pack_delta_333(Vector3 delta) { + // @@ Accurate rounding of signed 3-bit components. + + int r = ftoi_round(clamp(delta.x * 31.0f, -4.0f, 3.0f)); + int g = ftoi_round(clamp(delta.y * 31.0f, -4.0f, 3.0f)); + int b = ftoi_round(clamp(delta.z * 31.0f, -4.0f, 3.0f)); + + //r += (delta.x > delta_midpoints3[r]); + //g += (delta.y > delta_midpoints3[g]); + //b += (delta.z > delta_midpoints3[b]); + + if (r < 0) r += 8; + if (g < 0) g += 8; + if (b < 0) b += 8; + return static_cast(b | (g << 3) | (r << 6)); +} + +static uint8 pack_float_6(float f) { + + // Truncate. + uint u = U32(ftoi_trunc(clamp(f * 63.0f, 0.0f, 63.0f))); + + // Round exactly according to 6 bit-expansion. + //u += (f > midpoints6[u]); + + float midpoint = 0.5f * (bitexpand(u, 6, 8) + bitexpand(min(u + 1, 63U), 6, 8)); // @@ Precompute. + u += (f > midpoint); + + return U8(u); +} + +static uint8 pack_float_7(float f) { + + // Truncate. + uint u = U32(ftoi_trunc(clamp(f * 127.0f, 0.0f, 127.0f))); + + // Round exactly according to 6 bit-expansion. + //u += (f > midpoints7[u]); + + float midpoint = 0.5f * (bitexpand(u, 7, 8) + bitexpand(min(u + 1, 127U), 7, 8)); // @@ Precompute. 
+ u += (f > midpoint); + + return U8(u); +} + +static uint8 pack_float_6(float f, bool round_dir) { + uint u = U32(ftoi_trunc(clamp(f * 63.0f + round_dir, 0.0f, 63.0f))); + return U8(u); +} + +static uint8 pack_float_7(float f, bool round_dir) { + uint u = U32(ftoi_trunc(clamp(f * 127.0f + round_dir, 0.0f, 127.0f))); + return U8(u); +} + + + + +Vector3 get_partition_color_average(const Vector4 input_colors[16], const float input_weights[16], bool flip, int partition) { + Vector3 sum_c(0); + float sum_w = 0; + + if (flip) { + // Horizontal partition. + int offset = partition ? 8 : 0; + + for (int i = 0; i < 8; i++) { + sum_c += input_colors[i+offset].xyz() * input_weights[i+offset]; + sum_w += input_weights[i+offset]; + } + } + else { + // Vertical partition. + int offset = partition ? 2 : 0; + + for (int i = 0; i < 4; i++) { + sum_c += input_colors[i+offset].xyz() * input_weights[i+offset]; + sum_w += input_weights[i+offset]; + + sum_c += input_colors[i+offset+1].xyz() * input_weights[i+offset+1]; + sum_w += input_weights[i+offset+1]; + + offset += 2; + } + } + if (sum_w == 0) { + sum_w = 1; + } + + return sum_c * 1.0f / sum_w; +} + +// Approximate partition color using average. +Vector3 base_color_average(const Vector3 colors[8]) { + Vector3 sum_c(0); + + for (uint i = 0; i < 8; i++) { + sum_c += colors[i]; + } + + return sum_c * 1.0f / 8.0f; +} +Vector3 base_color_average(const Vector3 colors[8], const float weights[8]) { + Vector3 sum_c(0); + float sum_w = 0; + + for (uint i = 0; i < 8; i++) { + sum_c += colors[i] * weights[i]; + sum_w += weights[i]; + } + + return sum_c * 1.0f / sum_w; +} + +#if 0 +// Compute base color using least squares. 
+Vector3 base_color_least_squares(const Vector3 colors[8], int table_index, int indices[8]) { + + // Compute dot(C, I) and dot(I, I) + Vector3 CI(0); + float II = 0; + + for (int i = 0; i < 8; i++) { + Vector3 C = colors[i]; + float I = etc_intensity_modifiers[table_index][indices[i]]; + CI += C * I; + II += I * I; + } + + return CI / II; +} + +// @@ Do weighted least squares! +Vector3 base_color_least_squares(const Vector3 colors[8], const float weights[8], int table_index, int indices[8]) { + + // Compute dot(C, I) and dot(I, I) + Vector3 CI(0); + float II = 0; + + for (int i = 0; i < 8; i++) { + Vector3 C = colors[i]; + float w = weights[i]; + float I = etc_intensity_modifiers[table_index][indices[i]]; + CI += C * I * w; + II += I * I; + } + + return CI / II; +} + +// Is this any faster than the above? +Vector3 base_color_least_squares(const Vector3 colors[8], int table_index, int c0, int c1, int c2) { + + // Compute dot(C, I) and dot(I, I) + Vector3 CI(0); + + float I0 = etc_intensity_modifiers[table_index][0]; + float I1 = etc_intensity_modifiers[table_index][1]; + float I2 = etc_intensity_modifiers[table_index][2]; + float I3 = etc_intensity_modifiers[table_index][3]; + + float II = 0; + II += c0 * I0 * I0; + II += c1 * I1 * I1; + II += c2 * I2 * I2; + II += (8-c0-c1-c2) * I3 * I3; + + int i = 0; + for (; i < c0; i++) CI += colors[i] * I0; + for (; i < c0+c1; i++) CI += colors[i] * I1; + for (; i < c0+c1+c2; i++) CI += colors[i] * I2; + for (; i < 8; i++) CI += colors[i] * I3; + + return CI / II; +} + +static void selectors_for_clusters(int c0, int c1, int c2, int selector[8]) { + int i = 0; + for (; i < c0; i++) selector[i] = 0; + for (; i < c0+c1; i++) selector[i] = 1; + for (; i < c0+c1+c2; i++) selector[i] = 2; + for (; i < 8; i++) selector[i] = 3; +} + +static int cluster_count(int count = 8) { + int total = 0; + + for (uint c0 = 0; c0 <= count; c0++) { + for (uint c1 = 0; c1 <= count-c0; c1++) { + for (uint c2 = 0; c2 <= count-c0-c1; c2++) { + total++; 
+ } + } + } + + // total is the number of possible cluster combinations. + return total; +} + +// Does each partition have its own table index? Or is it shared for both? + + +void test_all_total_orders(const Vector4 colors[8], const float weights[8], int table_index) { + + // @@ compute average luminance of each partition. + + + // @@ sort colors by the luminance differences respect to partition average. + + // @@ compute luminance range, pick table index based on that. Try nearest indices also? + + // For each cluster combination: +/* + for (uint c0 = 0; c0 <= count; c0++) { + for (uint c1 = 0; c1 <= count-c0; c1++) { + for (uint c2 = 0; c2 <= count-c0-c1; c2++) { + + // compute selectors. + int selector[8]; + selectors_for_clusters(c0, c1, c2, selector); + + // compute base colors that minimize error in each partition. + + // determine error for these quantized base colors. Record best cluster combination. + + } + } + } +*/ +} + +void test_all_total_orders(const Vector4 input_colors[16], const float input_weights[16], uint count, bool flip, int table_index) { + + // Slow method is to test both flip modes. + //test_all_total_orders(input_colors, input_weights, /*flip=*/false, int table_index); + //test_all_total_orders(input_colors, input_weights, /*flip=*/true, int table_index); +} + + +// @@ How do compute the error for a given base color? + +// Compute indices using range fitting / quantization of input colors? + +// Compute indices using range fitting. + +void test_all_clusters() { + int count = 8; // Could be smaller. 
+ + for (uint c0 = 0; c0 <= count; c0++) { + Vector3 x1(0.0f); + float w1 = 0.0f; + + for (uint c1 = 0; c1 <= count-c0; c1++) { + Vector3 x2(0.0f); + float w2 = 0.0f; + + for (uint c2 = 0; c2 <= count-c0-c1; c2++) { + } + } + } + +} + +#endif + + + + + + + + +static Color32 saturate_color(int R, int G, int B) { + Color32 c; + c.r = U8(clamp(R, 0, 255)); + c.g = U8(clamp(G, 0, 255)); + c.b = U8(clamp(B, 0, 255)); + c.a = 255; + return c; +} + +static void get_diff_subblock_palette(uint16 packed_color, uint table_idx, Color32 palette[4]) { + assert(table_idx < 8); + + const int * intensity_table = etc_intensity_modifiers[table_idx]; + + int r, g, b; + unpack_color_555(packed_color, &r, &g, &b); + + for (int i = 0; i < 4; i++) { + const int y = intensity_table[i]; + palette[i] = saturate_color(r + y, g + y, b + y); + } +} + +static bool get_diff_subblock_palette(uint16 packed_color, uint16 packed_delta, uint table_idx, Color32 palette[4]) { + assert(table_idx < 8); + + const int * intensity_table = etc_intensity_modifiers[table_idx]; + + int r, g, b; + bool success = unpack_color_555(packed_color, packed_delta, &r, &g, &b); + + for (int i = 0; i < 4; i++) { + const int y = intensity_table[i]; + palette[i] = saturate_color(r + y, g + y, b + y); + } + + return success; +} + +static void get_abs_subblock_palette(uint16 packed_color, uint table_idx, Color32 palette[4]) { + assert(table_idx < 8); + + const int * intensity_table = etc_intensity_modifiers[table_idx]; + + int r, g, b; + unpack_color_444(packed_color, &r, &g, &b); + + for (int i = 0; i < 4; i++) { + const int y = intensity_table[i]; + palette[i] = saturate_color(r + y, g + y, b + y); + } +} + +static int get_selector(const ETC_Data & data, int x, int y) { + // Note selectors are arranged in column order. + return data.selector[x*4+y]; +} +static int get_partition(const ETC_Data & data, int x, int y) { + assert(data.mode == ETC_Data::Mode_ETC1); + return data.etc.flip ? 
y > 1 : x > 1; +} + +static void decode_etc1(const ETC_Data & data, Vector4 colors[16]) { + assert(data.mode == ETC_Data::Mode_ETC1); + + Color32 palette[2][4]; + + if (data.etc.diff) { + // Decode colors in 555+333 mode. + get_diff_subblock_palette(data.etc.color0, data.etc.table0, palette[0]); + get_diff_subblock_palette(data.etc.color0, data.etc.color1, data.etc.table1, palette[1]); + } + else { + // Decode colors in 444,444 mode. + get_abs_subblock_palette(data.etc.color0, data.etc.table0, palette[0]); + get_abs_subblock_palette(data.etc.color1, data.etc.table1, palette[1]); + } + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + colors[y*4+x] = toVector4(palette[get_partition(data, x, y)][get_selector(data, x, y)]); + } + } +} + +static void decode_etc2_t(const ETC_Data & data, Vector4 output_colors[16]) { + assert(data.mode == ETC_Data::Mode_T); + + int r, g, b; + Color32 palette[4]; + + int d = etc_th_distances[data.t.table]; + + unpack_color_444(data.t.color0, &r, &g, &b); + palette[0] = saturate_color(r, g, b); + + unpack_color_444(data.t.color1, &r, &g, &b); + palette[1] = saturate_color(r + d, g + d, b + d); + palette[2] = saturate_color(r, g, b); + palette[3] = saturate_color(r - d, g - d, b - d); + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + output_colors[y*4+x] = toVector4(palette[get_selector(data, x, y)]); + } + } +} + +static void decode_etc2_h(const ETC_Data & data, Vector4 output_colors[16]) { + assert(data.mode == ETC_Data::Mode_H); + + int r, g, b; + Color32 palette[4]; + + int d = etc_th_distances[data.t.table]; + + unpack_color_444(data.t.color0, &r, &g, &b); + palette[0] = saturate_color(r + d, g + d, b + d); + palette[1] = saturate_color(r - d, g - d, b - d); + + unpack_color_444(data.t.color1, &r, &g, &b); + palette[2] = saturate_color(r + d, g + d, b + d); + palette[3] = saturate_color(r - d, g - d, b - d); + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + output_colors[y*4+x] = 
toVector4(palette[get_selector(data, x, y)]); + } + } +} + +static void decode_etc2_planar(const ETC_Data & data, Vector4 output_colors[16]) { + assert(data.mode == ETC_Data::Mode_Planar); + + int ro, go, bo; // origin color + int rh, gh, bh; // horizontal color + int rv, gv, bv; // vertical color + + // Unpack from 676 + ro = bitexpand(data.planar.ro, 6, 8); // r << 2 | r >> 4 + go = bitexpand(data.planar.go, 7, 8); // g << 1 | g >> 6 + bo = bitexpand(data.planar.bo, 6, 8); + + rh = bitexpand(data.planar.rh, 6, 8); + gh = bitexpand(data.planar.gh, 7, 8); + bh = bitexpand(data.planar.bh, 6, 8); + + rv = bitexpand(data.planar.rv, 6, 8); + gv = bitexpand(data.planar.gv, 7, 8); + bv = bitexpand(data.planar.bv, 6, 8); + + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + int r = (4 * ro + x * (rh - ro) + y * (rv - ro) + 2) >> 2; + int g = (4 * go + x * (gh - go) + y * (gv - go) + 2) >> 2; + int b = (4 * bo + x * (bh - bo) + y * (bv - bo) + 2) >> 2; + + int idx = 4 * y + x; + output_colors[idx].x = saturate(float(r) / 255.0f); + output_colors[idx].y = saturate(float(g) / 255.0f); + output_colors[idx].z = saturate(float(b) / 255.0f); + output_colors[idx].w = 1; + } + } +} + +static void decode_etc2(const ETC_Data & data, Vector4 colors[16]) { + + if (data.mode == ETC_Data::Mode_ETC1) { + decode_etc1(data, colors); + } + else if (data.mode == ETC_Data::Mode_T) { + decode_etc2_t(data, colors); + } + else if (data.mode == ETC_Data::Mode_H) { + decode_etc2_h(data, colors); + } + else /*if (data.mode == ETC_Data::Mode_Planar)*/ { + decode_etc2_planar(data, colors); + } +} + + +static float get_alpha11(int base, int table, int mul, int index) { + int elevenbase = base*8+4; + int tabVal = eac_intensity_modifiers[table][index]; + int elevenTabVal = tabVal*8; + + if(mul!=0) elevenTabVal*=mul; + else elevenTabVal/=8; + + //calculate sum + int elevenbits = elevenbase+elevenTabVal; + + //clamp.. 
+ if(elevenbits>=256*8) elevenbits=256*8-1; + else if(elevenbits<0) elevenbits=0; + //elevenbits now contains the 11 bit alpha value as defined in the spec. + + //extend to 16 bits before returning, since we don't have any good 11-bit file formats. + uint16 sixteenbits = (elevenbits<<5)+(elevenbits>>6); + + return float(sixteenbits) / 65535.0f; +} + +static float get_alpha8(int base, int table, int mul, int index) { + int value = clamp(base + eac_intensity_modifiers[table][index] * mul, 0, 255); + return value / 255.0f; +} + + + + +static void decode_eac_8(const EAC_Data & data, Vector4 output_colors[16], int output_channel = 3) { + for (int i = 0; i < 16; i++) { + int s = data.selector[4*(i%4) + i/4]; + output_colors[i].component[output_channel] = get_alpha8(data.alpha, data.table_index, data.multiplier, s); + } +} + +static void decode_eac_11(const EAC_Data & data, Vector4 output_colors[16], int output_channel = 0) { + for (int i = 0; i < 16; i++) { + int s = data.selector[4*(i%4) + i/4]; + output_colors[i].component[output_channel] = get_alpha11(data.alpha, data.table_index, data.multiplier, s); + } +} + + + + +static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) { + Vector3 d = (p - c) * w; + return dot(d, d); +} + +static float evaluate_rgb_mse(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, const ETC_Data & data) { + // Decode data and compare? + Vector4 colors[16]; + decode_etc2(data, colors); + + float error = 0; + for (int i = 0; i < 16; i++) { + error += input_weights[i] * evaluate_mse(input_colors[i].xyz(), colors[i].xyz(), options.color_weights); + } + return error; +} + + +static int select_table_index(const Vector3 & base_color, const Vector4 input_colors[16], const float input_weights[16], bool flip, int partition) { + + //float min_lum_delta = NV_FLOAT_MAX; + float max_lum_delta = -NV_FLOAT_MAX; + + int xb = partition ? 2 : 0; + int xe = partition ? 
4 : 2; + + for (int y = 0; y < 4; y++) { + for (int x = xb; x < xe; x++) { + int idx = flip ? x*4 + y : y*4 + x; + float lum_delta = dot(base_color, Vector3(1.0f/3)) - dot(input_colors[idx].xyz(), Vector3(1.0f/3)); + //min_lum_delta = min(min_lum_delta, lum_delta); + max_lum_delta = max(max_lum_delta, fabsf(lum_delta)); + } + } + + int best_range = -1; + float best_error = NV_FLOAT_MAX; + for (int i = 0; i < 8; i++) { + float error = fabsf(etc_intensity_range[i] - 255 * max_lum_delta); + if (error < best_error) { + best_error = error; + best_range = i; + } + } + + return best_range; +} + +static float update_selectors(const Vector4 input_colors[16], const float input_weights[16], ETC_Data & data, const ETC_Options & options) { + + Color32 palette[2][4]; + + if (data.etc.diff) { + // Decode colors in 555+333 mode. + get_diff_subblock_palette(data.etc.color0, data.etc.table0, palette[0]); + get_diff_subblock_palette(data.etc.color0, data.etc.color1, data.etc.table1, palette[1]); + } + else { + // Decode colors in 444,444 mode. + get_abs_subblock_palette(data.etc.color0, data.etc.table0, palette[0]); + get_abs_subblock_palette(data.etc.color1, data.etc.table1, palette[1]); + } + + float total_error = 0; + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + int i = y*4 + x; + + float best_error = NV_FLOAT_MAX; + int best_p = 0; + + for (int p = 0; p < 4; p++) { + float error = evaluate_mse(toVector3(palette[get_partition(data, x, y)][p]), input_colors[i].xyz(), options.color_weights); + if (error < best_error) { + best_error = error; + best_p = p; + } + } + + int s = x*4 + y; + data.selector[s] = U8(best_p); + + total_error += best_error * input_weights[i]; + } + } + + return total_error; +} + + +static void partition_input_block(const Vector4 input_colors[16], const float input_weights[16], bool flip, int partition, Vector3 output_colors[8], float output_weights[8]) { + + const int xb = partition ? 2 : 0; + const int xe = partition ? 
4 : 2; + + for (int y = 0, i = 0; y < 4; y++) { + for (int x = xb; x < xe; x++, i++) { + int idx = flip ? x*4 + y : y*4 + x; + + output_colors[i] = input_colors[idx].xyz(); + output_weights[i] = input_weights[idx]; + } + } +} + + +struct ETC_SubBlock { + Vector3 color; + bool delta; + int table; + int indices[8]; +}; + +static float evaluate_rgb_mse(const Vector3 colors[8], const float weights[8], const ETC_Options & options, ETC_SubBlock * sub_block) { + + // Evaluate sub block palette. + Vector3 palette[4]; + palette[0] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][0] / 255.0f); + palette[1] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][1] / 255.0f); + palette[2] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][2] / 255.0f); + palette[3] = sub_block->color + Vector3(etc_intensity_modifiers[sub_block->table][3] / 255.0f); + + float mse = 0; + for (int i = 0; i < 8; i++) { + mse += evaluate_mse(colors[i], palette[sub_block->indices[i]], options.color_weights) * weights[i]; + } + + return mse; +} + +static void optimize_base_color(const Vector3 colors[8], const float weights[8], ETC_SubBlock * sub_block) { + + // @@ For a given index selection, find color that minimizes the error. RGB components are independent. + + float D_sum = 0; + float R_sum = 0; + float G_sum = 0; + float B_sum = 0; + float W_sum = 0; + + for (int i = 0; i < 8; i++) { + float Di = etc_intensity_modifiers[sub_block->table][sub_block->indices[i]] / 255.0f; // @@ precompute? + + D_sum += Di * weights[i]; + R_sum += colors[i].x * weights[i]; + G_sum += colors[i].y * weights[i]; + B_sum += colors[i].z * weights[i]; + W_sum += weights[i]; + } + + sub_block->color.x = (R_sum - D_sum) / W_sum; + sub_block->color.y = (R_sum - D_sum) / W_sum; + sub_block->color.z = (R_sum - D_sum) / W_sum; + + // @@ Estimate error (without quantization) + + + + // @@ Repeat for all tables? 
+ + // @@ Given a new center, compute new indices, then update center? + +} + + + +static int reduce_colors(Vector3 * colors, float * weights, int count) { + + int n = 0; + + for (int i = 0; i < count; i++) { + + if (weights[i] == 0.0f) { + // skip without incrementing n. + continue; + } + + colors[n] = colors[i]; + weights[n] = weights[i]; + + // find color[j] that matches color[i] + for (int j = i + 1; j < count; j++) { + if (colors[i] == colors[j]) { // @@ Compare within threshold? + weights[n] += weights[j]; + weights[j] = 0.0f; + } + } + + n++; + } + + return n; +} + +// stable sort. in place. Orders colors by increasing luminance (x + y + z) and keeps weights paired with their colors. +static void sort_colors(Vector3 * colors, float * weights, int count) { + assert(count <= 8); + + // build the list of values + //int order[8]; + float lum[8]; + for (int i = 0; i < count; ++i) { + //order[i] = i; + lum[i] = colors[i].x + colors[i].y + colors[i].z; + } + + // stable sort + for (int i = 0; i < count; ++i) { + for (int j = i; j > 0 && lum[j] < lum[j - 1]; --j) { + swap(lum[j], lum[j - 1]); + //swap(order[j], order[j - 1]); + swap(colors[j], colors[j - 1]); + swap(weights[j], weights[j - 1]); // keep each weight paired with its color + } + } +} + + + +/* +float optimize_center(float colors[4][10], uniform int p, uniform int table_level) +{ + float best_center = 0; + for (uniform int q = 0; q < 4; q++) + { + best_center += (colors[q][7 + p] - get_etc1_dY(table_level, q)) * colors[q][3]; + } + best_center /= 8; + + float best_err = 0; + for (uniform int q = 0; q < 4; q++) + { + float dY = get_etc1_dY(table_level, q); + best_err += sq(clamp(best_center + dY, 0, 255) - colors[q][7 + p]) * colors[q][3]; + } + + for (uniform int branch = 0; branch < 4; branch++) + { + float new_center = 0; + float sum = 0; + for (uniform int q = 0; q < 4; q++) + { + if (branch <= 1 && q <= branch) continue; + if (branch >= 2 && q >= branch) continue; + new_center += (colors[q][7 + p] - get_etc1_dY(table_level, q)) * colors[q][3]; + sum += colors[q][3]; + } + + new_center /= sum; + + float err = 0; + for (uniform int q = 0; q < 4; q++) + { + float dY = 
get_etc1_dY(table_level, q); + err += sq(clamp(new_center + dY, 0, 255) - colors[q][7 + p]) * colors[q][3]; + } + + if (err < best_err) + { + best_err = err; + best_center = new_center; + } + } + + return best_center; +} +*/ + + + + +static void compress_etc1_test(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) { + + Vector3 colors[8]; + float weights[8]; + //int xrefs[8]; + ETC_SubBlock sub_block[2]; + + bool best_flip = false; + for (int flip = 0; flip <= 1; flip++) { + + partition_input_block(input_colors, input_weights, !!flip, /*partition=*/0, colors, weights); + + int count = reduce_colors(colors, weights, 8); + + //sort_colors(colors, weights); + + // @@ sort colors along luminance axis. + + //sub_block[0].color + + partition_input_block(input_colors, input_weights, !!flip, /*partition=*/1, colors, weights); + + } + + //pack_colors(sub_block[0].color, sub_block[1].color, &result->data); + + result->error = update_selectors(input_colors, input_weights, result->data, options); + +} + +/*void pack_colors(const Vector3 & color0, const Vector3 & color1, const ETC_Options & options, ETC_Data * data) { + + uint16 abs_c0 = U16(pack_color_444(color0)); + uint16 abs_c1 = U16(pack_color_444(color1)); + Vector3 abs_vc0 = unpack_color_444(abs_c0); + Vector3 abs_vc1 = unpack_color_444(abs_c1); + float abs_error = evaluate_mse(color0, abs_vc0, options.color_weights) + evaluate_mse(color1, abs_vc1, options.color_weights); + + uint16 diff_c0 = U16(pack_color_555(color0)); + Vector3 diff_vc0 = unpack_color_555(diff_c0); + uint16 diff_d1 = U16(pack_delta_333(color1 - diff_vc0)); + Vector3 diff_vc1 = unpack_color_555(diff_c0, diff_d1); + float diff_error = evaluate_mse(color0, diff_vc0, options.color_weights) + evaluate_mse(color1, diff_vc1, options.color_weights); + + if (diff_error < abs_error) { + data->etc.color0 = diff_c0; + data->etc.color1 = diff_c1; + return diff_error; + } + else { + if (abs_error < 
best_error) { + best_error = abs_error; + best_diff = false; + best_flip = flip; + best_c0 = abs_c0; + best_c1 = abs_c1; + best_vc0 = abs_vc0; + best_vc1 = abs_vc1; + } + } +}*/ + +static void compress_etc1_range_fit(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) { + + float best_error = NV_FLOAT_MAX; + bool best_diff = false; + bool best_flip = false; + uint16 best_c0 = 0; + uint16 best_c1 = 0; + Vector3 best_vc0; + Vector3 best_vc1; + + for (int flip = 0; flip <= 1; flip++) { + Vector3 color0 = get_partition_color_average(input_colors, input_weights, !!flip, /*partition=*/0); + Vector3 color1 = get_partition_color_average(input_colors, input_weights, !!flip, /*partition=*/1); + + uint16 abs_c0 = U16(pack_color_444(color0)); + uint16 abs_c1 = U16(pack_color_444(color1)); + Vector3 abs_vc0 = unpack_color_444(abs_c0); + Vector3 abs_vc1 = unpack_color_444(abs_c1); + float abs_error = evaluate_mse(color0, abs_vc0, options.color_weights) + evaluate_mse(color1, abs_vc1, options.color_weights); + + uint16 diff_c0 = U16(pack_color_555(color0)); + Vector3 diff_vc0 = unpack_color_555(diff_c0); + uint16 diff_d1 = U16(pack_delta_333(color1 - diff_vc0)); + Vector3 diff_vc1 = unpack_color_555(diff_c0, diff_d1); + float diff_error = evaluate_mse(color0, diff_vc0, options.color_weights) + evaluate_mse(color1, diff_vc1, options.color_weights); + + if (diff_error < abs_error) { + if (diff_error < best_error) { + best_error = diff_error; + best_diff = true; + best_flip = !!flip; + best_c0 = diff_c0; + best_c1 = diff_d1; + best_vc0 = diff_vc0; + best_vc1 = diff_vc1; + } + } + else { + if (abs_error < best_error) { + best_error = abs_error; + best_diff = false; + best_flip = !!flip; + best_c0 = abs_c0; + best_c1 = abs_c1; + best_vc0 = abs_vc0; + best_vc1 = abs_vc1; + } + } + } + + + result->data.mode = ETC_Data::Mode_ETC1; + result->data.etc.flip = best_flip; + result->data.etc.diff = best_diff; + 
result->data.etc.table0 = select_table_index(best_vc0, input_colors, input_weights, best_flip, /*partition=*/0); + result->data.etc.table1 = select_table_index(best_vc1, input_colors, input_weights, best_flip, /*partition=*/1); + result->data.etc.color0 = best_c0; + result->data.etc.color1 = best_c1; + + result->error = update_selectors(input_colors, input_weights, result->data, options); + + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); +} + +#if HAVE_RGETC +#include "nvimage/ColorBlock.h" + +void compress_etc1_rg(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) { + + rg_etc1::etc1_pack_params pack_params; + //pack_params.m_quality = rg_etc1::cLowQuality; + pack_params.m_quality = rg_etc1::cMediumQuality; // @@ Select quality based on compression options. + + ColorBlock rgba; + for (uint i = 0; i < 16; i++) { + rgba.color(i) = toColor32(input_colors[i]); + } + rgba.swizzle(2, 1, 0, 3); + + BlockETC block; + rg_etc1::pack_etc1_block((void *)&block, (const uint *)rgba.colors(), pack_params); + + unpack_etc2_block(&block, &result->data); + + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); +} +#endif + +static void compress_etc2_planar_solid(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) { + + Vector3 C(0); + float W = 0; + + for (int i = 0; i < 16; i++) { + C += input_colors[i].xyz() * input_weights[i]; + W += input_weights[i]; + } + + C /= W; + + // Convert colors to 676 + result->data.mode = ETC_Data::Mode_Planar; + result->data.planar.ro = pack_float_6(C.x); + result->data.planar.go = pack_float_7(C.y); + result->data.planar.bo = pack_float_6(C.z); + + result->data.planar.rh = result->data.planar.ro; + result->data.planar.gh = result->data.planar.go; + result->data.planar.bh = result->data.planar.bo; + + result->data.planar.rv = result->data.planar.ro; + 
result->data.planar.gv = result->data.planar.go; + result->data.planar.bv = result->data.planar.bo; + + // Evaluate error. + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); +} + +// Least squares optimization of planar endpoints. +static void compress_etc2_planar_lsqr(const Vector4 input_colors[16], const float input_weights[16], const ETC_Options & options, ETC_Solution * result) { + + // Isn't this a simple least squares problem? + // - Yes, but that doesn't take clamping and quantization into account. + // - Solve the least squares problem, then refine endpoints? + + // This matrix is always the same! But not when using arbitrary weights! + // This would be faster computing the matrix first, then multiplying by the weight covariance matrix. + Matrix3 m(0); + + // For every pixel, decoder does: + // int r = (4 * ro + x * (rh - ro) + y * (rv - ro) + 2) >> 2; + + // R(x,y) = (4 * ro + x * (rh - ro) + y * (rv - ro) + 2) / 4; + + // R(x,y) = ro * (1 - x/4 - y/4) + rh * x/4 + rv * y/4 + 1/2; + + // a = x/4 + // b = y/4 + // c = 1 - a - b + // R(x,y) = ro * c + rh * a + rv * b + 1/2; + + float A[3 * 16]; + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + float w = input_weights[4*y+x]; + //if ((x == 1 || x == 2) && (y == 1 && y == 2)) w *= 0.5; + + float a = float(x) / 4 * w; + float b = float(y) / 4 * w; + float c = (1 - a - b) * w; + + int i = y*4 + x; + A[3 * i + 0] = a; + A[3 * i + 1] = b; + A[3 * i + 2] = c; + + /*for (int yy = 0; yy < 4; yy++) { + for (int xx = 0; xx < 4; xx++) { + float ww = input_weights[4*yy+xx]; + //if ((xx == 1 || xx == 2) && (yy == 1 && yy == 2)) ww *= 0.5; + + float aa = float(xx) / 4 * ww; + float bb = float(yy) / 4 * ww; + float cc = (1 - aa - bb) * ww; + + m(0,0) += a * aa; + m(1,0) += b * aa; + m(2,0) += c * aa; + + m(0,1) += a * bb; + m(1,1) += b * bb; + m(2,1) += c * bb; + + m(0,2) += a * cc; + m(1,2) += b * cc; + m(2,2) += c * cc; + } + }*/ + } + } + + // At*A + for (int y = 0; 
y < 3; y++) { + for (int x = 0; x < 3; x++) { + float d = 0; + for (int i = 0; i < 16; i++) { + d += A[3*i+x] * A[3*i+y]; + } + m(x, y) = d; + } + } + + // Compute right side: + Vector3 Ca(0), Cb(0), Cc(0); + + for (int y = 0; y < 4; y++) { + for (int x = 0; x < 4; x++) { + float a = float(x) / 4; + float b = float(y) / 4; + float c = 1 - a - b; + + Vector3 C = input_colors[4*y+x].xyz() - Vector3(0.5f / 255); + + Ca += C * a; + Cb += C * b; + Cc += C * c; + } + } + + // Now we have 3 equations (one for each color component). + + Vector3 R(Ca.x, Cb.x, Cc.x); + Vector3 G(Ca.y, Cb.y, Cc.y); + Vector3 B(Ca.z, Cb.z, Cc.z); + + Vector3 r, g, b; + + if (!solveLU(m, R, &r)) { + result->error = NV_FLOAT_MAX; + return; + } + if (!solveLU(m, G, &g)) { + result->error = NV_FLOAT_MAX; + return; + } + if (!solveLU(m, B, &b)) { + result->error = NV_FLOAT_MAX; + return; + } + + Vector3 Ch(r.x, g.x, b.x); + Vector3 Cv(r.y, g.y, b.y); + Vector3 Co(r.z, g.z, b.z); + + // Convert colors to 676 + result->data.mode = ETC_Data::Mode_Planar; + result->data.planar.ro = pack_float_6(Co.x); + result->data.planar.go = pack_float_7(Co.y); + result->data.planar.bo = pack_float_6(Co.z); + + result->data.planar.rh = pack_float_6(Ch.x); + result->data.planar.gh = pack_float_7(Ch.y); + result->data.planar.bh = pack_float_6(Ch.z); + + result->data.planar.rv = pack_float_6(Cv.x); + result->data.planar.gv = pack_float_7(Cv.y); + result->data.planar.bv = pack_float_6(Cv.z); + + // Evaluate error. + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); + + bool refine_endpoints = true; + if (refine_endpoints) { + ETC_Solution best = *result; + + // @@ The per-component errors are not correllated, test 8 combinations 3 times. 
+ for (int i = 0; i < 8; i++) { + result->data.planar.ro = pack_float_6(Co.x, (i & 1) != 0); + result->data.planar.rh = pack_float_6(Ch.x, (i & 2) != 0); + result->data.planar.rv = pack_float_6(Cv.x, (i & 4) != 0); + + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); + if (result->error < best.error) { + best = *result; + } + } + + *result = best; + + for (int i = 0; i < 8; i++) { + result->data.planar.go = pack_float_7(Co.y, (i & 1) != 0); + result->data.planar.gh = pack_float_7(Ch.y, (i & 2) != 0); + result->data.planar.gv = pack_float_7(Cv.y, (i & 4) != 0); + + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); + if (result->error < best.error) { + best = *result; + } + } + + *result = best; + + for (int i = 0; i < 8; i++) { + result->data.planar.bo = pack_float_6(Co.z, (i & 1) != 0); + result->data.planar.bh = pack_float_6(Ch.z, (i & 2) != 0); + result->data.planar.bv = pack_float_6(Cv.z, (i & 4) != 0); + + result->error = evaluate_rgb_mse(input_colors, input_weights, options, result->data); + if (result->error < best.error) { + best = *result; + } + } + + *result = best; + } +} + + +static void process_input_colors(Vector4 input_colors[16]) { + for (int i = 0; i < 16; i++) { + input_colors[i] = saturate(input_colors[i]); + + // @@ Sanitize input_weights? + // - Avoid blocks with all zero weight. + // - Normalize weights to avoid too small values? + // - Remove NaNs, infinites, etc. + } +} + +static void process_input_alphas(Vector4 input_colors[16], int input_channel) { + for (int i = 0; i < 16; i++) { + input_colors[i].component[input_channel] = saturate(input_colors[i].component[input_channel]); + } +} + +static void process_input_weights(float input_weights[16]) { + float max_weight = 0.0f; + for (int i = 0; i < 16; i++) { + max_weight = nv::max(max_weight, input_weights[i]); + } + + const float min_weight = 0.0001f; + + if (max_weight <= min_weight) { + // Handle degenerate case. 
+ for (int i = 0; i < 16; i++) { + input_weights[i] = 1; + } + } + else { + for (int i = 0; i < 16; i++) { + // Clamp to positive. + input_weights[i] = nv::max(input_weights[i], 0.0f); + + // Flush to zero. + if (input_weights[i] < min_weight) input_weights[i] = 0.0f; + + // Normalize. + input_weights[i] /= max_weight; + } + } +} + + + + +static float compress_etc_a1(Vector4 input_colors[16], float input_weights[16], const ETC_Options & options, void * output) { + assert(options.onebit_alpha == true); + + // Classify block. + bool transparent_block = true; + bool opaque_block = true; + + for (int i = 0; i < 16; i++) { + if (input_colors[i].w != 0) transparent_block = false; + if (input_colors[i].w != 1) opaque_block = false; + } + + if (transparent_block) { + // @@ Encode trivial transparent block. + return 0; + } + + if (opaque_block) { + // @@ Encode block with opaque bit set. @@ Isn't this like the standard encoder? + } + + // @@ Encode mixed block. + nvCheck(false); // Not implemented! + + //uint8 color_rgb[16*3]; + //uint8 alpha[16]; + //uint etc_word1, etc_word2; + //compressBlockDifferentialWithAlpha(bool isTransparent, uint8* img, uint8* alphaimg, uint8* imgdec, 4, 4, 0, 0, &etc_word1, &etc_word2); + + return NV_FLOAT_MAX; +} + +//uint etc_blocks = 0; +//uint planar_blocks = 0; +//#include "nvthread/Atomic.h" + +static float compress_etc(Vector4 input_colors[16], float input_weights[16], const ETC_Options & options, void * output) { + assert(options.onebit_alpha == false); + + ETC_Solution result; + compress_etc1_range_fit(input_colors, input_weights, options, &result); + + if (options.use_rg_etc) { +#if HAVE_RGETC + ETC_Solution rg_result; + compress_etc1_rg(input_colors, input_weights, options, &rg_result); + if (rg_result.error < result.error) { + result = rg_result; + } +#else + // @@ Print warning? 
+#endif + } + + if (options.enable_etc2) { + if (options.use_planar) { + ETC_Solution planar_result; + compress_etc2_planar_lsqr(input_colors, input_weights, options, &planar_result); + + if (planar_result.error < result.error) { + result = planar_result; + //nv::atomicIncrement(&planar_blocks); + } + else { + //nv::atomicIncrement(&etc_blocks); + } + } + if (options.use_t_mode) { + // @@ + } + if (options.use_h_mode) { + // @@ + } + } + + pack_etc2_block(result.data, (BlockETC *)output); + + return result.error; +} + + +// Range search EAC compressor, slightly modified from ETCLib. +float compress_eac_range_search(Vector4 input_colors[16], float input_weights[16], int input_channel, const EAC_Options & options, void * output) { + + // Find alpha range + float min_a = 1.0f; + float max_a = 0.0f; + for (uint i = 0; i < 16; i++) { + float a = input_colors[i].component[input_channel]; + min_a = nv::min(min_a, a); + max_a = nv::max(max_a, a); + } + const float range_a = max_a - min_a; + + EAC_Solution best; + best.error = NV_FLOAT_MAX; + + // try each modifier table entry + static const uint MODIFIER_TABLE_ENTRYS = 16; + for (uint t = 0; t < MODIFIER_TABLE_ENTRYS; t++) { + static const uint MIN_VALUE_SELECTOR = 3; + static const uint MAX_VALUE_SELECTOR = 7; + + const float fTableEntryCenter = (float)-eac_intensity_modifiers[t][MIN_VALUE_SELECTOR]; + const float fTableEntryRange = (float)eac_intensity_modifiers[t][MAX_VALUE_SELECTOR] - eac_intensity_modifiers[t][MIN_VALUE_SELECTOR]; + const float fCenterRatio = fTableEntryCenter / fTableEntryRange; + + const int center = ftoi_round(255.0f * (min_a + fCenterRatio * range_a)); + const int min_base = max(0, center - options.search_radius); + const int max_base = min(center + options.search_radius, 255); + + for (int base = min_base; base <= max_base; base++) { + int range_multiplier = ftoi_round(255 * range_a / fTableEntryRange); + const int min_multiplier = clamp(range_multiplier - options.search_radius, 1, 15); + const 
int max_multiplier = clamp(range_multiplier + options.search_radius, 1, 15); + + for (int multiplier = min_multiplier; multiplier <= max_multiplier; multiplier++) { + + // find best selector for each pixel + float block_error = 0; + uint best_selector[16]; + for (uint i = 0; i < 16; i++) { + + float best_error_a = NV_FLOAT_MAX; + + static const uint ALPHA_SELECTOR_BITS = 3; + static const uint ALPHA_SELECTORS = 1 << ALPHA_SELECTOR_BITS; + for (uint s = 0; s < ALPHA_SELECTORS; s++) { + float alpha; + if (options.use_11bit_mode) { + alpha = get_alpha11(base, t, multiplier, s); + } + else { + alpha = get_alpha8(base, t, multiplier, s); + } + + float error_a = alpha - input_colors[i].component[input_channel]; + error_a = error_a * error_a; + + if (error_a < best_error_a) { + best_error_a = error_a; + best_selector[i] = s; + } + } + + block_error += best_error_a * input_weights[i]; + if (block_error > best.error) { + break; // Don't waste more time. + } + } + + if (block_error < best.error) { + best.error = block_error; + + best.data.alpha = base; + best.data.multiplier = multiplier; + best.data.table_index = t; + for (uint i = 0; i < 16; i++) { + // Flip selectors. + best.data.selector[i] = best_selector[4*(i%4) + i/4]; + } + } + } + } + } + + pack_eac_block(best.data, (BlockEAC *)output); + + return best.error; +} + + + + +// Public API: + +void nv::decompress_etc(const void * input_block, Vector4 output_colors[16]) { +#if 1 // Our code + ETC_Data data; + unpack_etc2_block((const BlockETC *)input_block, &data); + + decode_etc2(data, output_colors); + +#elif HAVE_RGETC && 0 + + Color32 colors[16]; + rg_etc1::unpack_etc1_block(input_block, &colors->u); + + for (int i = 0; i < 16; i++) { + output_colors[i].x = colors[i].b * (1.0f / 255.0f); + output_colors[i].y = colors[i].g * (1.0f / 255.0f); + output_colors[i].z = colors[i].r * (1.0f / 255.0f); + output_colors[i].w = colors[i].a * (1.0f / 255.0f); + } + +#elif HAVE_ETCPACK // Use etcpack for reference. 
+ const BlockETC * block = (const BlockETC *)input_block; + + uint8 colors[3*16]; + uint part1 = POSH_SwapU32(block->data32[0]); + uint part2 = POSH_SwapU32(block->data32[1]); + decompressBlockETC2(part1, part2, colors, 4, 4, 0, 0); + + for (int i = 0; i < 16; i++) { + output_colors[i].x = colors[3*i+0] * (1.0f / 255.0f); + output_colors[i].y = colors[3*i+1] * (1.0f / 255.0f); + output_colors[i].z = colors[3*i+2] * (1.0f / 255.0f); + output_colors[i].w = 1.0f; + } +#endif +} + +void nv::decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel) { + nvCheck(output_channel >= 0 && output_channel < 4); + +#if 1 + EAC_Data data; + unpack_eac_block((const BlockEAC *)input_block, &data); + decode_eac_11(data, output_colors, output_channel); + +#elif HAVE_ETCPACK + // Use etcpack for reference. + formatSigned = 0; + + uint16 alphas[16]; + decompressBlockAlpha16bit((uint8*)input_block, (uint8*)alphas, 4, 4, 0, 0); + + for (int i = 0; i < 16; i++) { + uint16 alpha = POSH_SwapU16(alphas[i]); + output_colors[i].component[output_channel] = alpha * (1.0f / 65535.0f); + } +#endif +} + +void nv::decompress_etc_eac(const void * input, Vector4 output_colors[16]) { +#if 1 + BlockETC_EAC * input_block = (BlockETC_EAC *)input; + + ETC_Data etc; + unpack_etc2_block(&input_block->etc, &etc); + decode_etc2(etc, output_colors); + + EAC_Data eac; + unpack_eac_block(&input_block->eac, &eac); + decode_eac_8(eac, output_colors, 3); + +#elif HAVE_ETCPACK + // Use etcpack for reference. 
+ uint8 colors[4*16]; + decompressBlockAlpha((uint8*)input_block, colors, 4, 4, 0, 0); + + for (int i = 0; i < 16; i++) { + output_colors[i].x = colors[4*i+0] * (1.0f / 255.0f); + output_colors[i].y = colors[4*i+1] * (1.0f / 255.0f); + output_colors[i].z = colors[4*i+2] * (1.0f / 255.0f); + output_colors[i].w = colors[4*i+3] * (1.0f / 255.0f); + } +#endif +} + +float nv::compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) { + + process_input_colors(input_colors); + + // @@ Use same options for all blocks? + ETC_Options options; + options.use_rg_etc = true; + options.enable_etc2 = false; + options.use_t_mode = false; + options.use_h_mode = false; + options.use_planar = false; + options.color_weights = color_weights; + + return compress_etc(input_colors, input_weights, options, output); +} + +float nv::compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) { + + process_input_colors(input_colors); + process_input_weights(input_weights); + + ETC_Options options; + options.use_rg_etc = true; + options.enable_etc2 = true; + options.use_t_mode = false; // @@ Not implemented. + options.use_h_mode = false; // @@ Not implemented. + options.use_planar = true; + options.color_weights = color_weights; + + return compress_etc(input_colors, input_weights, options, output); +} + +float nv::compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) { + + process_input_colors(input_colors); + process_input_weights(input_weights); + + ETC_Options options; + options.use_rg_etc = true; + options.enable_etc2 = true; + options.use_t_mode = false; // @@ Not implemented. + options.use_h_mode = false; // @@ Not implemented. 
+ options.use_planar = true; + options.onebit_alpha = true; + options.color_weights = color_weights; + + return compress_etc_a1(input_colors, input_weights, options, output); +} + + +float nv::compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output) { + nvCheck(input_channel >= 0 && input_channel < 4); + + process_input_alphas(input_colors, input_channel); + process_input_weights(input_weights); + + EAC_Options options; + options.search_radius = search_radius; + options.use_11bit_mode = use_11bit_mode; + + return compress_eac_range_search(input_colors, input_weights, input_channel, options, output); +} + +float nv::compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output) { + BlockETC_EAC * output_block = (BlockETC_EAC *)output; + float error = compress_etc2(input_colors, input_weights, color_weights, &output_block->etc); + error += compress_eac(input_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*use_11bit_mode=*/false, &output_block->eac); + return error; +} + + + + + + + + + + + + + + + + diff --git a/src/nvtt/CompressorETC.h b/src/nvtt/CompressorETC.h new file mode 100644 index 0000000..3101007 --- /dev/null +++ b/src/nvtt/CompressorETC.h @@ -0,0 +1,20 @@ +#include "nvcore/nvcore.h" + +namespace nv { + + class Vector3; + class Vector4; + + void decompress_etc(const void * input_block, Vector4 output_colors[16]); + void decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel); + void decompress_etc_eac(const void * input_block, Vector4 output_colors[16]); + + float compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output); + float compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output); + float compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const 
Vector3 & color_weights, void * output); + float compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output); + float compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output); + +} + + diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp index 7578883..861a882 100644 --- a/src/nvtt/CompressorRGB.cpp +++ b/src/nvtt/CompressorRGB.cpp @@ -250,6 +250,8 @@ namespace // Compute shared exponent. int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B; + nvDebugCheck(exp_shared_p <= Emax); + nvDebugCheck(exp_shared_p >= 0); int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N))); @@ -279,7 +281,7 @@ namespace { float v = max3(r, g, b); - uint rgbe; + uint rgbe = 0; if (v < 1e-32) { rgbe = 0; @@ -534,6 +536,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint } else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) { // @@ + ir = ig = ib = ia = 0; } else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) { ir = iround(clamp(r, 0.0f, 65535.0f)); @@ -543,6 +546,11 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint } else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) { // @@ + ir = ig = ib = ia = 0; + } + else { + // @@ + ir = ig = ib = ia = 0; } uint p = 0; diff --git a/src/nvtt/Context.cpp b/src/nvtt/Context.cpp index 6aacc90..37c201b 100644 --- a/src/nvtt/Context.cpp +++ b/src/nvtt/Context.cpp @@ -39,6 +39,7 @@ #include "cuda/CudaCompressorDXT.h" #include "nvimage/DirectDrawSurface.h" +#include "nvimage/KtxFile.h" #include "nvimage/ColorBlock.h" #include "nvimage/BlockDXT.h" #include "nvimage/Image.h" @@ -51,6 +52,7 @@ #include "nvcore/Memory.h" #include "nvcore/Ptr.h" +#include "nvcore/Array.inl" using namespace nv; using namespace nvtt; @@ -222,11 +224,6 @@ bool Compressor::Private::compress(const 
InputOptions::Private & inputOptions, c return false; } - nvtt::Surface img; - img.setWrapMode(inputOptions.wrapMode); - img.setAlphaMode(inputOptions.alphaMode); - img.setNormalMap(inputOptions.isNormalMap); - const int faceCount = inputOptions.faceCount; int width = inputOptions.width; int height = inputOptions.height; @@ -244,97 +241,230 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel); } - if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions)) { + if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, inputOptions.isNormalMap, compressionOptions, outputOptions)) { return false; } - // Output images. - for (int f = 0; f < faceCount; f++) + if (outputOptions.container != Container_KTX) { + nvtt::Surface img; + img.setWrapMode(inputOptions.wrapMode); + img.setAlphaMode(inputOptions.alphaMode); + img.setNormalMap(inputOptions.isNormalMap); + + // Output each face from the largest mipmap to the smallest. + for (int f = 0; f < faceCount; f++) + { + int w = width; + int h = height; + int d = depth; + bool canUseSourceImagesForThisFace = canUseSourceImages; + + img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]); + + // To normal map. + if (inputOptions.convertToNormalMap) { + img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w); + img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w); + } + + // To linear space. + if (!img.isNormalMap()) { + img.toLinear(inputOptions.inputGamma); + } + + // Resize input. 
+ img.resize(w, h, d, ResizeFilter_Box); + + nvtt::Surface tmp = img; + if (!img.isNormalMap()) { + tmp.toGamma(inputOptions.outputGamma); + } + + quantize(tmp, compressionOptions); + compress(tmp, f, 0, compressionOptions, outputOptions); + + for (int m = 1; m < mipmapCount; m++) { + w = max(1, w/2); + h = max(1, h/2); + d = max(1, d/2); + + int idx = m * faceCount + f; + + bool useSourceImages = false; + if (canUseSourceImagesForThisFace) { + if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level. + canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images. + } + else { + useSourceImages = true; + } + } + + if (useSourceImages) { + img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]); + + // For already generated mipmaps, we need to convert to linear. + if (!img.isNormalMap()) { + img.toLinear(inputOptions.inputGamma); + } + } + else { + if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) { + float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch }; + img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params); + } + else { + img.buildNextMipmap(inputOptions.mipmapFilter); + } + } + nvDebugCheck(img.width() == w); + nvDebugCheck(img.height() == h); + nvDebugCheck(img.depth() == d); + + if (img.isNormalMap()) { + if (inputOptions.normalizeMipmaps) { + img.expandNormals(); + img.normalizeNormalMap(); + img.packNormals(); + } + tmp = img; + } + else { + tmp = img; + tmp.toGamma(inputOptions.outputGamma); + } + + quantize(tmp, compressionOptions); + compress(tmp, f, m, compressionOptions, outputOptions); + } + } + } + else + { + // KTX files expect face mipmaps to be interleaved. 
+ Array images(faceCount); + Array mipChainBroken(faceCount); + int w = width; int h = height; int d = depth; - bool canUseSourceImagesForThisFace = canUseSourceImages; - img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]); + // https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16 + uint imageSize = estimateSize(w, h, 1, 1, compressionOptions) * faceCount; + outputOptions.writeData(&imageSize, sizeof(uint32)); - // To normal map. - if (inputOptions.convertToNormalMap) { - img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w); - img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w); - img.packNormals(); - } + for (int f = 0; f < faceCount; f++) + { + nvtt::Surface s; + s.setWrapMode(inputOptions.wrapMode); + s.setAlphaMode(inputOptions.alphaMode); + s.setNormalMap(inputOptions.isNormalMap); - // To linear space. - if (!img.isNormalMap()) { - img.toLinear(inputOptions.inputGamma); - } + s.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]); - // Resize input. - img.resize(w, h, d, ResizeFilter_Box); + // To normal map. + if (inputOptions.convertToNormalMap) { + s.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w); + s.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w); + } - nvtt::Surface tmp = img; - if (!img.isNormalMap()) { - tmp.toGamma(inputOptions.outputGamma); - } + // To linear space. 
+            if (!s.isNormalMap()) {
+                s.toLinear(inputOptions.inputGamma);
+            }
-        quantize(tmp, compressionOptions);
-        compress(tmp, f, 0, compressionOptions, outputOptions);
+            // Resize input.
+            s.resize(w, h, d, ResizeFilter_Box);
+
+            nvtt::Surface tmp = s;
+            if (!s.isNormalMap()) {
+                tmp.toGamma(inputOptions.outputGamma);
+            }
+
+            quantize(tmp, compressionOptions);
+            compress(tmp, f, 0, compressionOptions, outputOptions);
-        for (int m = 1; m < mipmapCount; m++) {
+            images.push_back(s);
+            mipChainBroken.push_back(false);
+        }
+
+        static const unsigned char padding[3] = {0, 0, 0};
+        for (int m = 1; m < mipmapCount; m++)
+        {
             w = max(1, w/2);
             h = max(1, h/2);
             d = max(1, d/2);
-            int idx = m * faceCount + f;
+            // https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
+            imageSize = estimateSize(w, h, d, 1, compressionOptions) * faceCount;
-            bool useSourceImages = false;
-            if (canUseSourceImagesForThisFace) {
-                if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
-                    canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
-                }
-                else {
-                    useSourceImages = true;
+            outputOptions.writeData(&imageSize, sizeof(uint32));
+
+            nvtt::Surface tmp;
+
+            for (int f = 0; f < faceCount; f++)
+            {
+                nvtt::Surface& img = images[f];
+                int idx = m * faceCount + f;
+
+                bool useSourceImages = false;
+                if (!mipChainBroken[f]) {
+                    if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
+                        mipChainBroken[f] = true; // If one level is missing, ignore the following source images (flag must be set, not cleared, to match the non-KTX path).
+                    }
+                    else {
+                        useSourceImages = true;
+                    }
                 }
-            }
-            if (useSourceImages) {
-                img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
+                if (useSourceImages) {
+                    img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
-                // For already generated mipmaps, we need to convert to linear.
-                if (!img.isNormalMap()) {
-                    img.toLinear(inputOptions.inputGamma);
+                    // For already generated mipmaps, we need to convert to linear.
+                    if (!img.isNormalMap()) {
+                        img.toLinear(inputOptions.inputGamma);
+                    }
                 }
-            }
-            else {
-                if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
-                    float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
-                    img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
+                else {
+                    if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
+                        float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch }; // Same { alpha, stretch } order as the non-KTX path.
+                        img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
+                    }
+                    else {
+                        img.buildNextMipmap(inputOptions.mipmapFilter);
+                    }
+                }
+                nvDebugCheck(img.width() == w);
+                nvDebugCheck(img.height() == h);
+                nvDebugCheck(img.depth() == d);
+
+                if (img.isNormalMap()) {
+                    if (inputOptions.normalizeMipmaps) {
+                        img.normalizeNormalMap();
+                    }
+                    tmp = img;
                 }
                 else {
-                    img.buildNextMipmap(inputOptions.mipmapFilter);
+                    tmp = img;
+                    tmp.toGamma(inputOptions.outputGamma);
                 }
-            }
-            nvDebugCheck(img.width() == w);
-            nvDebugCheck(img.height() == h);
-            nvDebugCheck(img.depth() == d);
-            if (img.isNormalMap()) {
-                if (inputOptions.normalizeMipmaps) {
-                    img.expandNormals();
-                    img.normalizeNormalMap();
-                    img.packNormals();
+                quantize(tmp, compressionOptions);
+                compress(tmp, f, m, compressionOptions, outputOptions);
+
+                //cube padding
+                if (faceCount == 6 && arraySize == 1)
+                {
+                    //TODO calc offset for uncompressed images
                 }
-                tmp = img;
-            }
-            else {
-                tmp = img;
-                tmp.toGamma(inputOptions.outputGamma);
             }
-            quantize(tmp, compressionOptions);
-            compress(tmp, f, m, compressionOptions, outputOptions);
+            int mipPadding = 3 - ((imageSize + 3) % 4);
+            if (mipPadding != 0) {
+                outputOptions.writeData(&padding, mipPadding);
+            }
         }
     }
@@ -673,6 +803,131 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
         return writeSucceed;
     }
+    else if (outputOptions.container == Container_KTX)
+    {
+ KtxHeader header; + // TODO cube arrays + if (textureType == TextureType_2D) { + nvCheck(arraySize == 1); + header.numberOfArrayElements = 0; + header.numberOfFaces = 1; + header.pixelDepth = 0; + } + else if (textureType == TextureType_Cube) { + nvCheck(arraySize == 1); + header.numberOfArrayElements = 0; + header.numberOfFaces = 6; + header.pixelDepth = 0; + } + else if (textureType == TextureType_3D) { + nvCheck(arraySize == 1); + header.numberOfArrayElements = 0; + header.numberOfFaces = 1; + header.pixelDepth = d; + } + else if (textureType == TextureType_Array) { + header.numberOfArrayElements = arraySize; + header.numberOfFaces = 1; + header.pixelDepth = 0; // Is it? + } + + header.pixelWidth = w; + header.pixelHeight = h; + header.numberOfMipmapLevels = mipmapCount; + + bool supported = true; + + // TODO non-compressed formats + if (compressionOptions.format == Format_RGBA) + { + //header.glType = ?; + //header.glTypeSize = ?; + //header.glFormat = ?; + } + else + { + header.glType = 0; + header.glTypeSize = 1; + header.glFormat = 0; + + if (compressionOptions.format == Format_DXT1) { + header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB; + } + else if (compressionOptions.format == Format_DXT1a) { + header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA; + } + else if (compressionOptions.format == Format_DXT3) { + header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA; + } + else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) { + header.glInternalFormat = outputOptions.srgb ? 
KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA; + } + else if (compressionOptions.format == Format_BC4) { + header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_RGTC1; // KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 ? + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED; + } + else if (compressionOptions.format == Format_BC5) { + header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_RGTC2; // KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 ? + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG; + } + else if (compressionOptions.format == Format_BC6) { + if (compressionOptions.pixelType == PixelType_Float) header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT; + else /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; // By default we assume unsigned. + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB; + } + else if (compressionOptions.format == Format_BC7) { + header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM : KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA; + } + else if (compressionOptions.format == Format_ETC1) { + header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC1 : KTX_INTERNAL_COMPRESSED_RGB_ETC1; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB; + } + else if (compressionOptions.format == Format_ETC2_R) { + header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_EAC; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED; + } + else if (compressionOptions.format == Format_ETC2_RG) { + header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_EAC; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG; + } + else if (compressionOptions.format == Format_ETC2_RGB) { + header.glInternalFormat = outputOptions.srgb ? 
KTX_INTERNAL_COMPRESSED_SRGB_ETC2 : KTX_INTERNAL_COMPRESSED_RGB_ETC2; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB; + } + else if (compressionOptions.format == Format_ETC2_RGBA) { + header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC : KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC; + header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA; + } + else { + supported = false; + } + + //TODO compressionOptions.format == Format_DXT1n, Format_DXT5n ? There seems to be no way to indicate a normal map using ktx. Maybe via key value data? + } + + if (!supported) + { + // This container does not support the requested format. + outputOptions.error(Error_UnsupportedOutputFormat); + return false; + } + + const uint headerSize = 64; + nvStaticCheck(sizeof(KtxHeader) == 64); + + bool writeSucceed = outputOptions.writeData(&header, headerSize); + if (!writeSucceed) + { + outputOptions.error(Error_FileWrite); + } + + return writeSucceed; + } return true; } @@ -788,15 +1043,34 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression { return new CompressorBC7; } - /*else if (compressionOptions.format == Format_BC5_Luma) - { - return new ProductionCompressorBC5_Luma; - }*/ else if (compressionOptions.format == Format_BC3_RGBM) { return new CompressorBC3_RGBM; } - + else if (compressionOptions.format >= Format_ETC1 && compressionOptions.format <= Format_ETC2_RGB_A1) + { +#if defined(HAVE_RGETC) + if (compressionOptions.format == Format_ETC1 && compressionOptions.externalCompressor == "rg_etc") return new RgEtcCompressor; +#endif +#if defined(HAVE_ETCLIB) + if (compressionOptions.externalCompressor == "etclib") return new EtcLibCompressor; +#endif + if (compressionOptions.format == Format_ETC1) return new CompressorETC1; + else if (compressionOptions.format == Format_ETC2_R) return new CompressorETC2_R; + //else if (compressionOptions.format == Format_ETC2_RG) return new CompressorETC2_RG; + else if 
(compressionOptions.format == Format_ETC2_RGB) return new CompressorETC2_RGB; + else if (compressionOptions.format == Format_ETC2_RGBA) return new CompressorETC2_RGBA; + } + else if (compressionOptions.format == Format_ETC2_RGBM) + { + return new CompressorETC2_RGBM; + } + else if (compressionOptions.format >= Format_PVR_2BPP_RGB && compressionOptions.format <= Format_PVR_4BPP_RGBA) + { +#if defined(HAVE_PVRTEXTOOL) + return new CompressorPVR; +#endif + } return NULL; } @@ -860,3 +1134,24 @@ CompressorInterface * Compressor::Private::chooseGpuCompressor(const Compression return NULL; } + +int Compressor::Private::estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const +{ + const Format format = compressionOptions.format; + + const uint bitCount = compressionOptions.bitcount; + const uint pitchAlignment = compressionOptions.pitchAlignment; + + int size = 0; + for (int m = 0; m < mipmapCount; m++) + { + size += computeImageSize(w, h, d, bitCount, pitchAlignment, format); + + // Compute extents of next mipmap: + w = max(1, w / 2); + h = max(1, h / 2); + d = max(1, d / 2); + } + + return size; +} diff --git a/src/nvtt/Context.h b/src/nvtt/Context.h index c497bb1..de801c0 100644 --- a/src/nvtt/Context.h +++ b/src/nvtt/Context.h @@ -56,6 +56,7 @@ namespace nvtt nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; + int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const; bool cudaSupported; bool cudaEnabled; diff --git a/src/nvtt/OutputOptions.h b/src/nvtt/OutputOptions.h index 5b9a750..df6080c 100644 --- a/src/nvtt/OutputOptions.h +++ b/src/nvtt/OutputOptions.h @@ -34,61 +34,61 @@ namespace nvtt { - struct DefaultOutputHandler : public nvtt::OutputHandler - { - DefaultOutputHandler(const char 
* fileName) : stream(fileName) {} + struct DefaultOutputHandler : public nvtt::OutputHandler + { + DefaultOutputHandler(const char * fileName) : stream(fileName) {} DefaultOutputHandler(FILE * fp) : stream(fp, false) {} - - virtual ~DefaultOutputHandler() {} - - virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) - { - // ignore. - } - - // Output data. - virtual bool writeData(const void * data, int size) - { - stream.serialize(const_cast(data), size); - - //return !stream.isError(); - return true; - } - - virtual void endImage() - { - // ignore. - } - - nv::StdOutputStream stream; - }; - - - struct OutputOptions::Private - { - nv::Path fileName; + + virtual ~DefaultOutputHandler() {} + + virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) + { + // ignore. + } + + // Output data. + virtual bool writeData(const void * data, int size) + { + stream.serialize(const_cast(data), size); + + //return !stream.isError(); + return true; + } + + virtual void endImage() + { + // ignore. + } + + nv::StdOutputStream stream; + }; + + + struct OutputOptions::Private + { + nv::Path fileName; FILE * fileHandle; - - OutputHandler * outputHandler; - ErrorHandler * errorHandler; - bool outputHeader; - Container container; + OutputHandler * outputHandler; + ErrorHandler * errorHandler; + + bool outputHeader; + Container container; int version; bool srgb; bool deleteOutputHandler; void * wrapperProxy; // For the C/C# wrapper. 
- - bool hasValidOutputHandler() const; - void beginImage(int size, int width, int height, int depth, int face, int miplevel) const; - bool writeData(const void * data, int size) const; + bool hasValidOutputHandler() const; + + void beginImage(int size, int width, int height, int depth, int face, int miplevel) const; + bool writeData(const void * data, int size) const; void endImage() const; - void error(Error e) const; - }; + void error(Error e) const; + }; + - } // nvtt namespace diff --git a/src/nvtt/QuickCompressDXT.h b/src/nvtt/QuickCompressDXT.h index dbfc824..f5d952f 100644 --- a/src/nvtt/QuickCompressDXT.h +++ b/src/nvtt/QuickCompressDXT.h @@ -39,21 +39,21 @@ namespace nv struct AlphaBlockDXT5; class Vector3; - namespace QuickCompress - { - void compressDXT1(const ColorBlock & src, BlockDXT1 * dst); - void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst); - - void compressDXT3(const ColorBlock & src, BlockDXT3 * dst); - - void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8); - void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8); - - void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8); + namespace QuickCompress + { + void compressDXT1(const ColorBlock & src, BlockDXT1 * dst); + void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst); + + void compressDXT3(const ColorBlock & src, BlockDXT3 * dst); + + void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8); + void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8); + + void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8); void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); - } + } } // nv namespace #endif // NV_TT_QUICKCOMPRESSDXT_H diff --git 
a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index 869f0f1..41421ca 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -23,12 +23,14 @@ // OTHER DEALINGS IN THE SOFTWARE. #include "Surface.h" +#include "CompressorETC.h" // for ETC decoder. #include "nvmath/Vector.inl" #include "nvmath/Matrix.inl" #include "nvmath/Color.h" #include "nvmath/Half.h" #include "nvmath/ftoi.h" +#include "nvmath/PackedFloat.h" #include "nvimage/Filter.h" #include "nvimage/ImageIO.h" @@ -39,8 +41,13 @@ #include "nvimage/ErrorMetric.h" #include "nvimage/DirectDrawSurface.h" +#include "nvthread/ParallelFor.h" + +#include "nvcore/Array.inl" + #include #include // memset, memcpy +//#include // printf? #if NV_CC_GNUC #include // exp2f and log2f @@ -123,6 +130,18 @@ namespace else if (format == Format_BC7) { return 16; } + else if (format == Format_ETC1 || format == Format_ETC2_R || format == Format_ETC2_RGB) { + return 8; + } + else if (format == Format_ETC2_RG || format == Format_ETC2_RGBA || format == Format_ETC2_RGBM) { + return 16; + } + else if (format == Format_PVR_2BPP_RGB || format == Format_PVR_2BPP_RGBA) { + return 4; + } + else if (format == Format_PVR_4BPP_RGB || format == Format_PVR_4BPP_RGBA) { + return 8; + } return 0; } @@ -197,7 +216,7 @@ uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlign } } -void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType) { +void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType, nvtt::ShapeRestriction shapeRestriction /*= nvtt::ShapeRestriction_None*/) { nvDebugCheck(width != NULL && *width > 0); nvDebugCheck(height != NULL && *height > 0); nvDebugCheck(depth != NULL && *depth > 0); @@ -234,21 +253,21 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, // Round to power of two. 
if (roundMode == RoundMode_ToNextPowerOfTwo) { - w = nextPowerOfTwo(w); - h = nextPowerOfTwo(h); - d = nextPowerOfTwo(d); + w = nextPowerOfTwo(U32(w)); + h = nextPowerOfTwo(U32(h)); + d = nextPowerOfTwo(U32(d)); } else if (roundMode == RoundMode_ToNearestPowerOfTwo) { - w = nearestPowerOfTwo(w); - h = nearestPowerOfTwo(h); - d = nearestPowerOfTwo(d); + w = nearestPowerOfTwo(U32(w)); + h = nearestPowerOfTwo(U32(h)); + d = nearestPowerOfTwo(U32(d)); } else if (roundMode == RoundMode_ToPreviousPowerOfTwo) { - w = previousPowerOfTwo(w); - h = previousPowerOfTwo(h); - d = previousPowerOfTwo(d); + w = previousPowerOfTwo(U32(w)); + h = previousPowerOfTwo(U32(h)); + d = previousPowerOfTwo(U32(d)); } else if (roundMode == RoundMode_ToNextMultipleOfFour) { @@ -269,6 +288,38 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, d = previousMultipleOfFour(d); } + if(shapeRestriction == ShapeRestriction_Square) + { + if (textureType == TextureType_2D) + { + int md = nv::min(w,h); + w = md; + h = md; + d = 1; + } + else if (textureType == TextureType_3D) + { + int md = nv::min(nv::min(w,h),d); + w = md; + h = md; + d = md; + } + else if (textureType == TextureType_Cube) + { + int md = nv::min(w, h); + w = md; + h = md; + d = 1; + } + } + else + { + if (textureType == TextureType_2D || textureType == TextureType_Cube) + { + d = 1; + } + } + *width = w; *height = h; *depth = d; @@ -509,8 +560,8 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c } } - *rangeMin = range.x; - *rangeMax = range.y; + if (rangeMin) *rangeMin = range.x; + if (rangeMax) *rangeMax = range.y; } bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) @@ -583,7 +634,7 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) } // @@ Have loadFloat allocate the image with the desired number of channels. 
- img->resizeChannelCount(4); + //img->resizeChannelCount(4); delete m->image; m->image = img.release(); @@ -601,7 +652,8 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c return ImageIO::saveFloat(fileName, m->image, 0, 4); } else { - AutoPtr image(m->image->createImage(0, 4)); + uint c = min(m->image->componentCount(), 4); + AutoPtr image(m->image->createImage(0, c)); nvCheck(image != NULL); if (hasAlpha) { @@ -829,16 +881,35 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r, return true; } +#if defined(HAVE_PVRTEXTOOL) +#include +#endif + // @@ Add support for compressed 3D textures. bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data) { if (format != nvtt::Format_BC1 && format != nvtt::Format_BC2 && format != nvtt::Format_BC3 && + format != nvtt::Format_BC3n && + format != nvtt::Format_BC3_RGBM && format != nvtt::Format_BC4 && format != nvtt::Format_BC5 && format != nvtt::Format_BC6 && - format != nvtt::Format_BC7) + format != nvtt::Format_BC7 && + format != nvtt::Format_ETC1 && + format != nvtt::Format_ETC2_R && + format != nvtt::Format_ETC2_RG && + format != nvtt::Format_ETC2_RGB && + format != nvtt::Format_ETC2_RGBA && + format != nvtt::Format_ETC2_RGBM + #if defined(HAVE_PVRTEXTOOL) + && format != nvtt::Format_PVR_2BPP_RGB + && format != nvtt::Format_PVR_4BPP_RGB + && format != nvtt::Format_PVR_2BPP_RGBA + && format != nvtt::Format_PVR_4BPP_RGBA + #endif + ) { return false; } @@ -851,7 +922,7 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi m->image->allocate(4, w, h, 1); m->type = TextureType_2D; - const int bw = (w + 3) / 4; + const int bw = (w + 3) / 4; // @@ Not if PVR 2bpp! 
const int bh = (h + 3) / 4; const uint bs = blockSize(format); @@ -859,130 +930,166 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi const uint8 * ptr = (const uint8 *)data; TRY { - if (format == nvtt::Format_BC6) - { - // BC6 format - decode directly to float - - for (int y = 0; y < bh; y++) - { - for (int x = 0; x < bw; x++) - { - Vector3 colors[16]; - const BlockBC6 * block = (const BlockBC6 *)ptr; - block->decodeBlock(colors); - - for (int yy = 0; yy < 4; yy++) - { - for (int xx = 0; xx < 4; xx++) - { - Vector3 rgb = colors[yy*4 + xx]; - - if (x * 4 + xx < w && y * 4 + yy < h) - { - m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x; - m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y; - m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z; - m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f; - } - } - } - - ptr += bs; - } - } - } - else - { - // Non-BC6 - decode to 8-bit, then convert to float - - for (int y = 0; y < bh; y++) - { - for (int x = 0; x < bw; x++) - { - ColorBlock colors; - - if (format == nvtt::Format_BC1) - { - const BlockDXT1 * block = (const BlockDXT1 *)ptr; - - if (decoder == Decoder_D3D10) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_D3D9) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC2) - { - const BlockDXT3 * block = (const BlockDXT3 *)ptr; - - if (decoder == Decoder_D3D10) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_D3D9) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC3) - { - const BlockDXT5 * block = (const BlockDXT5 *)ptr; - - if (decoder == Decoder_D3D10) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_D3D9) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_NV5x) { - 
block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC4) - { - const BlockATI1 * block = (const BlockATI1 *)ptr; - block->decodeBlock(&colors, decoder == Decoder_D3D9); - } - else if (format == nvtt::Format_BC5) - { - const BlockATI2 * block = (const BlockATI2 *)ptr; - block->decodeBlock(&colors, decoder == Decoder_D3D9); - } - else if (format == nvtt::Format_BC7) - { - const BlockBC7 * block = (const BlockBC7 *)ptr; - block->decodeBlock(&colors); - } - else - { - nvDebugCheck(false); - } - - for (int yy = 0; yy < 4; yy++) - { - for (int xx = 0; xx < 4; xx++) - { - Color32 c = colors.color(xx, yy); - - if (x * 4 + xx < w && y * 4 + yy < h) - { - m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; - m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f; - m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f; - m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f; - } - } - } - - ptr += bs; - } - } - } +#if defined(HAVE_PVRTEXTOOL) + if (format >= nvtt::Format_PVR_2BPP_RGB && format <= nvtt::Format_PVR_4BPP_RGBA) + { + bool two_bit_mode = (format == nvtt::Format_PVR_2BPP_RGB || format == nvtt::Format_PVR_2BPP_RGBA); + + uint8 * output = new uint8[4 * w * h]; + + PVRTDecompressPVRTC(ptr, two_bit_mode, w, h, output); + + for (int y = 0; y < h; y++) { + for (int x = 0; x < w; x++) { + m->image->pixel(0, x, y, 0) = output[4*(y*w + x) + 0] / 255.0f; + m->image->pixel(1, x, y, 0) = output[4*(y*w + x) + 1] / 255.0f; + m->image->pixel(2, x, y, 0) = output[4*(y*w + x) + 2] / 255.0f; + m->image->pixel(3, x, y, 0) = output[4*(y*w + x) + 3] / 255.0f; + } + } + + delete [] output; + } + else +#endif + if (format == nvtt::Format_BC6 || (format >= nvtt::Format_ETC1 && format <= nvtt::Format_ETC2_RGBM)) + { + // Some formats we decode directly to float: + + for (int y = 0; y < bh; y++) { + for (int x = 0; x < bw; x++) { + Vector4 colors[16]; + + if (format == nvtt::Format_BC6) { + const BlockBC6 
* block = (const BlockBC6 *)ptr; + block->decodeBlock(colors); + } + else if (format == nvtt::Format_ETC1 || format == nvtt::Format_ETC2_RGB) { + nv::decompress_etc(ptr, colors); + } + else if (format == nvtt::Format_ETC2_RGBA || format == nvtt::Format_ETC2_RGBM) { + nv::decompress_etc_eac(ptr, colors); + } + else if (format == nvtt::Format_ETC2_R) { + // @@ Not implemented. + //nv::decompress_eac(ptr, colors); + } + else if (format == nvtt::Format_ETC2_RG) { + // @@ Not implemented. + //nv::decompress_eac(ptr, colors); + } + else if (format == nvtt::Format_ETC2_RGB_A1) { + // @@ Not implemented? + //nv::decompress_etc(ptr, colors); + } + + for (int yy = 0; yy < 4; yy++) { + for (int xx = 0; xx < 4; xx++) { + Vector4 c = colors[yy*4 + xx]; + + if (x * 4 + xx < w && y * 4 + yy < h) { + m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = c.x; + m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = c.y; + m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = c.z; + m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = c.w; + } + } + } + + ptr += bs; + } + } + } + else + { + // Others, we decode to 8-bit, then convert to float + + for (int y = 0; y < bh; y++) { + for (int x = 0; x < bw; x++) { + ColorBlock colors; + + if (format == nvtt::Format_BC1) + { + const BlockDXT1 * block = (const BlockDXT1 *)ptr; + + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC2) + { + const BlockDXT3 * block = (const BlockDXT3 *)ptr; + + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC3 || format == nvtt::Format_BC3n || format == nvtt::Format_BC3_RGBM) + { + const BlockDXT5 * block = 
(const BlockDXT5 *)ptr; + + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC4) + { + const BlockATI1 * block = (const BlockATI1 *)ptr; + block->decodeBlock(&colors, decoder == Decoder_D3D9); + } + else if (format == nvtt::Format_BC5) + { + const BlockATI2 * block = (const BlockATI2 *)ptr; + block->decodeBlock(&colors, decoder == Decoder_D3D9); + } + else if (format == nvtt::Format_BC7) + { + const BlockBC7 * block = (const BlockBC7 *)ptr; + block->decodeBlock(&colors); + } + else + { + nvDebugCheck(false); + } + + for (int yy = 0; yy < 4; yy++) + { + for (int xx = 0; xx < 4; xx++) + { + Color32 c = colors.color(xx, yy); + + if (x * 4 + xx < w && y * 4 + yy < h) + { + m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; + m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f; + m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f; + m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f; + } + } + } + + ptr += bs; + } + } + } } CATCH { return false; @@ -1092,7 +1199,7 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth m->image = img; } -void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter) +void Surface::resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter) { if (isNull()) return; @@ -1104,27 +1211,17 @@ void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilte int h = m->image->height(); int d = m->image->depth(); - getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type); + getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type, nvtt::ShapeRestriction_Square); if (m->type == TextureType_2D) { nvDebugCheck(d==1); - int md = nv::min(w,h); - w = md; - h = md; } else if 
(m->type == TextureType_Cube) { nvDebugCheck(d==1); nvDebugCheck(w==h); } - else if (m->type == TextureType_3D) - { - int md = nv::min(nv::min(w,h),d); - w = md; - h = md; - d = md; - } resize(w, h, d, filter, filterWidth, params); } @@ -1151,6 +1248,63 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl resize(w, h, d, filter, filterWidth, params); } + +float rmsBilinearError(nvtt::Surface original, nvtt::Surface resized) { + return nv::rmsBilinearColorError(original.m->image, resized.m->image, (FloatImage::WrapMode)original.wrapMode(), original.alphaMode() == AlphaMode_Transparency); +} + + +void Surface::autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter) +{ + Surface original = *this; + Surface resized = original; + + int w = width(); + int h = height(); + int d = depth(); + + w = (w + 1) / 2; + h = (h + 1) / 2; + d = (d + 1) / 2; + + while (w >= 4 && h >= 4 && d >= 1) { + + // Resize always from original? This is more expensive, but should produce higher quality. + //resized = original; + + resized.resize(w, h, d, filter); + +#if 0 + // Scale back up to original size. @@ Upscaling not implemented! 
+ Surface restored = resized; + restored.resize(original.width(), original.height(), original.depth(), ResizeFilter_Triangle); + + float error; + if (isNormalMap()) { + error = nvtt::angularError(original, restored); + } + else { + error = nvtt::rmsError(original, restored); + } +#else + float error = rmsBilinearError(original, resized); +#endif + + if (error < errorTolerance) { + *this = resized; + nvDebug("image resized %dx%d -> %dx%d (error=%f)\n", original.width(), original.height(), w, h, error); + } + else { + nvDebug("image can't be resized further (error=%f)\n", error); + break; + } + + w = (w + 1) / 2; + h = (h + 1) / 2; + d = (d + 1) / 2; + } +} + bool Surface::canMakeNextMipmap(int min_size /*= 1*/) { if (isNull()) return false; @@ -1196,7 +1350,7 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa { nvDebugCheck(filter == MipmapFilter_Kaiser); KaiserFilter filter(filterWidth); - if (params != NULL) filter.setParameters(params[0], params[1]); + if (params != NULL) filter.setParameters(/*alpha=*/params[0], /*stretch=*/params[1]); img = img->downSample(filter, wrapMode, 3); } } @@ -1357,8 +1511,9 @@ void Surface::toSrgb() for (uint c = 0; c < 3; c++) { float * channel = img->channel(c); for (uint i = 0; i < count; i++) { + //parallel_for(count, 128, [=](int i) { channel[i] = ::toSrgb(channel[i]); - } + }//); } } @@ -1382,8 +1537,9 @@ void Surface::toLinearFromSrgb() for (uint c = 0; c < 3; c++) { float * channel = img->channel(c); for (uint i = 0; i < count; i++) { + //parallel_for(count, 128, [=](int i) { channel[i] = ::fromSrgb(channel[i]); - } + }//); } } @@ -2827,6 +2983,78 @@ Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) return s; } + +Surface Surface::warp(int w, int h, WarpFunction * warp_function) const +{ + Surface s; + + FloatImage * img = s.m->image = new FloatImage; + + const int C = m->image->componentCount(); + img->allocate(C, w, h, 1); + +#define USE_PARALLEL_FOR 0 +#if 
USE_PARALLEL_FOR + nv::parallel_for(h, 1, [=](int y) { +#else + for (int y = 0; y < h; y++) { +#endif + for (int x = 0; x < w; x++) { + float fx = (float(x) + 0.0f) / w; + float fy = (float(y) + 0.0f) / h; + float fz = 0; + + warp_function(fx, fy, fz); + + for (int c = 0; c < C; c++) { + img->pixel(c, x, y, 0) = m->image->sampleLinearClamp(c, fx, fy); + } + } + } +#if USE_PARALLEL_FOR + ); +#endif + + return s; +} + +Surface Surface::warp(int w, int h, int d, WarpFunction * warp_function) const +{ + Surface s; + + FloatImage * img = s.m->image = new FloatImage; + + const int C = m->image->componentCount(); + img->allocate(C, w, h, d); + + for (int z = 0; z < d; z++) { +#define USE_PARALLEL_FOR 0 +#if USE_PARALLEL_FOR + nv::parallel_for(h, 1, [=](int y) { +#else + for (int y = 0; y < h; y++) { +#endif + for (int x = 0; x < w; x++) { + float fx = (float(x) + 0.0f) / w; + float fy = (float(y) + 0.0f) / h; + float fz = (float(z) + 0.0f) / d; + + warp_function(fx, fy, fz); + + for (int c = 0; c < C; c++) { + img->pixel(c, x, y, z) = m->image->sampleLinearClamp(c, fx, fy, fz); // @@ 2D only. 
+ } + } + } +#if USE_PARALLEL_FOR + ); +#endif + } + + return s; +} + + bool Surface::copyChannel(const Surface & srcImage, int srcChannel) { return copyChannel(srcImage, srcChannel, srcChannel); @@ -2953,7 +3181,7 @@ void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a) } // Vertical lines: - for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width) + for (uint i = 0, x = 0; i < uint(aw); i++, x += tile_width) { for (uint y = 0; y < h; y++) { @@ -3083,9 +3311,9 @@ Surface nvtt::histogram(const Surface & img, int width, int height) return histogram(img, /*minRange*/0, maxRange, width, height); } -#include "nvcore/Array.inl" -#include "nvmath/PackedFloat.h" -#include +//#include "nvcore/Array.inl" +//#include "nvmath/PackedFloat.h" +//#include nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height) { @@ -3234,7 +3462,7 @@ nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRang maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z)); } - printf("maxh = %f\n", maxh); + //printf("maxh = %f\n", maxh); //maxh = 80; maxh = 256; diff --git a/src/nvtt/Surface.h b/src/nvtt/Surface.h index 419a0e1..73f7574 100644 --- a/src/nvtt/Surface.h +++ b/src/nvtt/Surface.h @@ -83,7 +83,7 @@ namespace nv { uint countMipmaps(uint w, uint h, uint d); uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size); uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format); - void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType); + void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType, nvtt::ShapeRestriction shapeRestriction = nvtt::ShapeRestriction_None); } diff --git a/src/nvtt/TaskDispatcher.h b/src/nvtt/TaskDispatcher.h index c7224d0..168ba56 100644 --- a/src/nvtt/TaskDispatcher.h +++ 
b/src/nvtt/TaskDispatcher.h @@ -10,8 +10,8 @@ // Gran Central Dispatch (GCD/libdispatch) // http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) -#define HAVE_GCD 1 -#include +//#define HAVE_GCD 1 +//#include #endif // Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: @@ -64,7 +64,7 @@ namespace nvtt { #endif -#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) +#if HAVE_GCD // Task dispatcher using Apple's Grand Central Dispatch. struct AppleTaskDispatcher : public TaskDispatcher diff --git a/src/nvtt/nvtt.cpp b/src/nvtt/nvtt.cpp index b85d52e..bfdb0d1 100644 --- a/src/nvtt/nvtt.cpp +++ b/src/nvtt/nvtt.cpp @@ -47,9 +47,9 @@ const char * nvtt::errorString(Error e) return "Error writing through output handler"; case Error_UnsupportedOutputFormat: return "The container file does not support the selected output format"; + default: + return "Invalid error"; } - - return "Invalid error"; } // Return NVTT version. diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index b695490..d86a503 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -105,7 +105,21 @@ namespace nvtt Format_BC6, Format_BC7, - Format_BC3_RGBM, // + Format_BC3_RGBM, + + Format_ETC1, + Format_ETC2_R, + Format_ETC2_RG, + Format_ETC2_RGB, + Format_ETC2_RGBA, + Format_ETC2_RGB_A1, + + Format_ETC2_RGBM, + + Format_PVR_2BPP_RGB, // Using PVR textools. + Format_PVR_4BPP_RGB, + Format_PVR_2BPP_RGBA, + Format_PVR_4BPP_RGBA, Format_Count }; @@ -155,6 +169,7 @@ namespace nvtt NVTT_API void setFormat(Format format); NVTT_API void setQuality(Quality quality); NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); + NVTT_API void setRGBMThreshold(float min_m); NVTT_API void setExternalCompressor(const char * name); @@ -173,9 +188,10 @@ namespace nvtt NVTT_API void setTargetDecoder(Decoder decoder); // Translate to and from D3D formats. 
+ NVTT_API Format format() const; NVTT_API unsigned int d3d9Format() const; + NVTT_API unsigned int dxgiFormat() const; //NVTT_API bool setD3D9Format(unsigned int format); - //NVTT_API unsigned int dxgiFormat() const; //NVTT_API bool setDxgiFormat(unsigned int format); }; @@ -253,6 +269,14 @@ namespace nvtt AlphaMode_Premultiplied, }; + // Extents shape restrictions + enum ShapeRestriction + { + ShapeRestriction_None, + ShapeRestriction_Square, + }; + + // Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1) struct InputOptions { @@ -344,7 +368,7 @@ namespace nvtt { Container_DDS, Container_DDS10, - // Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/ + Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/ // Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format }; @@ -439,6 +463,9 @@ namespace nvtt ToneMapper_Lightmap, }; + // Transform the given x,y coordinates. + typedef void WarpFunction(float & x, float & y, float & d); + // A surface is one level of a 2D or 3D texture. (New in NVTT 2.1) // @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression. 
@@ -486,7 +513,8 @@ namespace nvtt NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); - NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter); + NVTT_API void resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter); + NVTT_API void autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter); NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1); NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1); @@ -554,6 +582,10 @@ namespace nvtt NVTT_API void flipZ(); NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const; + NVTT_API Surface warp(int w, int h, WarpFunction * f) const; + NVTT_API Surface warp(int w, int h, int d, WarpFunction * f) const; + + // Copy image data. 
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel); NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel); diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp index aceac51..652dcfc 100644 --- a/src/nvtt/tests/testsuite.cpp +++ b/src/nvtt/tests/testsuite.cpp @@ -146,9 +146,16 @@ static const char * s_witnessImageSet[] = { }; static const char * s_witnessLmapImageSet[] = { - "specruin.dds", - "cottage.dds", + "hallway.dds", + "windmill.dds", + "tunnel.dds", + "theater.dds", "tower.dds", + "hub.dds", + "mine.dds", + "archway.dds", + "hut.dds", + "shaft.dds", }; static const char * s_normalMapImageSet[] = { @@ -187,8 +194,14 @@ enum Mode { Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic, //Mode_BC5_Normal_DualParaboloid, - Mode_BC6, - Mode_BC7, + Mode_BC6, + Mode_BC7, + Mode_ETC1_IC, + Mode_ETC1_EtcLib, + Mode_ETC2_EtcLib, + Mode_ETC1_RgEtc, + Mode_ETC2_RGBM, + Mode_PVR, Mode_Count }; static const char * s_modeNames[] = { @@ -207,8 +220,14 @@ static const char * s_modeNames[] = { "BC5-Normal-Paraboloid", // Mode_BC5_Normal_Paraboloid, "BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic, //"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid, - "BC6", // Mode_BC6, - "BC7", // Mode_BC7, + "BC6", // Mode_BC6, + "BC7", // Mode_BC7, + "ETC1-IC", + "ETC1-EtcLib", + "ETC2-EtcLib", + "ETC1-RgEtc", + "ETC2-RGBM", + "PVR", }; nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count); @@ -218,14 +237,16 @@ struct Test { Mode modes[6]; }; static Test s_imageTests[] = { - {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}}, - {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, - //{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}}, - {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}}, - {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}}, - {"HDR", 2, {Mode_BC3_RGBM, 
Mode_BC6}}, - {"BC6", 1, {Mode_BC6}}, - {"BC7", 1, {Mode_BC7}}, +/*0*/ {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}}, +/*1*/ {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, +/*2*/ {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}}, +/*3*/ {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}}, +/*4*/ {"HDR", 3, {Mode_ETC2_RGBM, Mode_BC3_RGBM, Mode_BC6}}, +/*5*/ {"BC6", 1, {Mode_BC6}}, +/*6*/ {"BC7", 1, {Mode_BC7}}, +/*7*/ {"ETC", 3, {Mode_ETC1_IC, Mode_ETC1_RgEtc, Mode_ETC2_EtcLib}}, +/*8*/ {"Color Mobile", 4, {Mode_PVR, Mode_ETC1_IC, Mode_ETC2_EtcLib, Mode_BC1}}, +/*9*/ //{"ETC-Lightmap", 2, {Mode_BC3_RGBM, Mode_ETC_RGBM}}, }; const int s_imageTestCount = ARRAY_SIZE(s_imageTests); @@ -404,10 +425,10 @@ int main(int argc, char *argv[]) i++; } } - else - { - printf("Warning: unrecognized option \"%s\"\n", argv[i]); - } + else + { + printf("Warning: unrecognized option \"%s\"\n", argv[i]); + } } // Validate inputs. @@ -462,7 +483,8 @@ int main(int argc, char *argv[]) } else { - compressionOptions.setQuality(nvtt::Quality_Production); + compressionOptions.setQuality(nvtt::Quality_Normal); + //compressionOptions.setQuality(nvtt::Quality_Production); } //compressionOptions.setExternalCompressor("ati"); //compressionOptions.setExternalCompressor("squish"); @@ -515,13 +537,13 @@ int main(int argc, char *argv[]) // Labels on the left side. if (errorMode == ErrorMode_RMSE) { - graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01"; + graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.03,0.01"; } else if (errorMode == ErrorMode_CieLab) { graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1"; } else if (errorMode == ErrorMode_AngularRMSE) { - graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01"; + graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.2,0.02"; // 0.05,0.01 } // Labels at the bottom. 
@@ -581,7 +603,6 @@ int main(int argc, char *argv[]) else if (errorMode == ErrorMode_AngularRMSE) { graphWriter << "&chtt=" << set.name << "%20-%20" << test.name << "%20-%20Angular RMSE"; } - Timer timer; @@ -590,7 +611,7 @@ int main(int argc, char *argv[]) nvtt::Surface img; - printf("Running Test: %s with Set: %s\n", test.name, set.name); + printf("Running test '%s' with set '%s'\n", test.name, set.name); graphWriter << "&chd=t:"; @@ -602,10 +623,11 @@ int main(int argc, char *argv[]) Mode mode = test.modes[t]; nvtt::Format format; + const char * compressor_name = NULL; if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) { format = nvtt::Format_BC1; } - else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW) { + else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_LUVW) { format = nvtt::Format_BC3; } else if (mode == Mode_BC3_Normal) { @@ -614,20 +636,51 @@ int main(int argc, char *argv[]) else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) { format = nvtt::Format_BC5; } - else if (mode == Mode_BC6) - { - format = nvtt::Format_BC6; - } - else if (mode == Mode_BC7) - { - format = nvtt::Format_BC7; - } - else - { - nvDebugCheck(false); - } + else if (mode == Mode_BC3_RGBM) { + format = nvtt::Format_BC3_RGBM; + } + else if (mode == Mode_BC6) + { + format = nvtt::Format_BC6; + } + else if (mode == Mode_BC7) + { + format = nvtt::Format_BC7; + } + else if (mode == Mode_ETC1_IC) + { + format = nvtt::Format_ETC1; + } + else if (mode == Mode_ETC1_EtcLib) + { + format = nvtt::Format_ETC1; + compressor_name = "etclib"; + } + else if (mode == Mode_ETC2_EtcLib) + { + format = nvtt::Format_ETC2_RGB; + compressor_name = "etclib"; + } + else if (mode == Mode_ETC1_RgEtc) + { + format = nvtt::Format_ETC1; + compressor_name = "rg_etc"; + } + else if (mode == 
Mode_ETC2_RGBM) + { + format = nvtt::Format_ETC2_RGBM; + } + else if (mode == Mode_PVR) + { + format = nvtt::Format_PVR_4BPP_RGB; + } + else + { + nvUnreachable(); + } compressionOptions.setFormat(format); + if (compressor_name) compressionOptions.setExternalCompressor(compressor_name); if (set.type == ImageType_RGBA) { img.setAlphaMode(nvtt::AlphaMode_Transparency); @@ -653,6 +706,7 @@ int main(int argc, char *argv[]) printf("Input image '%s' not found.\n", set.fileNames[i]); return EXIT_FAILURE; } + float color_range = 0.0f; if (img.isNormalMap()) { img.normalizeNormalMap(); @@ -693,16 +747,34 @@ int main(int argc, char *argv[]) tmp.clamp(2); tmp.clamp(3); } - else if (mode == Mode_BC3_RGBM) { - tmp.setAlphaMode(nvtt::AlphaMode_None); - if (set.type == ImageType_HDR) { - // Transform to gamma-2.0 space before applying RGBM - helps a lot with banding in the darks. - tmp.toGamma(2.0f); - tmp.toRGBM(3.0f); // range of 3.0 in gamma-2.0 space == range of 9.0 in linear space + else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) { + float r, g, b; + tmp.range(0, NULL, &r); + tmp.range(1, NULL, &g); + tmp.range(2, NULL, &b); + color_range = max3(r, g, b); + printf("color range = %f\n", color_range); + + tmp.setAlphaMode(nvtt::AlphaMode_Transparency); + + const float max_color_range = 16.0f; + + if (color_range > max_color_range) { + color_range = max_color_range; } - else { - tmp.toRGBM(); + + for (int i = 0; i < 3; i++) { + tmp.scaleBias(i, 1.0f / color_range, 0.0f); } + tmp.toneMap(nvtt::ToneMapper_Linear, /*parameters=*/NULL); // Clamp without changing the hue. + + // Clamp alpha. + tmp.clamp(3); + + // To gamma. 
+ tmp.toGamma(2); + + compressionOptions.setRGBMThreshold(0.2f); } else if (mode == Mode_BC3_LUVW) { tmp.setAlphaMode(nvtt::AlphaMode_None); @@ -781,14 +853,25 @@ int main(int argc, char *argv[]) }*/ } } - else if (mode == Mode_BC3_RGBM) { - if (set.type == ImageType_HDR) { - img_out.fromRGBM(3.0f); - img_out.toLinear(2.0f); + else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) { + /*if (set.type == ImageType_HDR) { + //img_out.fromRGBM(3.0f); + img_out.fromRGBM(range); + img_out.toLinear(2.0f); } else { img_out.fromRGBM(); + }*/ + + img_out.fromRGBM(1.0f, 0.2f); + img_out.toLinear(2); + + for (int i = 0; i < 3; i++) { + img_out.scaleBias(i, color_range, 0.0f); } + + img_out.copyChannel(img, 3); // Copy alpha channel from source. + img_out.setAlphaMode(nvtt::AlphaMode_Transparency); } else if (mode == Mode_BC3_LUVW) { if (set.type == ImageType_HDR) { diff --git a/src/nvtt/tools/cmdline.h b/src/nvtt/tools/cmdline.h index 7617ae7..f46930d 100644 --- a/src/nvtt/tools/cmdline.h +++ b/src/nvtt/tools/cmdline.h @@ -61,6 +61,9 @@ struct MyAssertHandler : public nv::AssertHandler { virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) { fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line); nv::debug::dumpInfo(); + if (nv::debug::isDebuggerPresent()) { + return NV_ABORT_DEBUG; + } exit(1); } }; diff --git a/src/nvtt/tools/compress.cpp b/src/nvtt/tools/compress.cpp index 412ba5a..36142f1 100644 --- a/src/nvtt/tools/compress.cpp +++ b/src/nvtt/tools/compress.cpp @@ -154,11 +154,13 @@ int main(int argc, char *argv[]) bool loadAsFloat = false; bool rgbm = false; bool rangescale = false; + bool srgb = false; const char * externalCompressor = NULL; bool silent = false; bool dds10 = false; + bool ktx = false; nv::Path input; nv::Path output; @@ -285,6 +287,31 @@ int main(int argc, char *argv[]) format = nvtt::Format_BC3_RGBM; rgbm = true; } + else if (strcmp("-etc1", argv[i]) == 0) + { + format = 
nvtt::Format_ETC1; + } + else if (strcmp("-etc2", argv[i]) == 0 || strcmp("-etc2_rgb", argv[i]) == 0) + { + format = nvtt::Format_ETC2_RGB; + } + else if (strcmp("-etc2_eac", argv[i]) == 0 || strcmp("-etc2_rgba", argv[i]) == 0) + { + format = nvtt::Format_ETC2_RGBA; + } + else if (strcmp("-eac", argv[i]) == 0 || strcmp("-etc2_r", argv[i]) == 0) + { + format = nvtt::Format_ETC2_R; + } + else if (strcmp("-etc2_rg", argv[i]) == 0) + { + format = nvtt::Format_ETC2_RG; // -etc2_rg must select the RG format, not the single-channel one used by -etc2_r + } + else if (strcmp("-etc2_rgbm", argv[i]) == 0) + { + format = nvtt::Format_ETC2_RGBM; + rgbm = true; + } // Undocumented option. Mainly used for testing. else if (strcmp("-ext", argv[i]) == 0) @@ -309,7 +336,15 @@ int main(int argc, char *argv[]) { dds10 = true; } - + else if (strcmp("-ktx", argv[i]) == 0) + { + ktx = true; + } + else if (strcmp("-srgb", argv[i]) == 0) + { + srgb = true; + } + else if (argv[i][0] != '-') { input = argv[i]; @@ -321,15 +356,23 @@ int main(int argc, char *argv[]) { output.copy(input.str()); output.stripExtension(); - output.append(".dds"); + + if (ktx) + { + output.append(".ktx"); + } + else + { + output.append(".dds"); + } } break; } - else - { - printf("Warning: unrecognized option \"%s\"\n", argv[i]); - } + else + { + printf("Warning: unrecognized option \"%s\"\n", argv[i]); + } } const uint version = nvtt::version(); @@ -380,7 +423,9 @@ int main(int argc, char *argv[]) printf("Output options:\n"); printf(" -silent \tDo not output progress messages\n"); - printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7)\n\n"); + printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7, unless ktx is being used)\n"); + printf(" -ktx \tUse KTX container format\n"); + printf(" -srgb \tIf the requested format allows it, output will be in sRGB color space\n\n"); return EXIT_FAILURE; } @@ -398,7 +443,7 @@ int main(int argc, char *argv[]) bool useSurface = false; // @@ use Surface API in all cases! 
nvtt::Surface image; - if (format == nvtt::Format_BC3_RGBM || rgbm) { + if (format == nvtt::Format_BC3_RGBM || format == nvtt::Format_ETC2_RGBM || rgbm) { useSurface = true; if (!image.load(input.str())) { @@ -440,7 +485,7 @@ int main(int argc, char *argv[]) // To gamma. image.toGamma(2); - if (format != nvtt::Format_BC3_RGBM) { + if (format != nvtt::Format_BC3_RGBM && format != nvtt::Format_ETC2_RGBM) { // '&&': with '||' the test is always true and the explicit toRGBM pass would also run for both *_RGBM formats image.setAlphaMode(nvtt::AlphaMode_None); image.toRGBM(1, 0.15f); } @@ -494,7 +539,7 @@ int main(int argc, char *argv[]) nvDebugCheck(dds.isTextureArray()); inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize()); faceCount = dds.arraySize(); - dds10 = true; + dds10 = ktx ? false : true; } uint mipmapCount = dds.mipmapCount(); @@ -569,11 +614,12 @@ int main(int argc, char *argv[]) inputOptions.setAlphaMode(nvtt::AlphaMode_None); } + // IC: Do not enforce D3D9 restrictions anymore. // Block compressed textures with mipmaps must be powers of two. 
- if (!noMipmaps && format != nvtt::Format_RGB) + /*if (!noMipmaps && format != nvtt::Format_RGB) { inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo); - } + }*/ if (normal) { @@ -720,15 +766,27 @@ int main(int argc, char *argv[]) outputOptions.setOutputHandler(&outputHandler); outputOptions.setErrorHandler(&errorHandler); - // Automatically use dds10 if compressing to BC6 or BC7 - if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) - { - dds10 = true; - } - - if (dds10) + if (ktx) { - outputOptions.setContainer(nvtt::Container_DDS10); + outputOptions.setContainer(nvtt::Container_KTX); + } + else + { + // Automatically use dds10 if compressing to BC6 or BC7 + if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) { + dds10 = true; + } + + if (dds10) { + outputOptions.setContainer(nvtt::Container_DDS10); + } + else { + outputOptions.setContainer(nvtt::Container_DDS); + } + } + + if (srgb) { + outputOptions.setSrgbFlag(true); } // printf("Press ENTER.\n"); diff --git a/src/nvtt/tools/thumbnailer.cpp b/src/nvtt/tools/thumbnailer.cpp index 1c8ab01..b9fb4cf 100644 --- a/src/nvtt/tools/thumbnailer.cpp +++ b/src/nvtt/tools/thumbnailer.cpp @@ -99,8 +99,8 @@ int main(int argc, char *argv[]) return 1; } - break; - } + break; + } } if (input.isNull() || output.isNull()) @@ -136,21 +136,21 @@ int main(int argc, char *argv[]) nv::FloatImage fimage(&image); fimage.toLinear(0, 3, gamma); - uint thumbW, thumbH; - if (image.width() > image.height()) - { - thumbW = size; - thumbH = uint ((float (image.height()) / float (image.width())) * size); - } - else - { - thumbW = uint ((float (image.width()) / float (image.height())) * size); - thumbH = size; - } - nv::AutoPtr fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp)); - - nv::AutoPtr result(fresult->createImageGammaCorrect(gamma)); - result->setFormat(nv::Image::Format_ARGB); + uint thumbW, thumbH; + if (image.width() > image.height()) + { + thumbW = size; + thumbH = 
uint ((float (image.height()) / float (image.width())) * size); + } + else + { + thumbW = uint ((float (image.width()) / float (image.height())) * size); + thumbH = size; + } + nv::AutoPtr fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp)); + + nv::AutoPtr result(fresult->createImageGammaCorrect(gamma)); + result->setFormat(nv::Image::Format_ARGB); nv::StdOutputStream stream(output.str()); nv::ImageIO::save(output.str(), stream, result.ptr(), metaData.buffer()); @@ -160,7 +160,7 @@ int main(int argc, char *argv[]) nv::StdOutputStream stream(output.str()); nv::ImageIO::save(output.str(), stream, &image, metaData.buffer()); } - + return 0; }