Merge changes from The Witness.

This commit is contained in:
Ignacio 2018-02-05 18:55:07 -08:00
parent 2075d740c9
commit 9489aed825
88 changed files with 8924 additions and 5025 deletions

View File

@ -31,7 +31,7 @@ MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}") MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(CMAKE_BUILD_TYPE MATCHES "debug") IF(CMAKE_BUILD_TYPE MATCHES "debug")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.") SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.")
ADD_DEFINITIONS(-D_DEBUG=1) ADD_DEFINITIONS(-D_DEBUG=1)
ENDIF() ENDIF()

View File

@ -1,6 +1,6 @@
NVIDIA Texture Tools is licensed under the MIT license. NVIDIA Texture Tools is licensed under the MIT license.
Copyright (c) 2009-2016 Ignacio Castano Copyright (c) 2009-2017 Ignacio Castaño
Copyright (c) 2007-2009 NVIDIA Corporation Copyright (c) 2007-2009 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person Permission is hereby granted, free of charge, to any person

View File

@ -6,7 +6,7 @@ manipulation tools, designed to be integrated in game tools and asset
processing pipelines. processing pipelines.
The primary features of the library are mipmap and normal map generation, format The primary features of the library are mipmap and normal map generation, format
conversion and DXT compression. conversion, and DXT compression.
### How to build (Windows) ### How to build (Windows)
@ -42,5 +42,5 @@ src/nvtt/tools/compress.cpp
Detailed documentation of the API can be found at: Detailed documentation of the API can be found at:
http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation https://github.com/castano/nvidia-texture-tools/wiki/ApiDocumentation

View File

@ -36,4 +36,6 @@ do
#./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds #./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds
#./nvimgdiff -alpha $file.$EXT $file.bc6.dds #./nvimgdiff -alpha $file.$EXT $file.bc6.dds
# ETC2-EAC
./nvcompress -silent -alpha -nomips -etc_rgbm
done done

15
extern/poshlib/posh.h vendored
View File

@ -349,9 +349,18 @@ LLVM:
# define POSH_OS_STRING "UNICOS" # define POSH_OS_STRING "UNICOS"
#endif #endif
#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx //ACS if we're in xcode, look at the target conditionals to figure out if this is ios or osx
# define POSH_OS_OSX 1 #if defined __APPLE__
# define POSH_OS_STRING "MacOS X" # include "TargetConditionals.h"
#endif
#if TARGET_OS_IPHONE
# define POSH_OS_IOS 1
# define POSH_OS_STRING "iOS"
#else
# if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
# define POSH_OS_OSX 1
# define POSH_OS_STRING "MacOS X"
# endif
#endif #endif
#if defined __sun__ || defined sun || defined __sun || defined __solaris__ #if defined __sun__ || defined sun || defined __sun || defined __solaris__

View File

@ -1808,7 +1808,7 @@ typedef unsigned long uint64;
{ {
if (block_inten[0] > m_pSorted_luma[n - 1]) if (block_inten[0] > m_pSorted_luma[n - 1])
{ {
const uint min_error = labs(int(block_inten[0] - m_pSorted_luma[n - 1])); const uint min_error = abs(int(block_inten[0] - m_pSorted_luma[n - 1]));
if (min_error >= trial_solution.m_error) if (min_error >= trial_solution.m_error)
continue; continue;
} }
@ -1822,7 +1822,7 @@ typedef unsigned long uint64;
{ {
if (m_pSorted_luma[0] > block_inten[3]) if (m_pSorted_luma[0] > block_inten[3])
{ {
const uint min_error = labs(int(m_pSorted_luma[0] - block_inten[3])); const uint min_error = abs(int(m_pSorted_luma[0] - block_inten[3]));
if (min_error >= trial_solution.m_error) if (min_error >= trial_solution.m_error)
continue; continue;
} }
@ -1914,7 +1914,7 @@ done:
for (uint packed_c = 0; packed_c < limit; packed_c++) for (uint packed_c = 0; packed_c < limit; packed_c++)
{ {
int v = etc1_decode_value(diff, inten, selector, packed_c); int v = etc1_decode_value(diff, inten, selector, packed_c);
uint err = labs(v - static_cast<int>(color)); uint err = abs(v - static_cast<int>(color));
if (err < best_error) if (err < best_error)
{ {
best_error = err; best_error = err;

View File

@ -14,6 +14,7 @@ SET(BC6H_SRCS
zohtwo.cpp) zohtwo.cpp)
ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS}) ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS})
TARGET_LINK_LIBRARIES(bc6h nvcore nvmath)
IF(NOT WIN32) IF(NOT WIN32)
IF(CMAKE_COMPILER_IS_GNUCXX) IF(CMAKE_COMPILER_IS_GNUCXX)

View File

@ -37,7 +37,7 @@ int Utils::lerp(int a, int b, int i, int denom)
case 3: denom *= 5; i *= 5; // fall through to case 15 case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break; case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break; case 7: weights = denom7_weights_64; break;
default: nvDebugCheck(0); default: nvUnreachable();
} }
return (a*weights[denom-i] +b*weights[i] + round) >> shift; return (a*weights[denom-i] +b*weights[i] + round) >> shift;

View File

@ -584,7 +584,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{ {
Vector3 pixels[Tile::TILE_TOTAL]; Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL]; float importance[Tile::TILE_TOTAL];
float err = 0; //float err = 0;
for (int region=0; region<NREGIONS_ONE; ++region) for (int region=0; region<NREGIONS_ONE; ++region)
{ {

View File

@ -672,7 +672,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{ {
Vector3 pixels[Tile::TILE_TOTAL]; Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL]; float importance[Tile::TILE_TOTAL];
float err = 0; //float err = 0;
for (int region=0; region<NREGIONS_TWO; ++region) for (int region=0; region<NREGIONS_TWO; ++region)
{ {

View File

@ -22,6 +22,7 @@ SET(BC7_SRCS
avpcl_utils.h) avpcl_utils.h)
ADD_LIBRARY(bc7 STATIC ${BC7_SRCS}) ADD_LIBRARY(bc7 STATIC ${BC7_SRCS})
TARGET_LINK_LIBRARIES(bc7 nvcore nvmath)
TARGET_LINK_LIBRARIES(bc7 nvmath) TARGET_LINK_LIBRARIES(bc7 nvmath)

View File

@ -243,7 +243,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index) static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
{ {
int mode = AVPCL::getmode(in); //int mode = AVPCL::getmode(in);
pat_index = 0; pat_index = 0;
nvAssert (pat_index >= 0 && pat_index < NPATTERNS); nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
@ -580,7 +580,7 @@ static float exhaustive(const Vector4 colors[], const float importance[], int np
int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta); int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B // now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch]; //bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
int amin, bmin; int amin, bmin;

View File

@ -148,7 +148,7 @@ namespace nv
NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; } NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; } NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
#if NV_CC_MSVC #if NV_NEED_PSEUDOINDEX_WRAPPER
NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) { NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
return m_buffer[i(this)]; return m_buffer[i(this)];
} }

View File

@ -27,7 +27,7 @@
#define NV_FASTCALL __attribute__((fastcall)) #define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE __attribute__((always_inline)) inline #define NV_FORCEINLINE __attribute__((always_inline)) inline
#define NV_DEPRECATED __attribute__((deprecated)) #define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX #define NV_THREAD_LOCAL __thread
#if __GNUC__ > 2 #if __GNUC__ > 2
#define NV_PURE __attribute__((pure)) #define NV_PURE __attribute__((pure))

View File

@ -31,11 +31,6 @@ bool FileSystem::exists(const char * path)
// PathFileExists requires linking to shlwapi.lib // PathFileExists requires linking to shlwapi.lib
//return PathFileExists(path) != 0; //return PathFileExists(path) != 0;
return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES; return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
#elif NV_OS_ORBIS
const int BUFFER_SIZE = 2048;
char file_fullpath[BUFFER_SIZE];
snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path);
return sceFiosExistsSync(NULL, file_fullpath);
#else #else
if (FILE * fp = fopen(path, "r")) if (FILE * fp = fopen(path, "r"))
{ {
@ -78,3 +73,31 @@ bool FileSystem::removeFile(const char * path)
// @@ Use unlink or remove? // @@ Use unlink or remove?
return remove(path) == 0; return remove(path) == 0;
} }
#include "StdStream.h" // for fileOpen
// Copy the contents of file `src` into file `dst`, creating or truncating `dst`.
// Returns true only if every byte was read and written and `dst` was flushed
// and closed without error. Returns false if either file cannot be opened, if
// a read or write fails, or if closing `dst` reports an error.
bool FileSystem::copyFile(const char * src, const char * dst) {
    FILE * fsrc = fileOpen(src, "rb");
    if (fsrc == NULL) return false;

    FILE * fdst = fileOpen(dst, "wb");
    if (fdst == NULL) {
        fclose(fsrc);
        return false;
    }

    bool ok = true;
    char buffer[1024];
    size_t n;
    while ((n = fread(buffer, sizeof(char), sizeof(buffer), fsrc)) > 0) {
        if (fwrite(buffer, sizeof(char), n, fdst) != n) {
            ok = false;
            break;
        }
    }

    // fread returns 0 on both end-of-file and error; only the former is success.
    if (ferror(fsrc)) ok = false;
    fclose(fsrc);

    // Buffered data is written out on close, so a failing fclose means the copy is incomplete.
    if (fclose(fdst) != 0) ok = false;

    return ok;
}

View File

@ -15,7 +15,7 @@ namespace nv
NVCORE_API bool createDirectory(const char * path); NVCORE_API bool createDirectory(const char * path);
NVCORE_API bool changeDirectory(const char * path); NVCORE_API bool changeDirectory(const char * path);
NVCORE_API bool removeFile(const char * path); NVCORE_API bool removeFile(const char * path);
NVCORE_API bool copyFile(const char * src, const char * dst);
} // FileSystem namespace } // FileSystem namespace
} // nv namespace } // nv namespace

View File

@ -33,6 +33,8 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
#else // If typeof not available: #else // If typeof not available:
#define NV_NEED_PSEUDOINDEX_WRAPPER 1
#include <new> // placement new #include <new> // placement new
struct PseudoIndexWrapper { struct PseudoIndexWrapper {

View File

@ -2,6 +2,7 @@
#include "Memory.h" #include "Memory.h"
#include "Debug.h" #include "Debug.h"
#include "Utils.h"
#include <stdlib.h> #include <stdlib.h>
@ -56,6 +57,7 @@ void * realloc(void * ptr, size_t size)
#endif #endif
} }
/* No need to override this unless we want line info. /* No need to override this unless we want line info.
void * operator new (size_t size) throw() void * operator new (size_t size) throw()
{ {
@ -116,4 +118,32 @@ void operator delete(void* p, const std::nothrow_t&) throw()
#endif // NV_OVERRIDE_ALLOC #endif // NV_OVERRIDE_ALLOC
// Allocate `size` bytes whose address is a multiple of `alignment`.
// `alignment` must be a power of two and a multiple of sizeof(void*); both are
// checked with nvDebugCheck. Returns NULL when the allocation fails.
// Release the memory with nv::aligned_free.
void * nv::aligned_malloc(size_t size, size_t alignment)
{
    // alignment must be a power of two, multiple of sizeof(void*)
    nvDebugCheck(isPowerOfTwo(alignment));
    nvDebugCheck((alignment & (sizeof(void*) - 1)) == 0);
#if NV_OS_WIN32 || NV_OS_DURANGO
    return _aligned_malloc(size, alignment);
#elif NV_OS_DARWIN && !NV_OS_IOS
    void * ptr = NULL;
    // posix_memalign reports failure through its return value, not through ptr.
    if (posix_memalign(&ptr, alignment, size) != 0) {
        return NULL;
    }
    return ptr;
#elif NV_OS_LINUX
    return memalign(alignment, size);
#else // NV_OS_ORBIS || NV_OS_IOS
    // @@ IC: iOS appears to be 16 byte aligned, should we check alignment and assert if we request a higher alignment factor?
    return ::malloc(size);
#endif
}
// Release a pointer obtained from nv::aligned_malloc.
// Only the Win32/Durango path uses a dedicated aligned deallocator; every
// other platform hands the pointer to ::free.
void nv::aligned_free(void * ptr)
{
#if !(NV_OS_WIN32 || NV_OS_DURANGO)
    ::free(ptr);
#else
    _aligned_free(ptr);
#endif
}

View File

@ -7,10 +7,16 @@
#include "nvcore.h" #include "nvcore.h"
#include <stdlib.h> // malloc(), realloc() and free() #include <stdlib.h> // malloc(), realloc() and free()
#include <string.h> // memset
//#include <stddef.h> // size_t //#include <stddef.h> // size_t
//#include <new> // new and delete //#include <new> // new and delete
#define TRACK_MEMORY_LEAKS 0
#if TRACK_MEMORY_LEAKS
#include <vld.h>
#endif
#if NV_CC_GNUC #if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16))) # define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
@ -41,6 +47,8 @@ extern "C" {
#endif #endif
namespace nv { namespace nv {
NVCORE_API void * aligned_malloc(size_t size, size_t alignment);
NVCORE_API void aligned_free(void * );
// C++ helpers. // C++ helpers.
template <typename T> NV_FORCEINLINE T * malloc(size_t count) { template <typename T> NV_FORCEINLINE T * malloc(size_t count) {

View File

@ -113,7 +113,7 @@ namespace nv
public: public:
// BaseClass must implement addRef() and release(). // BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType; typedef SmartPtr<BaseClass> ThisType;
/// Default ctor. /// Default ctor.
SmartPtr() : m_ptr(NULL) SmartPtr() : m_ptr(NULL)

View File

@ -213,9 +213,12 @@ namespace nv
#elif NV_OS_LINUX #elif NV_OS_LINUX
return (uint)fread_unlocked(data, 1, len, m_fp); return (uint)fread_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN #elif NV_OS_DARWIN
// @@ No error checking, always returns len. // This is rather lame. Not sure if it's faster than the locked version.
for (uint i = 0; i < len; i++) { for (uint i = 0; i < len; i++) {
((char *)data)[i] = getc_unlocked(m_fp); ((char *)data)[i] = getc_unlocked(m_fp);
if (feof_unlocked(m_fp) != 0) {
return i;
}
} }
return len; return len;
#else #else

View File

@ -347,26 +347,36 @@ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
} }
/** Append a string. */ // Append a character.
StringBuilder & StringBuilder::append( const char * s ) StringBuilder & StringBuilder::append( char c )
{ {
return append(s, U32(strlen( s ))); return append(&c, 1);
} }
// Append a string.
StringBuilder & StringBuilder::append( const char * s )
{
return append(s, U32(strlen( s )));
}
/** Append a string. */ // Append a string.
StringBuilder & StringBuilder::append(const char * s, uint len) StringBuilder & StringBuilder::append(const char * s, uint len)
{ {
nvDebugCheck(s != NULL); nvDebugCheck(s != NULL);
uint offset = length(); uint offset = length();
const uint size = offset + len + 1; const uint size = offset + len + 1;
reserve(size); reserve(size);
strCpy(m_str + offset, len + 1, s, len); strCpy(m_str + offset, len + 1, s, len);
return *this; return *this;
} }
// Append the contents of another StringBuilder.
// A null builder (isNull()) has no character buffer, so there is nothing to
// append and forwarding its NULL pointer would trip the check in
// append(const char *, uint).
// NOTE(review): appending a builder to itself forwards m_str while reserve()
// may replace it - confirm reserve() semantics before relying on self-append.
StringBuilder & StringBuilder::append(const StringBuilder & str)
{
    if (str.isNull()) {
        return *this;
    }
    return append(str.m_str, str.length());
}
/** Append a formatted string. */ /** Append a formatted string. */
StringBuilder & StringBuilder::appendFormat( const char * fmt, ... ) StringBuilder & StringBuilder::appendFormat( const char * fmt, ... )
@ -516,6 +526,19 @@ StringBuilder & StringBuilder::copy( const StringBuilder & s )
return *this; return *this;
} }
// Remove the first occurrence of character `c`, shifting the rest of the
// string (including the terminator) one position to the left.
// Does nothing when the builder holds no string, when `c` does not occur, or
// when `c` is the terminator itself.
void StringBuilder::removeChar(char c)
{
    if (m_str == NULL) {
        return;
    }

    char * hit = strchr(m_str, c);

    // strchr also matches the terminating '\0'; stepping past it would read
    // outside the string.
    if (hit == NULL || *hit == '\0') {
        return;
    }

    // +1 so that the terminator is moved as well.
    memmove(hit, hit + 1, strlen(hit + 1) + 1);
}
bool StringBuilder::endsWith(const char * str) const bool StringBuilder::endsWith(const char * str) const
{ {
uint l = uint(strlen(str)); uint l = uint(strlen(str));
@ -530,7 +553,7 @@ bool StringBuilder::beginsWith(const char * str) const
return strncmp(m_str, str, l) == 0; return strncmp(m_str, str, l) == 0;
} }
// Find given char starting from the end. // Find given char starting from the end. Why not use strrchr!?
char * StringBuilder::reverseFind(char c) char * StringBuilder::reverseFind(char c)
{ {
int length = (int)strlen(m_str) - 1; int length = (int)strlen(m_str) - 1;
@ -563,6 +586,19 @@ char * StringBuilder::release()
return str; return str;
} }
// Take ownership of string.
// A NULL argument leaves the builder in the null state (size 0, no buffer);
// otherwise the size is set to the string length plus its terminator.
// NOTE(review): the previously held m_str is overwritten without being
// released here - presumably callers invoke this on an empty builder; confirm.
void StringBuilder::acquire(char * str)
{
    if (str == NULL) {
        m_size = 0;
        m_str = NULL;
        return;
    }

    m_size = strLen(str) + 1;
    m_str = str;
}
// Swap strings. // Swap strings.
void nv::swap(StringBuilder & a, StringBuilder & b) { void nv::swap(StringBuilder & a, StringBuilder & b) {
swap(a.m_size, b.m_size); swap(a.m_size, b.m_size);
@ -585,19 +621,20 @@ const char * Path::extension() const
/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) { /*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
nvCheck(path != NULL); if (path != NULL) {
for (int i = 0;; i++) {
for (int i = 0;; i++) { if (path[i] == '\0') break;
if (path[i] == '\0') break; if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator;
if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator; }
} }
} }
/// Toggles path separators (ie. \\ into /). /// Toggles path separators (ie. \\ into /).
void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/) void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
{ {
nvCheck(!isNull()); if (!isNull()) {
translatePath(m_str, pathSeparator); translatePath(m_str, pathSeparator);
}
} }
void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/) void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)

View File

@ -105,8 +105,10 @@ namespace nv
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3))); StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder & formatList( const char * format, va_list arg ); StringBuilder & formatList( const char * format, va_list arg );
StringBuilder & append(char c);
StringBuilder & append(const char * str); StringBuilder & append(const char * str);
StringBuilder & append(const char * str, uint len); StringBuilder & append(const char * str, uint len);
StringBuilder & append(const StringBuilder & str);
StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3))); StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
StringBuilder & appendFormatList(const char * format, va_list arg); StringBuilder & appendFormatList(const char * format, va_list arg);
@ -123,21 +125,24 @@ namespace nv
StringBuilder & toLower(); StringBuilder & toLower();
StringBuilder & toUpper(); StringBuilder & toUpper();
void removeChar(char c);
bool endsWith(const char * str) const; bool endsWith(const char * str) const;
bool beginsWith(const char * str) const; bool beginsWith(const char * str) const;
char * reverseFind(char c); char * reverseFind(char c);
void reset(); void reset();
bool isNull() const { return m_size == 0; } NV_FORCEINLINE bool isNull() const { return m_size == 0; }
// const char * accessors // const char * accessors
//operator const char * () const { return m_str; } //operator const char * () const { return m_str; }
//operator char * () { return m_str; } //operator char * () { return m_str; }
const char * str() const { return m_str; } NV_FORCEINLINE const char * str() const { return m_str; }
char * str() { return m_str; } NV_FORCEINLINE char * str() { return m_str; }
char * release(); char * release(); // Release ownership of string.
void acquire(char *); // Take ownership of string.
/// Implement value semantics. /// Implement value semantics.
StringBuilder & operator=( const StringBuilder & s ) { StringBuilder & operator=( const StringBuilder & s ) {
@ -280,25 +285,25 @@ namespace nv
/// Equal operator. /// Equal operator.
bool operator==( const String & str ) const bool operator==( const String & str ) const
{ {
return strMatch(str.data, data); return strEqual(str.data, data);
} }
/// Equal operator. /// Equal operator.
bool operator==( const char * str ) const bool operator==( const char * str ) const
{ {
return strMatch(str, data); return strEqual(str, data);
} }
/// Not equal operator. /// Not equal operator.
bool operator!=( const String & str ) const bool operator!=( const String & str ) const
{ {
return !strMatch(str.data, data); return !strEqual(str.data, data);
} }
/// Not equal operator. /// Not equal operator.
bool operator!=( const char * str ) const bool operator!=( const char * str ) const
{ {
return !strMatch(str, data); return !strEqual(str, data);
} }
/// Returns true if this string is the null string. /// Returns true if this string is the null string.

View File

@ -82,7 +82,7 @@ namespace nv
nvStaticCheck(sizeof(bool) == 4); nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0; uint8 b = c ? 1 : 0;
s.serialize( &b, 1 ); s.serialize( &b, 1 );
c = (b == 1); c = (b != 0);
#else #else
nvStaticCheck(sizeof(bool) == 1); nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 ); s.serialize( &c, 1 );

View File

@ -39,6 +39,28 @@ namespace nv
// These intentionally look like casts. // These intentionally look like casts.
// uint64 casts:
// Generic fallback: plain implicit conversion, no range check.
template <typename T> inline uint64 U64(T x) { return x; }
//template <> inline uint64 U64<uint64>(uint64 x) { return x; }
// Signed sources are debug-checked to be non-negative before the cast.
template <> inline uint64 U64<int64>(int64 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint32>(uint32 x) { return x; }
template <> inline uint64 U64<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint16>(uint16 x) { return x; }
template <> inline uint64 U64<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint8>(uint8 x) { return x; }
template <> inline uint64 U64<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint64)x; }
// int64 casts:
// Generic fallback: plain implicit conversion, no range check.
template <typename T> inline int64 I64(T x) { return x; }
// Only uint64 gets a specialization here; it is debug-checked against NV_INT64_MAX.
template <> inline int64 I64<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT64_MAX); return (int64)x; }
//template <> inline int64 I64<int64>(int64 x) { return x; }
//template <> inline int64 I64<uint32>(uint32 x) { return x; }
//template <> inline int64 I64<int32>(int32 x) { return x; }
//template <> inline int64 I64<uint16>(uint16 x) { return x; }
//template <> inline int64 I64<int16>(int16 x) { return x; }
//template <> inline int64 I64<uint8>(uint8 x) { return x; }
//template <> inline int64 I64<int8>(int8 x) { return x; }
// uint32 casts: // uint32 casts:
template <typename T> inline uint32 U32(T x) { return x; } template <typename T> inline uint32 U32(T x) { return x; }
template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; } template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
@ -50,6 +72,11 @@ namespace nv
//template <> inline uint32 U32<uint8>(uint8 x) { return x; } //template <> inline uint32 U32<uint8>(uint8 x) { return x; }
template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; } template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
#if NV_OS_DARWIN
template <> inline uint32 U32<unsigned long>(unsigned long x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
template <> inline uint32 U32<long>(long x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
#endif
// int32 casts: // int32 casts:
template <typename T> inline int32 I32(T x) { return x; } template <typename T> inline int32 I32(T x) { return x; }
template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; } template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
@ -182,7 +209,7 @@ namespace nv
* @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x * @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
* @note nextPowerOfTwo(x) = 2 << log2(x-1) * @note nextPowerOfTwo(x) = 2 << log2(x-1)
*/ */
inline uint nextPowerOfTwo( uint x ) inline uint32 nextPowerOfTwo(uint32 x)
{ {
nvDebugCheck( x != 0 ); nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. #if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
@ -202,8 +229,19 @@ namespace nv
#endif #endif
} }
/// Return true if @a n is a power of two. inline uint64 nextPowerOfTwo(uint64 x)
inline bool isPowerOfTwo( uint n ) {
nvDebugCheck(x != 0);
uint p = 1;
while (x > p) {
p += p;
}
return p;
}
// @@ Should I just use a macro instead?
template <typename T>
inline bool isPowerOfTwo(T n)
{ {
return (n & (n-1)) == 0; return (n & (n-1)) == 0;
} }

View File

@ -56,6 +56,7 @@
# define NV_OS_MINGW 1 # define NV_OS_MINGW 1
# define NV_OS_WIN32 1 # define NV_OS_WIN32 1
#elif defined POSH_OS_OSX #elif defined POSH_OS_OSX
# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too.
# define NV_OS_DARWIN 1 # define NV_OS_DARWIN 1
# define NV_OS_UNIX 1 # define NV_OS_UNIX 1
#elif defined POSH_OS_IOS #elif defined POSH_OS_IOS
@ -78,9 +79,9 @@
// Threading: // Threading:
// some platforms don't implement __thread or similar for thread-local-storage // some platforms don't implement __thread or similar for thread-local-storage
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios? #if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS
# define NV_OS_USE_PTHREAD 1 # define NV_OS_USE_PTHREAD 1
# if NV_OS_DARWIN || NV_OS_IOS # if 0 //Apple finally added TLS support to iOS!// NV_OS_IOS
# define NV_OS_HAS_TLS_QUALIFIER 0 # define NV_OS_HAS_TLS_QUALIFIER 0
# else # else
# define NV_OS_HAS_TLS_QUALIFIER 1 # define NV_OS_HAS_TLS_QUALIFIER 1
@ -96,7 +97,7 @@
// NV_CPU_X86_64 // NV_CPU_X86_64
// NV_CPU_PPC // NV_CPU_PPC
// NV_CPU_ARM // NV_CPU_ARM
// NV_CPU_AARCH64 // NV_CPU_ARM_64
#define NV_CPU_STRING POSH_CPU_STRING #define NV_CPU_STRING POSH_CPU_STRING
@ -110,7 +111,7 @@
#elif defined POSH_CPU_STRONGARM #elif defined POSH_CPU_STRONGARM
# define NV_CPU_ARM 1 # define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64 #elif defined POSH_CPU_AARCH64
# define NV_CPU_AARCH64 1 # define NV_CPU_ARM_64 1
#else #else
# error "Unsupported CPU" # error "Unsupported CPU"
#endif #endif
@ -148,10 +149,16 @@
#endif #endif
// Endiannes: // Endiannes:
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN // @@ POSH endian detection is broken for arm64 on iOS. They are bi-endian and iOS sets all their processors to little endian by default.
#define NV_BIG_ENDIAN POSH_BIG_ENDIAN #if NV_OS_IOS
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING # define NV_LITTLE_ENDIAN 1
# define NV_BIG_ENDIAN 0
# define NV_ENDIAN_STRING "little"
#else
# define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
# define NV_BIG_ENDIAN POSH_BIG_ENDIAN
# define NV_ENDIAN_STRING POSH_ENDIAN_STRING
#endif
// Define the right printf prefix for size_t arguments: // Define the right printf prefix for size_t arguments:
#if POSH_64BIT_POINTER #if POSH_64BIT_POINTER
@ -164,6 +171,28 @@
// cmake config // cmake config
#include "nvconfig.h" #include "nvconfig.h"
#if NV_OS_DARWIN
#include <stdint.h>
//#include <inttypes.h>
// Type definitions:
typedef uint8_t uint8;
typedef int8_t int8;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
// POSH gets this wrong due to __LP64__
#undef POSH_I64_PRINTF_PREFIX
#define POSH_I64_PRINTF_PREFIX "ll"
#else
// Type definitions: // Type definitions:
typedef posh_u8_t uint8; typedef posh_u8_t uint8;
@ -175,8 +204,23 @@ typedef posh_i16_t int16;
typedef posh_u32_t uint32; typedef posh_u32_t uint32;
typedef posh_i32_t int32; typedef posh_i32_t int32;
//#if NV_OS_DARWIN
// OSX-64 is supposed to be LP64 (longs and pointers are 64 bits), thus uint64 is defined as
// unsigned long. However, some OSX headers define it as unsigned long long, producing errors,
// even though both types are 64 bit. Ideally posh should handle that, but it has not been
// updated in ages, so here I'm just falling back to the standard C99 types defined in inttypes.h
//#include <inttypes.h>
//typedef posh_u64_t uint64_t;
//typedef posh_i64_t int64_t;
//#else
typedef posh_u64_t uint64; typedef posh_u64_t uint64;
typedef posh_i64_t int64; typedef posh_i64_t int64;
//#endif
#if NV_OS_DARWIN
// To avoid duplicate definitions.
#define _UINT64
#endif
#endif
// Aliases // Aliases
typedef uint32 uint; typedef uint32 uint;
@ -246,8 +290,10 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int32) == 4); NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4); NV_COMPILER_CHECK(sizeof(uint32) == 4);
#include <stddef.h> // for size_t
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
#define NV_ARRAY_SIZE(x) sizeof(ArraySizeHelper(x))
//#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 0 // Disabled in The Witness. #if 0 // Disabled in The Witness.
#if NV_CC_MSVC #if NV_CC_MSVC
@ -269,8 +315,38 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \ NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
} }
namespace nv {
    // Runs a callable when the guard goes out of scope.
    // The guard can be moved but not copied: moving transfers the duty of
    // running the callable to the new object, so it runs exactly once even
    // when the compiler does not elide the copy made by MakeScopeExit.
    template <typename F>
    struct ScopeExit {
        ScopeExit(F f) : f(f), armed(true) {}
        ScopeExit(ScopeExit && other) : f(other.f), armed(other.armed) { other.armed = false; }
        ScopeExit(const ScopeExit &) = delete;
        ScopeExit & operator=(const ScopeExit &) = delete;
        ~ScopeExit() { if (armed) f(); }
        F f;
        bool armed; // false once ownership of the action has been moved away
    };

    // Deduce F from the argument and build the matching guard.
    template <typename F>
    ScopeExit<F> MakeScopeExit(F f) {
        return ScopeExit<F>(f);
    }
}
#define NV_ON_RETURN(code) \
auto NV_STRING_JOIN2(scope_exit_, __LINE__) = nv::MakeScopeExit([=](){code;})
// Indicate the compiler that the parameter is not used to suppress compier warnings. // Indicate the compiler that the parameter is not used to suppress compier warnings.
#if NV_CC_MSVC
#define NV_UNUSED(a) ((a)=(a)) #define NV_UNUSED(a) ((a)=(a))
#else
#define NV_UNUSED(a) _Pragma(NV_STRING(unused(a)))
#endif
// Branch-prediction hints. On GCC/Clang they map to __builtin_expect; on
// other compilers they expand to the expression itself, parenthesized so the
// macro behaves as a single operand inside a larger expression.
#if NV_CC_GNUC || NV_CC_CLANG
#define NV_LIKELY(x) __builtin_expect(!!(x), 1)
#define NV_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define NV_LIKELY(x) (x)
#define NV_UNLIKELY(x) (x)
#endif
// Null index. @@ Move this somewhere else... it's only used by nvmesh. // Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0); //const unsigned int NIL = unsigned int(~0);

View File

@ -632,44 +632,45 @@ void BlockCTX1::setIndices(int * idx)
/// Decode BC6 block. /// Decode BC6 block.
void BlockBC6::decodeBlock(Vector3 colors[16]) const void BlockBC6::decodeBlock(Vector4 colors[16]) const
{ {
ZOH::Tile tile(4, 4); ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile); ZOH::decompress((const char *)data, tile);
// Convert ZOH's tile struct to Vector3, and convert half to float. // Convert ZOH's tile struct to Vector3, and convert half to float.
for (uint y = 0; y < 4; ++y) for (uint y = 0; y < 4; ++y)
{ {
for (uint x = 0; x < 4; ++x) for (uint x = 0; x < 4; ++x)
{ {
uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x); uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y); uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z); uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
colors[y * 4 + x].x = to_float(rHalf); colors[y * 4 + x].x = to_float(rHalf);
colors[y * 4 + x].y = to_float(gHalf); colors[y * 4 + x].y = to_float(gHalf);
colors[y * 4 + x].z = to_float(bHalf); colors[y * 4 + x].z = to_float(bHalf);
} colors[y * 4 + x].w = 1.0f;
} }
}
} }
/// Decode BC7 block.
/// Decompresses the block's bytes into a 4x4 AVPCL tile and stores every
/// texel into `block` as 8-bit RGBA.
void BlockBC7::decodeBlock(ColorBlock * block) const
{
    AVPCL::Tile tile(4, 4);
    AVPCL::decompress((const char *)data, tile);

    // Convert AVPCL's tile struct back to NVTT's.
    for (uint y = 0; y < 4; ++y)
    {
        for (uint x = 0; x < 4; ++x)
        {
            const Vector4 texel = tile.data[y][x];

            // Note: decoded rgba values are in [0, 255] range and should be an integer,
            // because BC7 never uses more than 8 bits per channel. So no need to round.
            const uint8 r = uint8(texel.x);
            const uint8 g = uint8(texel.y);
            const uint8 b = uint8(texel.z);
            const uint8 a = uint8(texel.w);

            block->color(x, y).setRGBA(r, g, b, a);
        }
    }
}

View File

@ -36,6 +36,7 @@ namespace nv
struct AlphaBlock4x4; struct AlphaBlock4x4;
class Stream; class Stream;
class Vector3; class Vector3;
class Vector4;
/// DXT1 block. /// DXT1 block.
@ -220,7 +221,7 @@ namespace nv
struct BlockBC6 struct BlockBC6
{ {
uint8 data[16]; // Not even going to try to write a union for this thing. uint8 data[16]; // Not even going to try to write a union for this thing.
void decodeBlock(Vector3 colors[16]) const; void decodeBlock(Vector4 colors[16]) const;
}; };
/// BC7 block. /// BC7 block.

View File

@ -14,7 +14,8 @@ SET(IMAGE_SRCS
NormalMap.h NormalMap.cpp NormalMap.h NormalMap.cpp
PixelFormat.h PixelFormat.h
PsdFile.h PsdFile.h
TgaFile.h) TgaFile.h
KtxFile.h KtxFile.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -454,7 +454,8 @@ namespace
{ D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } }, { D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } },
{ D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } }, { D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } },
{ D3DFMT_A8L8, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0, 0, 0xFF00 } }, { D3DFMT_A8L8, 0, { 16, 0xFF, 0, 0, 0xFF00 } },
{ 0, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0xFF00, 0, 0 } },
}; };
static const uint s_formatCount = NV_ARRAY_SIZE(s_formats); static const uint s_formatCount = NV_ARRAY_SIZE(s_formats);
@ -635,7 +636,7 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{ {
// set fourcc pixel format. // set fourcc pixel format.
this->pf.flags = DDPF_FOURCC; this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3); this->pf.fourcc = NV_MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0; this->pf.bitcount = 0;
this->pf.rmask = 0; this->pf.rmask = 0;
@ -659,7 +660,7 @@ void DDSHeader::setFormatCode(uint32 code)
/// Pack the four swizzle characters with NV_MAKEFOURCC and store the result
/// in the pixel format's bitcount field.
void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
    this->pf.bitcount = NV_MAKEFOURCC(c0, c1, c2, c3);
}
@ -1445,7 +1446,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{ {
BlockBC6 block; BlockBC6 block;
*stream << block; *stream << block;
Vector3 colors[16]; Vector4 colors[16];
block.decodeBlock(colors); block.decodeBlock(colors);
// Clamp to [0, 1] and round to 8-bit // Clamp to [0, 1] and round to 8-bit
@ -1453,7 +1454,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{ {
for (int x = 0; x < 4; ++x) for (int x = 0; x < 4; ++x)
{ {
Vector3 px = colors[y*4 + x]; Vector4 px = colors[y*4 + x];
rgba->color(x, y).setRGBA( rgba->color(x, y).setRGBA(
ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f), ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f),
ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f), ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f),
@ -1535,7 +1536,7 @@ uint DirectDrawSurface::surfaceSize(uint mipmap) const
else { else {
w = (w + 3) / 4; w = (w + 3) / 4;
h = (h + 3) / 4; h = (h + 3) / 4;
d = d; // @@ How are 3D textures aligned? //d = d; // @@ How are 3D textures aligned?
return blockSize * w * h * d; return blockSize * w * h * d;
} }
} }

View File

@ -27,11 +27,9 @@
#include "nvimage.h" #include "nvimage.h"
// Pack four 8-bit characters into a 32-bit code; ch0 ends up in the lowest byte
// and ch3 in the highest.
#define NV_MAKEFOURCC(ch0, ch1, ch2, ch3) \
    (uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \
    (uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 ))
namespace nv namespace nv
{ {
@ -101,19 +99,26 @@ namespace nv
enum FOURCC enum FOURCC
{ {
FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'), FOURCC_NVTT = NV_MAKEFOURCC('N', 'V', 'T', 'T'),
FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '), FOURCC_DDS = NV_MAKEFOURCC('D', 'D', 'S', ' '),
FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'), FOURCC_DXT1 = NV_MAKEFOURCC('D', 'X', 'T', '1'),
FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'), FOURCC_DXT2 = NV_MAKEFOURCC('D', 'X', 'T', '2'),
FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'), FOURCC_DXT3 = NV_MAKEFOURCC('D', 'X', 'T', '3'),
FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'), FOURCC_DXT4 = NV_MAKEFOURCC('D', 'X', 'T', '4'),
FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'), FOURCC_DXT5 = NV_MAKEFOURCC('D', 'X', 'T', '5'),
FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'), FOURCC_RXGB = NV_MAKEFOURCC('R', 'X', 'G', 'B'),
FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'), FOURCC_ATI1 = NV_MAKEFOURCC('A', 'T', 'I', '1'),
FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'), FOURCC_ATI2 = NV_MAKEFOURCC('A', 'T', 'I', '2'),
FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'), FOURCC_A2XY = NV_MAKEFOURCC('A', '2', 'X', 'Y'),
FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'), FOURCC_DX10 = NV_MAKEFOURCC('D', 'X', '1', '0'),
FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'), FOURCC_UVER = NV_MAKEFOURCC('U', 'V', 'E', 'R'),
FOURCC_BC6H = NV_MAKEFOURCC('B', 'C', '6', 'H'),
FOURCC_BC7L = NV_MAKEFOURCC('B', 'C', '7', 'L'),
FOURCC_PVR0 = NV_MAKEFOURCC('P', 'V', 'R', '0'),
FOURCC_PVR1 = NV_MAKEFOURCC('P', 'V', 'R', '1'),
FOURCC_PVR2 = NV_MAKEFOURCC('P', 'V', 'R', '2'),
FOURCC_PVR3 = NV_MAKEFOURCC('P', 'V', 'R', '3'),
}; };

View File

@ -132,6 +132,59 @@ float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img)
} }
float nv::rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight)
{
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mse = 0;
const uint w0 = ref->width();
const uint h0 = ref->height();
const uint d0 = ref->depth();
const uint w1 = img->width();
const uint h1 = img->height();
const uint d1 = img->depth();
for (uint z = 0; z < d0; z++) {
for (uint y = 0; y < h0; y++) {
for (uint x = 0; x < w0; x++) {
float r0 = ref->pixel(0, x, y, z);
float g0 = ref->pixel(1, x, y, z);
float b0 = ref->pixel(2, x, y, z);
float a0 = ref->pixel(3, x, y, z);
float fx = float(x) / w0;
float fy = float(y) / h0;
float fz = float(z) / d0;
float r1 = img->sampleLinear(0, fx, fy, fz, wm);
float g1 = img->sampleLinear(1, fx, fy, fz, wm);
float b1 = img->sampleLinear(2, fx, fy, fz, wm);
float a1 = img->sampleLinear(2, fx, fy, fz, wm);
float dr = r0 - r1;
float dg = g0 - g1;
float db = b0 - b1;
float da = a0 - a1;
float w = 1;
if (alphaWeight) w = a0 * a0; // @@ a0*a1 or a0*a0 ?
mse += (dr * dr) * w;
mse += (dg * dg) * w;
mse += (db * db) * w;
mse += (da * da);
}
}
}
int count = w0 * h0 * d0;
return float(sqrt(mse / count));
}
// Color space conversions based on: // Color space conversions based on:
// http://www.brucelindbloom.com/ // http://www.brucelindbloom.com/

View File

@ -1,5 +1,6 @@
#include "nvimage.h" #include "nvimage.h"
#include "FloatImage.h" // For FloatImage::WrapMode
namespace nv namespace nv
@ -9,13 +10,15 @@ namespace nv
float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight); float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float rmsAlphaError(const FloatImage * ref, const FloatImage * img); float rmsAlphaError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight);
float cieLabError(const FloatImage * ref, const FloatImage * img); float cieLabError(const FloatImage * ref, const FloatImage * img);
float cieLab94Error(const FloatImage * ref, const FloatImage * img); float cieLab94Error(const FloatImage * ref, const FloatImage * img);
float spatialCieLabError(const FloatImage * ref, const FloatImage * img); float spatialCieLabError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float averageAngularError(const FloatImage * img0, const FloatImage * img1); float averageAngularError(const FloatImage * img0, const FloatImage * img1);
float rmsAngularError(const FloatImage * img0, const FloatImage * img1); float rmsAngularError(const FloatImage * img0, const FloatImage * img1);

View File

@ -4,6 +4,8 @@
#include "Filter.h" #include "Filter.h"
#include "Image.h" #include "Image.h"
#include "nvthread/ParallelFor.h"
#include "nvmath/Color.h" #include "nvmath/Color.h"
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl" #include "nvmath/Matrix.inl"
@ -28,6 +30,13 @@ FloatImage::FloatImage() : m_componentCount(0), m_width(0), m_height(0), m_depth
{ {
} }
/// Copy ctor. Allocates storage with the same component count and dimensions
/// as `img`, then copies its float data.
FloatImage::FloatImage(const FloatImage & img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0),
    m_pixelCount(0), m_floatCount(0), m_mem(NULL)
{
    allocate(img.m_componentCount, img.m_width, img.m_height, img.m_depth);

    // memcpy must not receive null pointers, even for a zero-byte copy
    // (e.g. when copying an image that was never allocated).
    if (m_mem != NULL && img.m_mem != NULL && m_floatCount != 0) {
        memcpy(m_mem, img.m_mem, m_floatCount * sizeof(float));
    }
}
/// Ctor. Init from image. /// Ctor. Init from image.
FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0), FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0),
m_pixelCount(0), m_floatCount(0), m_mem(NULL) m_pixelCount(0), m_floatCount(0), m_mem(NULL)
@ -41,27 +50,32 @@ FloatImage::~FloatImage()
free(); free();
} }
/// Init the floating point image from a regular image. /// Init the floating point image from a regular image.
void FloatImage::initFrom(const Image * img) void FloatImage::initFrom(const Image * img)
{ {
nvCheck(img != NULL); nvCheck(img != NULL);
allocate(4, img->width(), img->height(), img->depth()); uint channel_count = 3;
if (img->format() == Image::Format_ARGB) channel_count = 4;
allocate(channel_count, img->width(), img->height(), img->depth());
float * red_channel = channel(0); float * red_channel = channel(0);
float * green_channel = channel(1); float * green_channel = channel(1);
float * blue_channel = channel(2); float * blue_channel = channel(2);
float * alpha_channel = channel(3); float * alpha_channel = (channel_count == 4) ? channel(3) : NULL;
float scale = 1.0f / 255.0f;
const uint count = m_pixelCount; const uint count = m_pixelCount;
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [&](int i) {
Color32 pixel = img->pixel(i); Color32 pixel = img->pixel(i);
red_channel[i] = float(pixel.r) / 255.0f; red_channel[i] = float(pixel.r) * scale;
green_channel[i] = float(pixel.g) / 255.0f; green_channel[i] = float(pixel.g) * scale;
blue_channel[i] = float(pixel.b) / 255.0f; blue_channel[i] = float(pixel.b) * scale;
alpha_channel[i] = float(pixel.a) / 255.0f; if (channel_count == 4) alpha_channel[i] = float(pixel.a) * scale;
} }//);
} }
/// Convert the floating point image to a regular image. /// Convert the floating point image to a regular image.
@ -475,13 +489,17 @@ float FloatImage::sampleLinearClamp(uint c, float x, float y, float z) const
const float fracY = frac(y); const float fracY = frac(y);
const float fracZ = frac(z); const float fracZ = frac(z);
//x -= fracX;
//y -= fracY;
//z -= fracZ;
// @@ Using floor in some places, but round in others? // @@ Using floor in some places, but round in others?
const int ix0 = ::clamp(ifloor(x), 0, w-1); const int ix0 = ::clamp(ifloor(x), 0, w-1);
const int iy0 = ::clamp(ifloor(y), 0, h-1); const int iy0 = ::clamp(ifloor(y), 0, h-1);
const int iz0 = ::clamp(ifloor(z), 0, h-1); const int iz0 = ::clamp(ifloor(z), 0, d-1);
const int ix1 = ::clamp(ifloor(x)+1, 0, w-1); const int ix1 = ::clamp(ifloor(x)+1, 0, w-1);
const int iy1 = ::clamp(ifloor(y)+1, 0, h-1); const int iy1 = ::clamp(ifloor(y)+1, 0, h-1);
const int iz1 = ::clamp(ifloor(z)+1, 0, h-1); const int iz1 = ::clamp(ifloor(z)+1, 0, d-1);
return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ); return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ);
} }
@ -757,8 +775,8 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
dst_image->allocate(m_componentCount, w, h); dst_image->allocate(m_componentCount, w, h);
// @@ We could avoid this allocation, write directly to dst_plane. // @@ We could avoid this allocation, write directly to dst_plane.
Array<float> tmp_column(h); //Array<float> tmp_column(h);
tmp_column.resize(h); //tmp_column.resize(h);
for (uint c = 0; c < m_componentCount; c++) for (uint c = 0; c < m_componentCount; c++)
{ {
@ -767,19 +785,21 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * tmp_plane = tmp_image->plane(c, z); float * tmp_plane = tmp_image->plane(c, z);
for (uint y = 0; y < m_height; y++) { for (uint y = 0; y < m_height; y++) {
//parallel_for(m_height, [&](int y) {
this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w); this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w);
} }//);
float * dst_plane = dst_image->plane(c, z); float * dst_plane = dst_image->plane(c, z);
for (uint x = 0; x < w; x++) { for (uint x = 0; x < w; x++) {
tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); //parallel_for(w, [&](int x) {
tmp_image->applyKernelY(ykernel, x, z, c, wm, dst_plane + x, w);
// @@ We could avoid this copy, write directly to dst_plane. // @@ We could avoid this copy, write directly to dst_plane.
for (uint y = 0; y < h; y++) { /*for (uint y = 0; y < h; y++) {
dst_plane[y * w + x] = tmp_column[y]; dst_plane[y * w + x] = tmp_column[y];
} }*/
} }//);
} }
} }
} }
@ -840,7 +860,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, W
for (uint z = 0; z < d; z++ ) { for (uint z = 0; z < d; z++ ) {
for (uint x = 0; x < w; x++) { for (uint x = 0; x < w; x++) {
tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1);
for (uint y = 0; y < h; y++) { for (uint y = 0; y < h; y++) {
dst_channel[z * h * w + y * w + x] = tmp_column[y]; dst_channel[z * h * w + y * w + x] = tmp_column[y];
@ -890,7 +910,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * dst_plane = dst_image->plane(c, z); float * dst_plane = dst_image->plane(c, z);
for (uint x = 0; x < w; x++) { for (uint x = 0; x < w; x++) {
tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1);
// @@ Avoid this copy, write directly to dst_plane. // @@ Avoid this copy, write directly to dst_plane.
for (uint y = 0; y < h; y++) { for (uint y = 0; y < h; y++) {
@ -961,7 +981,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, W
for (uint z = 0; z < d; z++ ) { for (uint z = 0; z < d; z++ ) {
for (uint x = 0; x < w; x++) { for (uint x = 0; x < w; x++) {
tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer()); tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1);
for (uint y = 0; y < h; y++) { for (uint y = 0; y < h; y++) {
dst_channel[z * h * w + y * w + x] = tmp_column[y]; dst_channel[z * h * w + y * w + x] = tmp_column[y];
@ -1124,7 +1144,7 @@ void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, W
} }
/// Apply 1D vertical kernel at the given coordinates and return result. /// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output) const void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output, int output_stride) const
{ {
const uint length = k.length(); const uint length = k.length();
const float scale = float(length) / float(m_height); const float scale = float(length) / float(m_height);
@ -1151,7 +1171,7 @@ void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, W
sum += k.valueAt(i, j) * channel[idx]; sum += k.valueAt(i, j) * channel[idx];
} }
output[i] = sum; output[i * output_stride] = sum;
} }
} }
@ -1225,7 +1245,7 @@ void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, u
} }
/// Apply 1D vertical kernel at the given coordinates and return result. /// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output) const void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output, int output_stride) const
{ {
const uint length = k.length(); const uint length = k.length();
const float scale = float(length) / float(m_height); const float scale = float(length) / float(m_height);
@ -1256,7 +1276,7 @@ void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, u
sum += w * channel[idx]; sum += w * channel[idx];
} }
output[i] = sum / norm; output[i * output_stride] = sum / norm;
} }
} }
@ -1432,11 +1452,19 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int
float minAlphaScale = 0.0f; float minAlphaScale = 0.0f;
float maxAlphaScale = 4.0f; float maxAlphaScale = 4.0f;
float alphaScale = 1.0f; float alphaScale = 1.0f;
float bestAlphaScale = 1.0f;
float bestError = NV_FLOAT_MAX;
// Determine desired scale using a binary search. Hardcoded to 8 steps max. // Determine desired scale using a binary search. Hardcoded to 10 steps max.
for (int i = 0; i < 10; i++) { for (int i = 0; i < 10; i++) {
float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale); float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale);
float error = fabsf(currentCoverage - desiredCoverage);
if (error < bestError) {
bestError = error;
bestAlphaScale = alphaScale;
}
if (currentCoverage < desiredCoverage) { if (currentCoverage < desiredCoverage) {
minAlphaScale = alphaScale; minAlphaScale = alphaScale;
} }
@ -1451,7 +1479,7 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int
} }
// Scale alpha channel. // Scale alpha channel.
scaleBias(alphaChannel, 1, alphaScale, 0.0f); scaleBias(alphaChannel, 1, bestAlphaScale, 0.0f);
clamp(alphaChannel, 1, 0.0f, 1.0f); clamp(alphaChannel, 1, 0.0f, 1.0f);
#endif #endif
#if _DEBUG #if _DEBUG

View File

@ -35,6 +35,7 @@ namespace nv
}; };
NVIMAGE_API FloatImage(); NVIMAGE_API FloatImage();
NVIMAGE_API FloatImage(const FloatImage & img);
NVIMAGE_API FloatImage(const Image * img); NVIMAGE_API FloatImage(const Image * img);
NVIMAGE_API virtual ~FloatImage(); NVIMAGE_API virtual ~FloatImage();
@ -92,10 +93,10 @@ namespace nv
NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const; NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const; NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const;

View File

@ -42,13 +42,21 @@ const Image & Image::operator=(const Image & img)
/// Release the current contents via free(), record the new dimensions and
/// (re)allocate storage for w * h * d pixels.
void Image::allocate(uint w, uint h, uint d/*= 1*/)
{
    free();
    m_width = w;
    m_height = h;
    m_depth = d;
    m_data = realloc<Color32>(m_data, w * h * d);
}
/// Release the current contents via free(), then store `data` as the pixel
/// buffer together with the given dimensions. No pixels are copied.
// NOTE(review): presumably the image takes ownership of `data` from here on,
// so it must be releasable by Image::free() — confirm against free().
void Image::acquire(Color32 * data, uint w, uint h, uint d/*= 1*/)
{
    free();

    m_data = data;
    m_depth = d;
    m_height = h;
    m_width = w;
}
void Image::resize(uint w, uint h, uint d/*= 1*/) { void Image::resize(uint w, uint h, uint d/*= 1*/) {
Image img; Image img;

View File

@ -34,6 +34,7 @@ namespace nv
void allocate(uint w, uint h, uint d = 1); void allocate(uint w, uint h, uint d = 1);
void acquire(Color32 * data, uint w, uint h, uint d = 1);
bool load(const char * name); bool load(const char * name);
void resize(uint w, uint h, uint d = 1); void resize(uint w, uint h, uint d = 1);

View File

@ -8,6 +8,8 @@
#include "DirectDrawSurface.h" #include "DirectDrawSurface.h"
#include "PixelFormat.h" #include "PixelFormat.h"
#include "nvthread/ParallelFor.h"
#include "nvmath/Color.h" #include "nvmath/Color.h"
#include "nvmath/Half.h" #include "nvmath/Half.h"
@ -19,31 +21,31 @@
#include "nvcore/TextWriter.h" #include "nvcore/TextWriter.h"
// Extern // Extern
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
# include <FreeImage.h> # include <FreeImage.h>
// If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms. // If FreeImage available, do not use individual libraries, since that produces link conflicts in some platforms.
# undef HAVE_JPEG # undef NV_HAVE_JPEG
# undef HAVE_PNG # undef NV_HAVE_PNG
# undef HAVE_TIFF # undef NV_HAVE_TIFF
# undef HAVE_OPENEXR # undef NV_HAVE_OPENEXR
#endif #endif
#if defined(HAVE_JPEG) #if defined(NV_HAVE_JPEG)
extern "C" { extern "C" {
# include <jpeglib.h> # include <jpeglib.h>
} }
#endif #endif
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
# include <png.h> # include <png.h>
#endif #endif
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
# define _TIFF_DATA_TYPEDEFS_ # define _TIFF_DATA_TYPEDEFS_
# include <tiffio.h> # include <tiffio.h>
#endif #endif
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
# include <ImfIO.h> # include <ImfIO.h>
# include <ImathBox.h> # include <ImathBox.h>
# include <ImfChannelList.h> # include <ImfChannelList.h>
@ -52,7 +54,7 @@ extern "C" {
# include <ImfArray.h> # include <ImfArray.h>
#endif #endif
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
# define STBI_NO_STDIO # define STBI_NO_STDIO
# include <stb_image.h> # include <stb_image.h>
#endif #endif
@ -303,6 +305,51 @@ static bool saveTGA(Stream & s, const Image * img)
return true; return true;
} }
#pragma optimize("", off)
// Save BMP image.
// Writes `img` to `s` as an uncompressed 24-bit BMP: file header, info header,
// then the scanlines from the bottom image row to the top one, each stored as
// B, G, R bytes and padded to a multiple of four bytes. Always returns true.
static bool saveBMP(Stream & s, const Image * img)
{
    const int w = img->width();
    const int h = img->height();

    // BMP scanlines must be padded to a 4-byte boundary; sizes have to account for it.
    const int row_size = (w * 3 + 3) & ~3;
    const int image_size = row_size * h;

    BmpFileHeader header;
    zero(header);
    header.type = BM_TYPE;
    header.size = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE + image_size;
    header.offBits = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE;

    BmpInfoHeader info;
    zero(info);
    info.size = BITMAPINFOHEADER_SIZE;
    info.width = w;
    info.height = h;
    info.planes = 1;
    info.bitCount = 24;
    info.sizeImage = image_size;
    info.xPelsPerMeter = 2000;
    info.yPelsPerMeter = 2000;

    s << header;
    s << info;

    nv::Array<uint8> data;
    data.resize(row_size);

    // The padding bytes are never touched by the pixel loop; clear them once.
    for (int i = w * 3; i < row_size; i++) {
        data[i] = 0;
    }

    for (int y = 0; y < h; y++) {
        // Rows are emitted bottom-up.
        const int src_y = h - y - 1;
        for (int x = 0; x < w; x++) {
            const Color32 c = img->pixel(x, src_y);
            data[x * 3 + 0] = c.b;
            data[x * 3 + 1] = c.g;
            data[x * 3 + 2] = c.r;
        }
        s.serialize(data.buffer(), data.size());
    }

    return true;
}
/*static Image * loadPPM(Stream & s) /*static Image * loadPPM(Stream & s)
{ {
// @@ // @@
@ -324,7 +371,10 @@ static bool savePPM(Stream & s, const Image * img)
writer.writeString("255\n"); writer.writeString("255\n");
for (uint i = 0; i < w * h; i++) { for (uint i = 0; i < w * h; i++) {
Color32 c = img->pixel(i); Color32 c = img->pixel(i);
s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b; uint8 r = c.r; // current version of apple's llvm compiling for arm64 doesn't like taking the address of a bit-field. Workaround by using the stack
uint8 g = c.g;
uint8 b = c.b;
s << r << g << b;
} }
return true; return true;
@ -653,7 +703,7 @@ static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component
} }
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length) static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
{ {
@ -902,9 +952,9 @@ static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/)
return true; return true;
} }
#endif // defined(HAVE_PNG) #endif // defined(NV_HAVE_PNG)
#if defined(HAVE_JPEG) #if defined(NV_HAVE_JPEG)
static void init_source (j_decompress_ptr /*cinfo*/){ static void init_source (j_decompress_ptr /*cinfo*/){
} }
@ -1011,9 +1061,9 @@ static Image * loadJPG(Stream & s)
return img.release(); return img.release();
} }
#endif // defined(HAVE_JPEG) #endif // defined(NV_HAVE_JPEG)
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
/* /*
static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size) static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size)
@ -1207,9 +1257,9 @@ static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint
return true; return true;
} }
#endif // defined(HAVE_TIFF) #endif // defined(NV_HAVE_TIFF)
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
namespace namespace
{ {
@ -1348,10 +1398,10 @@ static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint
return true; return true;
} }
#endif // defined(HAVE_OPENEXR) #endif // defined(NV_HAVE_OPENEXR)
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle) static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle)
{ {
@ -1688,10 +1738,10 @@ bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Fl
return result; return result;
} }
#endif // defined(HAVE_FREEIMAGE) #endif // defined(NV_HAVE_FREEIMAGE)
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
static Image * loadSTB(Stream & s) static Image * loadSTB(Stream & s)
{ {
@ -1704,28 +1754,22 @@ static Image * loadSTB(Stream & s)
int w, h, n; int w, h, n;
uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4); uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4);
// @@ Hack: STB is returning n=4, because we request 4 components, even when input only has 3.
n = 3;
delete [] buffer; delete [] buffer;
if (data != NULL) { if (data != NULL) {
Image * img = new Image; Image * img = new Image;
img->allocate(w, h); img->acquire((Color32 *)data, w, h);
img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB); img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB);
for (int y = 0; y < h; ++y) int count = w * h;
{ for (int i = 0; i < count; ++i) {
nv::Color32* dest = img->scanline(y); //parallel_for(count, 128, [&](int i) {
uint8* src = data + y * w * 4; Color32 & pixel = img->pixel(i);
swap(pixel.r, pixel.b);
for (int x = 0; x < w; ++x) }//);
{
dest[x].r = src[x * 4 + 0];
dest[x].g = src[x * 4 + 1];
dest[x].b = src[x * 4 + 2];
dest[x].a = src[x * 4 + 3];
}
}
free(data);
return img; return img;
} }
@ -1766,7 +1810,7 @@ static FloatImage * loadFloatSTB(Stream & s)
return NULL; return NULL;
} }
#endif // defined(HAVE_STBIMAGE) #endif // defined(NV_HAVE_STBIMAGE)
@ -1804,32 +1848,33 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s)
return loadPPM(s); return loadPPM(s);
}*/ }*/
#if defined(HAVE_JPEG) #if defined(NV_HAVE_JPEG)
if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) { if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) {
return loadJPG(s); return loadJPG(s);
} }
#endif #endif
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) { if (strCaseDiff(extension, ".png") == 0) {
return loadPNG(s); return loadPNG(s);
} }
#endif #endif
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFreeImage(fif, s); return loadFreeImage(fif, s);
} }
#endif #endif
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
return loadSTB(s); return loadSTB(s);
#endif #endif
return NULL; return NULL;
} }
bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/) bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/)
{ {
nvDebugCheck(fileName != NULL); nvDebugCheck(fileName != NULL);
@ -1838,6 +1883,10 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
const char * extension = Path::extension(fileName); const char * extension = Path::extension(fileName);
if (strCaseDiff(extension, ".bmp") == 0) {
return saveBMP(s, img);
}
if (strCaseDiff(extension, ".tga") == 0) { if (strCaseDiff(extension, ".tga") == 0) {
return saveTGA(s, img); return saveTGA(s, img);
} }
@ -1846,13 +1895,13 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
return savePPM(s, img); return savePPM(s, img);
} }
#if defined(HAVE_PNG) #if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) { if (strCaseDiff(extension, ".png") == 0) {
return savePNG(s, img, tags); return savePNG(s, img, tags);
} }
#endif #endif
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFreeImage(fif, s, img, tags); return saveFreeImage(fif, s, img, tags);
@ -1899,27 +1948,27 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
return loadFloatPFM(s); return loadFloatPFM(s);
}*/ }*/
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
#pragma NV_MESSAGE("TODO: Load TIFF from stream.") #pragma NV_MESSAGE("TODO: Load TIFF from stream.")
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return loadFloatTIFF(fileName, s); return loadFloatTIFF(fileName, s);
} }
#endif #endif
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
#pragma NV_MESSAGE("TODO: Load EXR from stream.") #pragma NV_MESSAGE("TODO: Load EXR from stream.")
if (strCaseDiff(extension, ".exr") == 0) { if (strCaseDiff(extension, ".exr") == 0) {
return loadFloatEXR(fileName, s); return loadFloatEXR(fileName, s);
} }
#endif #endif
#if defined(HAVE_STBIMAGE) #if defined(NV_HAVE_STBIMAGE)
if (strCaseDiff(extension, ".hdr") == 0) { if (strCaseDiff(extension, ".hdr") == 0) {
return loadFloatSTB(s); return loadFloatSTB(s);
} }
#endif #endif
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFloatFreeImage(fif, s); return loadFloatFreeImage(fif, s);
@ -1961,7 +2010,7 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
return saveFloatPFM(s, fimage, baseComponent, componentCount); return saveFloatPFM(s, fimage, baseComponent, componentCount);
}*/ }*/
#if defined(HAVE_FREEIMAGE) #if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName); FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) { if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount); return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount);
@ -2005,14 +2054,15 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui
} }
const char * extension = Path::extension(fileName); const char * extension = Path::extension(fileName);
NV_UNUSED(extension);
#if defined(HAVE_OPENEXR) #if defined(NV_HAVE_OPENEXR)
if (strCaseDiff(extension, ".exr") == 0) { if (strCaseDiff(extension, ".exr") == 0) {
return saveFloatEXR(fileName, fimage, baseComponent, componentCount); return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
} }
#endif #endif
#if defined(HAVE_TIFF) #if defined(NV_HAVE_TIFF)
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) { if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return saveFloatTIFF(fileName, fimage, baseComponent, componentCount); return saveFloatTIFF(fileName, fimage, baseComponent, componentCount);
} }

View File

@ -1,6 +1,7 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com> // This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "KtxFile.h" #include "KtxFile.h"
#include "nvcore/StdStream.h"
using namespace nv; using namespace nv;
@ -10,6 +11,8 @@ static const uint8 fileIdentifier[12] = {
0x0D, 0x0A, 0x1A, 0x0A 0x0D, 0x0A, 0x1A, 0x0A
}; };
namespace nv
{
KtxHeader::KtxHeader() { KtxHeader::KtxHeader() {
memcpy(identifier, fileIdentifier, 12); memcpy(identifier, fileIdentifier, 12);
@ -19,8 +22,8 @@ KtxHeader::KtxHeader() {
glType = 0; glType = 0;
glTypeSize = 1; glTypeSize = 1;
glFormat = 0; glFormat = 0;
glInternalFormat = KTX_RGBA; glInternalFormat = KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1;
glBaseInternalFormat = KTX_RGBA; glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
pixelWidth = 0; pixelWidth = 0;
pixelHeight = 0; pixelHeight = 0;
pixelDepth = 0; pixelDepth = 0;
@ -31,9 +34,9 @@ KtxHeader::KtxHeader() {
} }
Stream & operator<< (Stream & s, DDSHeader & header) { Stream & operator<< (Stream & s, KtxHeader & header) {
s.serialize(header.identifier, 12); s.serialize(header.identifier, 12);
s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat; s << header.endianness << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
s << header.pixelWidth << header.pixelHeight << header.pixelDepth; s << header.pixelWidth << header.pixelHeight << header.pixelDepth;
s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels; s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels;
s << header.bytesOfKeyValueData; s << header.bytesOfKeyValueData;
@ -41,7 +44,7 @@ Stream & operator<< (Stream & s, DDSHeader & header) {
} }
KtxFile::KtxFile() { /*KtxFile::KtxFile() {
} }
KtxFile::~KtxFile() { KtxFile::~KtxFile() {
} }
@ -49,7 +52,7 @@ KtxFile::~KtxFile() {
void KtxFile::addKeyValue(const char * key, const char * value) { void KtxFile::addKeyValue(const char * key, const char * value) {
keyArray.append(key); keyArray.append(key);
valueArray.append(value); valueArray.append(value);
bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1; header.bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
} }
@ -77,7 +80,8 @@ Stream & operator<< (Stream & s, KtxFile & file) {
} }
return s; return s;
} }*/
} // nv

View File

@ -6,6 +6,7 @@
#include "nvimage.h" #include "nvimage.h"
#include "nvcore/StrLib.h" #include "nvcore/StrLib.h"
#include "nvcore/Array.h"
// KTX File format specification: // KTX File format specification:
// http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key // http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key
@ -14,22 +15,99 @@ namespace nv
{ {
class Stream; class Stream;
// GL types (Table 3.2) // GL types
const uint KTX_UNSIGNED_BYTE; const uint KTX_UNSIGNED_BYTE = 0x1401;
const uint KTX_UNSIGNED_SHORT_5_6_5; const uint KTX_BYTE = 0x1400;
// ... const uint KTX_UNSIGNED_SHORT = 0x1403;
const uint KTX_SHORT = 0x1402;
const uint KTX_UNSIGNED_INT = 0x1405;
const uint KTX_INT = 0x1404;
const uint KTX_FLOAT = 0x1406;
const uint KTX_UNSIGNED_BYTE_3_3_2 = 0x8032;
const uint KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362;
const uint KTX_UNSIGNED_SHORT_5_6_5 = 0x8363;
const uint KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364;
const uint KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033;
const uint KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365;
const uint KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034;
const uint KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366;
const uint KTX_UNSIGNED_INT_8_8_8_8 = 0x8035;
const uint KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367;
const uint KTX_UNSIGNED_INT_10_10_10_2 = 0x8036;
const uint KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368;
// GL formats (Table 3.3) // GL formats
// ... const uint KTX_FORMAT_RED = 0x1903;
const uint KTX_FORMAT_RG = 0x8227;
const uint KTX_FORMAT_RGB = 0x1907;
const uint KTX_FORMAT_BGR = 0x80E0;
const uint KTX_FORMAT_RGBA = 0x1908;
const uint KTX_FORMAT_BGRA = 0x80E1;
const uint KTX_FORMAT_RED_INTEGER = 0x8D94;
const uint KTX_FORMAT_RG_INTEGER = 0x8228;
const uint KTX_FORMAT_RGB_INTEGER = 0x8D98;
const uint KTX_FORMAT_BGR_INTEGER = 0x8D9A;
const uint KTX_FORMAT_RGBA_INTEGER = 0x8D99;
const uint KTX_FORMAT_BGRA_INTEGER = 0x8D9B;
const uint KTX_FORMAT_STENCIL_INDEX = 0x1901;
const uint KTX_FORMAT_DEPTH_COMPONENT = 0x1902;
const uint KTX_FORMAT_DEPTH_STENCIL = 0x84F9;
// GL internal formats (Table 3.12, 3.13) // GL internal formats
// ... // BC1
const uint KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1 = 0x83F0;
const uint KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 = 0x8C4C;
// BC1a
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1 = 0x83F1;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 = 0x8C4D;
// BC2
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3 = 0x83F2;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 = 0x8C4E;
// BC3
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5 = 0x83F3;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 = 0x8C4F;
// BC4
const uint KTX_INTERNAL_COMPRESSED_RED_RGTC1 = 0x8DBB;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 = 0x8DBC;
// BC5
const uint KTX_INTERNAL_COMPRESSED_RG_RGTC2 = 0x8DBD;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 = 0x8DBE;
// BC6
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F;
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E;
// BC7
const uint KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D;
// GL base internal format. (Table 3.11) // ETC
const uint KTX_RGB; const uint KTX_INTERNAL_COMPRESSED_RGB_ETC1 = 0x8D64;
const uint KTX_RGBA; const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC1 = 0x8D64; // ???
const uint KTX_ALPHA;
// ... // ETC2
const uint KTX_INTERNAL_COMPRESSED_RED_EAC = 0x9270;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_EAC = 0x9271;
const uint KTX_INTERNAL_COMPRESSED_RG_EAC = 0x9272;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_EAC = 0x9273;
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC2 = 0x9274;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC2 = 0x9275;
const uint KTX_INTERNAL_COMPRESSED_RGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9276;
const uint KTX_INTERNAL_COMPRESSED_SRGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9277;
const uint KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC = 0x9278;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC = 0x9279;
// GL base internal formats
const uint KTX_BASE_INTERNAL_DEPTH_COMPONENT = 0x1902;
const uint KTX_BASE_INTERNAL_DEPTH_STENCIL = 0x84F9;
const uint KTX_BASE_INTERNAL_RED = 0x1903;
const uint KTX_BASE_INTERNAL_RG = 0x8227;
const uint KTX_BASE_INTERNAL_RGB = 0x1907;
const uint KTX_BASE_INTERNAL_RGBA = 0x1908;
const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901;
struct KtxHeader { struct KtxHeader {
@ -52,10 +130,10 @@ namespace nv
}; };
NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header); NVIMAGE_API Stream & operator<< (Stream & s, KtxHeader & header);
struct KtxFile { /* struct KtxFile {
KtxFile(); KtxFile();
~KtxFile(); ~KtxFile();
@ -66,10 +144,9 @@ namespace nv
Array<String> keyArray; Array<String> keyArray;
Array<String> valueArray; Array<String> valueArray;
}; };
NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file); NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);*/
/* /*

View File

@ -101,6 +101,48 @@ inline Stream & operator<< (Stream & s, TgaFile & tga)
return s; return s;
} }
// @@ Move to BMP file?
// Sizes in bytes of the two BMP headers declared below; each equals the sum of the
// field widths of the corresponding struct (2+4+2+2+4 and 3*4+2*2+6*4).
const int BITMAPFILEHEADER_SIZE = 14;
const int BITMAPINFOHEADER_SIZE = 40;
// The characters 'B' and 'M' packed into 16 bits with 'B' in the low byte
// (0x4D42 with an ASCII execution character set).
const int BM_TYPE = ((unsigned int)'M') << 8 | ((unsigned int)'B');
// BMP Header.
// First of the two BMP headers; its fields add up to BITMAPFILEHEADER_SIZE (14) bytes.
// NOTE(review): field names mirror the Win32 BITMAPFILEHEADER, so the meanings noted
// below are presumed from that layout -- confirm against the BMP loader/saver.
// NOTE(review): the in-memory struct is presumably padded after 'type', so it should be
// serialized through operator<< rather than as a raw memory block -- confirm.
struct BmpFileHeader {
uint16 type; // presumably the BM_TYPE signature
uint32 size; // presumably the total file size in bytes
uint16 reserved1;
uint16 reserved2;
uint32 offBits; // presumably the byte offset from the file start to the pixel data
};
// Second BMP header; its fields add up to BITMAPINFOHEADER_SIZE (40) bytes.
// NOTE(review): field names mirror the Win32 BITMAPINFOHEADER, so the meanings noted
// below are presumed from that layout -- confirm against the BMP loader/saver.
struct BmpInfoHeader {
uint32 size; // presumably the size of this header (BITMAPINFOHEADER_SIZE)
uint32 width;
// NOTE(review): Win32 declares width/height as signed (a negative height marks a
// top-down image); they are unsigned here -- confirm that such files are not expected.
uint32 height;
uint16 planes;
uint16 bitCount; // presumably bits per pixel
uint32 compression;
uint32 sizeImage;
uint32 xPelsPerMeter;
uint32 yPelsPerMeter;
uint32 clrUsed;
uint32 clrImportant;
};
// Serializes every field of a BmpFileHeader through the stream, in declaration
// order, and returns the stream produced by the last field's operator<<.
inline Stream & operator<< (Stream & s, BmpFileHeader & bmp) {
    // Signature and total size first...
    Stream & afterSize = s << bmp.type << bmp.size;
    // ...then the two reserved words and the offset field.
    return afterSize << bmp.reserved1 << bmp.reserved2 << bmp.offBits;
}
// Serializes every field of a BmpInfoHeader through the stream, in declaration
// order, and returns the stream that was passed in.
inline Stream & operator<< (Stream & s, BmpInfoHeader & bmp) {
    // Header size, image dimensions and pixel format.
    s << bmp.size
      << bmp.width
      << bmp.height
      << bmp.planes
      << bmp.bitCount
      << bmp.compression
      << bmp.sizeImage;

    // Resolution and palette information.
    s << bmp.xPelsPerMeter
      << bmp.yPelsPerMeter
      << bmp.clrUsed
      << bmp.clrImportant;

    return s;
}
} // nv namespace } // nv namespace
#endif // NV_IMAGE_TGAFILE_H #endif // NV_IMAGE_TGAFILE_H

View File

@ -7,7 +7,7 @@ SET(MATH_SRCS
Fitting.h Fitting.cpp Fitting.h Fitting.cpp
Gamma.h Gamma.cpp Gamma.h Gamma.cpp
Half.h Half.cpp Half.h Half.cpp
Matrix.h Matrix.h Matrix.inl Matrix.cpp
Plane.h Plane.inl Plane.cpp Plane.h Plane.inl Plane.cpp
SphericalHarmonic.h SphericalHarmonic.cpp SphericalHarmonic.h SphericalHarmonic.cpp
SimdVector.h SimdVector_SSE.h SimdVector_VE.h SimdVector.h SimdVector_SSE.h SimdVector_VE.h

View File

@ -157,6 +157,12 @@ namespace nv
return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale); return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale);
} }
// Converts the r, g and b channels of a Color32 to floats by multiplying each
// one by 1/255. The alpha channel is not used.
inline Vector3 toVector3(Color32 c)
{
    const float inv255 = 1.0f / 255.0f;

    const float red = c.r * inv255;
    const float green = c.g * inv255;
    const float blue = c.b * inv255;

    return Vector3(red, green, blue);
}
inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1) inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1)
{ {

View File

@ -197,6 +197,36 @@ bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
return true; return true;
} }
// Solves the 2x2 linear system A * x = b using LU decomposition (ludcmp)
// followed by back-substitution (lubksb).
//
// Returns false, leaving *x untouched, when ludcmp fails (singular matrix);
// otherwise stores the solution in *x and returns true. x must not be NULL.
bool nv::solveLU(const Matrix2 & A, const Vector2 & b, Vector2 * x)
{
    nvDebugCheck(x != NULL);

    // Working copy of A, handed to ludcmp/lubksb as an array of row pointers.
    float storage[2][2];
    float * rows[2] = { storage[0], storage[1] };

    for (int j = 0; j < 2; j++) {
        for (int i = 0; i < 2; i++) {
            rows[i][j] = A(i, j);
        }
    }

    // Outputs of the decomposition; the float output is not used afterwards.
    int permutation[2];
    float ignored;

    if (!ludcmp(rows, 2, permutation, &ignored)) {
        // Singular matrix.
        return false;
    }

    // lubksb works in place: it starts from the right-hand side stored in *x.
    *x = b;
    lubksb(rows, 2, permutation, x->component);

    return true;
}
bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x) bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
{ {
@ -223,6 +253,22 @@ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
return true; return true;
} }
bool nv::solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x)
{
nvDebugCheck(x != NULL);
const float det = A.determinant();
if (equal(det, 0.0f)) { // @@ Use input epsilon.
return false;
}
Matrix2 Ai = inverseCramer(A);
*x = transform(Ai, b);
return true;
}
// Inverse using gaussian elimination. From Jon's code. // Inverse using gaussian elimination. From Jon's code.

View File

@ -14,6 +14,46 @@ namespace nv
{ {
enum identity_t { identity }; enum identity_t { identity };
// 2x2 matrix.
// Elements are stored in a flat array of four floats in column-major order:
// element (row, col) lives at index 2 * col + row.
class NVMATH_CLASS Matrix2
{
public:
// Leaves the elements uninitialized.
Matrix2();
// Sets all four elements to f.
explicit Matrix2(float f);
// Builds the identity matrix.
explicit Matrix2(identity_t);
Matrix2(const Matrix2 & m);
// v0 becomes the first column and v1 the second column.
Matrix2(Vector2::Arg v0, Vector2::Arg v1);
// Arguments are given in storage order: a = (0,0), b = (1,0), c = (0,1), d = (1,1).
Matrix2(float a, float b, float c, float d);
// Access by flat storage index (idx < 4).
float data(uint idx) const;
float & data(uint idx);
// Access by (row, col); both must be less than 2.
float get(uint row, uint col) const;
float operator()(uint row, uint col) const;
float & operator()(uint row, uint col);
// Copies of row / column i (i < 2).
Vector2 row(uint i) const;
Vector2 column(uint i) const;
// Element-wise arithmetic; operator/= multiplies by the reciprocal of s.
void operator*=(float s);
void operator/=(float s);
void operator+=(const Matrix2 & m);
void operator-=(const Matrix2 & m);
// Uniform scale of every element by s.
void scale(float s);
// Multiplies the first column by s.x and the second column by s.y.
void scale(Vector2::Arg s);
float determinant() const;
private:
// Column-major storage, see the class comment.
float m_data[4];
};
// Solve equation system using LU decomposition and back-substitution.
// Solves m * x = b. Returns false without writing *x when the decomposition fails
// (singular matrix). x must not be NULL.
extern bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x);
// Solve equation system using Cramer's inverse.
// Solves A * x = b. Returns false without writing *x when the determinant of A
// compares equal to zero. x must not be NULL.
extern bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x);
// 3x3 matrix. // 3x3 matrix.
class NVMATH_CLASS Matrix3 class NVMATH_CLASS Matrix3
{ {
@ -52,6 +92,8 @@ namespace nv
// Solve equation system using Cramer's inverse. // Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x); extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
extern Matrix3 inverse(const Matrix3 & m);
// 4x4 matrix. // 4x4 matrix.
class NVMATH_CLASS Matrix class NVMATH_CLASS Matrix
@ -106,7 +148,6 @@ namespace nv
// Compute inverse using Gaussian elimination and partial pivoting. // Compute inverse using Gaussian elimination and partial pivoting.
extern Matrix inverse(const Matrix & m); extern Matrix inverse(const Matrix & m);
extern Matrix3 inverse(const Matrix3 & m);
} // nv namespace } // nv namespace

View File

@ -8,6 +8,199 @@
namespace nv namespace nv
{ {
// Default constructor. The body is empty, so the four elements are left uninitialized.
inline Matrix2::Matrix2() {}
// Builds a matrix whose four elements are all equal to f.
inline Matrix2::Matrix2(float f)
{
    m_data[0] = f;
    m_data[1] = f;
    m_data[2] = f;
    m_data[3] = f;
}
// Builds the identity matrix: ones on the diagonal, zeros elsewhere.
inline Matrix2::Matrix2(identity_t)
{
    // Flat indices 0 and 3 are the diagonal elements (0,0) and (1,1).
    m_data[0] = 1.0f;
    m_data[1] = 0.0f;
    m_data[2] = 0.0f;
    m_data[3] = 1.0f;
}
// Copy constructor: duplicates the four elements of m.
inline Matrix2::Matrix2(const Matrix2 & m)
{
    m_data[0] = m.m_data[0];
    m_data[1] = m.m_data[1];
    m_data[2] = m.m_data[2];
    m_data[3] = m.m_data[3];
}
// Builds a matrix from two vectors: v0 fills flat elements 0-1 (the first
// column) and v1 fills flat elements 2-3 (the second column).
inline Matrix2::Matrix2(Vector2::Arg v0, Vector2::Arg v1)
{
    // First column.
    m_data[0] = v0.x;
    m_data[1] = v0.y;

    // Second column.
    m_data[2] = v1.x;
    m_data[3] = v1.y;
}
// Builds a matrix from four scalars given in flat storage order. Because
// element (row, col) is stored at index 2 * col + row, this means
// a = (0,0), b = (1,0), c = (0,1) and d = (1,1).
inline Matrix2::Matrix2(float a, float b, float c, float d)
{
    m_data[0] = a;
    m_data[1] = b;
    m_data[2] = c;
    m_data[3] = d;
}
// Returns the element at flat storage index idx (debug-checked to be < 4).
inline float Matrix2::data(uint idx) const
{
    nvDebugCheck(idx < 4);
    const float & element = m_data[idx];
    return element;
}
// Mutable variant: returns a reference to the element at flat index idx.
inline float & Matrix2::data(uint idx)
{
    nvDebugCheck(idx < 4);
    float & element = m_data[idx];
    return element;
}
// Returns the element at (row, col); both indices are debug-checked to be < 2.
inline float Matrix2::get(uint row, uint col) const
{
    nvDebugCheck(row < 2 && col < 2);
    // Column-major addressing.
    const uint index = col * 2 + row;
    return m_data[index];
}
// Returns the element at (row, col); both indices are debug-checked to be < 2.
inline float Matrix2::operator()(uint row, uint col) const
{
    nvDebugCheck(row < 2 && col < 2);
    // Column-major addressing.
    const uint index = col * 2 + row;
    return m_data[index];
}
// Mutable variant: returns a reference to the element at (row, col).
inline float & Matrix2::operator()(uint row, uint col)
{
    nvDebugCheck(row < 2 && col < 2);
    // Column-major addressing.
    const uint index = col * 2 + row;
    return m_data[index];
}
// Returns row i (debug-checked to be < 2) as a vector.
inline Vector2 Matrix2::row(uint i) const
{
    nvDebugCheck(i < 2);
    const float first = get(i, 0);
    const float second = get(i, 1);
    return Vector2(first, second);
}
// Returns column i (debug-checked to be < 2) as a vector.
inline Vector2 Matrix2::column(uint i) const
{
    nvDebugCheck(i < 2);
    const float top = get(0, i);
    const float bottom = get(1, i);
    return Vector2(top, bottom);
}
// Multiplies every element by s.
inline void Matrix2::operator*=(float s)
{
    m_data[0] *= s;
    m_data[1] *= s;
    m_data[2] *= s;
    m_data[3] *= s;
}
// Divides every element by s. The reciprocal of s is computed once and the
// elements are multiplied by it.
inline void Matrix2::operator/=(float s)
{
    const float reciprocal = 1.0f / s;
    m_data[0] *= reciprocal;
    m_data[1] *= reciprocal;
    m_data[2] *= reciprocal;
    m_data[3] *= reciprocal;
}
// Adds m to this matrix element by element.
inline void Matrix2::operator+=(const Matrix2 & m)
{
    m_data[0] += m.m_data[0];
    m_data[1] += m.m_data[1];
    m_data[2] += m.m_data[2];
    m_data[3] += m.m_data[3];
}
// Subtracts m from this matrix element by element.
inline void Matrix2::operator-=(const Matrix2 & m)
{
    m_data[0] -= m.m_data[0];
    m_data[1] -= m.m_data[1];
    m_data[2] -= m.m_data[2];
    m_data[3] -= m.m_data[3];
}
// Returns the element-wise sum a + b.
inline Matrix2 operator+(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 sum(a);
    sum += b;
    return sum;
}
// Returns the element-wise difference a - b.
inline Matrix2 operator-(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 difference(a);
    difference -= b;
    return difference;
}
// Returns a copy of a with every element multiplied by s.
inline Matrix2 operator*(const Matrix2 & a, float s)
{
    Matrix2 scaled(a);
    scaled *= s;
    return scaled;
}
// Scalar-on-the-left form; gives the same result as a * s.
inline Matrix2 operator*(float s, const Matrix2 & a)
{
    return a * s;
}
// Returns a copy of a with every element divided by s (via Matrix2::operator/=).
inline Matrix2 operator/(const Matrix2 & a, float s)
{
    Matrix2 quotient(a);
    quotient /= s;
    return quotient;
}
// Returns the matrix product a * b: element (r, c) of the result is the dot
// product of row r of a with column c of b.
inline Matrix2 mul(const Matrix2 & a, const Matrix2 & b)
{
    Matrix2 product;
    for (int r = 0; r < 2; r++) {
        for (int c = 0; c < 2; c++) {
            product(r, c) = a(r,0) * b(0,c) + a(r,1) * b(1,c);
        }
    }
    return product;
}
// Matrix product; operator form of mul(a, b).
inline Matrix2 operator*(const Matrix2 & a, const Matrix2 & b)
{
    const Matrix2 product = mul(a, b);
    return product;
}
// Transforms the given 2d vector by the given matrix, i.e. returns m * p:
// each output component is the dot product of one row of m with p.
inline Vector2 transform(const Matrix2 & m, const Vector2 & p)
{
    const float tx = p.x * m(0,0) + p.y * m(0,1);
    const float ty = p.x * m(1,0) + p.y * m(1,1);
    return Vector2(tx, ty);
}
inline void Matrix2::scale(float s)
{
for (int i = 0; i < 4; i++) {
m_data[i] *= s;
}
}
// Non-uniform scale: multiplies the first column (flat elements 0-1) by s.x
// and the second column (flat elements 2-3) by s.y.
inline void Matrix2::scale(Vector2::Arg s)
{
    // First column.
    m_data[0] *= s.x;
    m_data[1] *= s.x;

    // Second column.
    m_data[2] *= s.y;
    m_data[3] *= s.y;
}
// Returns the determinant: the product of the diagonal elements minus the
// product of the off-diagonal elements.
inline float Matrix2::determinant() const
{
    const float m00 = get(0,0);
    const float m11 = get(1,1);
    const float m01 = get(0,1);
    const float m10 = get(1,0);
    return m00 * m11 - m01 * m10;
}
// Inverse using Cramer's rule.
//
// Returns adj(m) / det(m), where adj(m) swaps the diagonal elements of m and
// negates the off-diagonal ones. When the determinant is exactly zero the
// matrix has no inverse and a matrix of zeros is returned instead.
inline Matrix2 inverseCramer(const Matrix2 & m)
{
    const float det = m.determinant();
    if (equal(det, 0.0f, 0.0f)) {
        return Matrix2(0.0f);
    }

    const float invDet = 1.0f / det;

    // The four-float constructor takes its arguments in storage (column-major)
    // order: (0,0), (1,0), (0,1), (1,1).
    return Matrix2( m(1,1) * invDet,
                   -m(1,0) * invDet,
                   -m(0,1) * invDet,
                    m(0,0) * invDet);
}
inline Matrix3::Matrix3() {} inline Matrix3::Matrix3() {}
inline Matrix3::Matrix3(float f) inline Matrix3::Matrix3(float f)
@ -794,7 +987,7 @@ v1 = FXVector3.Cross(v3, v2);
v1.Normalize(); v1.Normalize();
Matrix R = Matrix::Identity; Matrix R = Matrix::Identity;
R[0, 0] = v3.X; // Not sure this is in the correct order... R[0, 0] = v3.X; // Not sure this is in the correct order...
R[1, 0] = v3.Y; R[1, 0] = v3.Y;
R[2, 0] = v3.Z; R[2, 0] = v3.Z;
R[0, 1] = v1.X; R[0, 1] = v1.X;

View File

@ -7,10 +7,6 @@
#include "nvmath.h" #include "nvmath.h"
#include "Vector.h" #include "Vector.h"
#if NV_USE_ALTIVEC
#undef vector
#endif
namespace nv namespace nv
{ {
class Matrix; class Matrix;
@ -29,6 +25,7 @@ namespace nv
Vector3 vector() const; Vector3 vector() const;
float offset() const; float offset() const;
Vector3 normal() const;
void operator*=(float s); void operator*=(float s);

View File

@ -24,6 +24,7 @@ namespace nv
inline Vector3 Plane::vector() const { return v.xyz(); } inline Vector3 Plane::vector() const { return v.xyz(); }
inline float Plane::offset() const { return v.w; } inline float Plane::offset() const { return v.w; }
inline Vector3 Plane::normal() const { return normalize(vector(), 0.0f); }
// Normalize plane. // Normalize plane.
inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON) inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)

View File

@ -452,7 +452,7 @@ namespace nv
// That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point
// numbers and the results becomes very unstable and dependent on the order of the factors. // numbers and the results becomes very unstable and dependent on the order of the factors.
// Instead, it's preferable to substract the vertices first, and multiply the resulting small values together. The result // Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. The result
// in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of // in this case is always much more accurate (as long as the triangle is small) and less dependent of the location of
// the triangle. // the triangle.

View File

@ -194,15 +194,20 @@ namespace nv
#endif #endif
} }
inline uint log2(uint i) inline uint log2(uint32 i)
{ {
uint value = 0; uint32 value = 0;
while( i >>= 1 ) { while( i >>= 1 ) value++;
value++;
}
return value; return value;
} }
// Integer base-2 logarithm of a 64-bit value, rounded down: counts how many
// times i can be shifted right by one bit before it becomes zero.
// Returns 0 for both i == 0 and i == 1.
inline uint log2(uint64 i)
{
    uint64 shifts = 0;
    for (i >>= 1; i != 0; i >>= 1) {
        shifts++;
    }
    return U32(shifts);
}
inline float lerp(float f0, float f1, float t) inline float lerp(float f0, float f1, float t)
{ {
const float s = 1.0f - t; const float s = 1.0f - t;

View File

@ -107,6 +107,11 @@ namespace nv {
#endif #endif
} }
// Float overload of storeRelease: reinterprets the bits of 'value' as a uint32 and
// forwards them, together with 'ptr' viewed as a uint32 pointer, to the integer overload.
// NOTE(review): the (uint32 *) cast drops the volatile qualifier of ptr, and the value is
// read through a pointer of a different type; this presumably relies on float being
// 32 bits wide and on the compiler tolerating this kind of type punning -- confirm.
inline void storeRelease(volatile float * ptr, float value)
{
storeRelease((uint32 *)ptr, *(uint32 *)&value);
}
template <typename T> template <typename T>
inline void storeReleasePointer(volatile T * pTo, T from) inline void storeReleasePointer(volatile T * pTo, T from)

View File

@ -17,7 +17,7 @@ struct Event::Private {
}; };
Event::Event() : m(new Private) { Event::Event() : m(new Private) {
m->handle = CreateEvent(NULL, FALSE, FALSE, NULL); m->handle = CreateEvent(/*lpEventAttributes=*/NULL, /*bManualReset=*/FALSE, /*bInitialState=*/FALSE, /*lpName=*/NULL);
} }
Event::~Event() { Event::~Event() {

View File

@ -13,7 +13,9 @@
#endif // NV_OS #endif // NV_OS
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h> #include <telemetry.h>
extern HTELEMETRY tmContext; extern HTELEMETRY tmContext;
#endif #endif
@ -45,14 +47,19 @@ Mutex::~Mutex ()
void Mutex::lock() void Mutex::lock()
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
TmU64 matcher; TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
#endif #endif
EnterCriticalSection(&m->mutex); EnterCriticalSection(&m->mutex);
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS); tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS);
tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired"); tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired");
#endif #endif
@ -60,7 +67,18 @@ void Mutex::lock()
bool Mutex::tryLock() bool Mutex::tryLock()
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
if (TryEnterCriticalSection(&m->mutex) != 0) {
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
return true;
}
else {
tmEndWaitForLock(0);
return false;
}
#elif NV_USE_TELEMETRY
TmU64 matcher; TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked"); tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
if (TryEnterCriticalSection(&m->mutex) != 0) { if (TryEnterCriticalSection(&m->mutex) != 0) {
@ -79,7 +97,9 @@ bool Mutex::tryLock()
void Mutex::unlock() void Mutex::unlock()
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmReleasedLock(0, this);
#elif NV_USE_TELEMETRY
tmSetLockState(tmContext, this, TMLS_RELEASED, "released"); tmSetLockState(tmContext, this, TMLS_RELEASED, "released");
#endif #endif
@ -90,13 +110,17 @@ void Mutex::unlock()
struct Mutex::Private { struct Mutex::Private {
pthread_mutex_t mutex; pthread_mutex_t mutex;
pthread_mutexattr_t attr;
const char * name; const char * name;
}; };
Mutex::Mutex (const char * name) : m(new Private) Mutex::Mutex (const char * name) : m(new Private)
{ {
int result = pthread_mutex_init(&m->mutex, NULL); pthread_mutexattr_init(&m->attr);
pthread_mutexattr_settype(&m->attr, PTHREAD_MUTEX_RECURSIVE);
int result = pthread_mutex_init(&m->mutex, &m->attr);
//m->mutex = PTHREAD_MUTEX_INITIALIZER;
m->name = name; m->name = name;
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
} }
@ -105,6 +129,8 @@ Mutex::~Mutex ()
{ {
int result = pthread_mutex_destroy(&m->mutex); int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0); nvDebugCheck(result == 0);
result = pthread_mutexattr_destroy(&m->attr);
nvDebugCheck(result == 0);
} }
void Mutex::lock() void Mutex::lock()

View File

@ -9,7 +9,11 @@
#include <unistd.h> // usleep #include <unistd.h> // usleep
#endif #endif
#if NV_USE_TELEMETRY #include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h> #include <telemetry.h>
extern HTELEMETRY tmContext; extern HTELEMETRY tmContext;
#endif #endif
@ -118,16 +122,12 @@ void Thread::start(ThreadFunc * func, void * arg)
nvDebugCheck(p->thread != NULL); nvDebugCheck(p->thread != NULL);
if (p->name != NULL) { if (p->name != NULL) {
setThreadName(threadId, p->name); setThreadName(threadId, p->name);
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmThreadName(0, threadId, p->name);
#elif NV_USE_TELEMETRY
tmThreadName(tmContext, threadId, p->name); tmThreadName(tmContext, threadId, p->name);
#endif #endif
} }
#elif NV_OS_ORBIS
int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? p->name : "nv::Thread");
nvDebugCheck(ret == 0);
// use any non-system core
scePthreadSetaffinity(p->thread, 0x3F);
scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2);
#elif NV_OS_USE_PTHREAD #elif NV_OS_USE_PTHREAD
int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr()); int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr());
nvDebugCheck(result == 0); nvDebugCheck(result == 0);

View File

@ -8,7 +8,9 @@
#include "nvcore/Utils.h" #include "nvcore/Utils.h"
#include "nvcore/StrLib.h" #include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h> #include <telemetry.h>
extern HTELEMETRY tmContext; extern HTELEMETRY tmContext;
#endif #endif
@ -84,7 +86,9 @@ AutoPtr<ThreadPool> s_pool;
} }
{ {
#if NV_USE_TELEMETRY #if NV_USE_TELEMETRY3
tmZone(0, TMZF_NONE, "worker");
#elif NV_USE_TELEMETRY
tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker"); tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker");
#endif #endif
func(s_pool->arg, s_pool->useCallingThread + i); func(s_pool->arg, s_pool->useCallingThread + i);
@ -116,11 +120,11 @@ ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffi
lockThreadToProcessor(0); // Calling thread always locked to processor 0. lockThreadToProcessor(0); // Calling thread always locked to processor 0.
} }
StringBuilder name;
for (uint i = 0; i < threadCount; i++) { for (uint i = 0; i < threadCount; i++) {
StringBuilder name;
name.format("worker %d", i); name.format("worker %d", i);
workers[i].setName(name.release()); // @Leak workers[i].setName(name.release()); // @Leak
workers[i].start(workerFunc, (void *)i); workers[i].start(workerFunc, (void *)(uintptr_t)i);
} }
allIdle = true; allIdle = true;
@ -141,9 +145,6 @@ ThreadPool::~ThreadPool()
void ThreadPool::run(ThreadTask * func, void * arg) void ThreadPool::run(ThreadTask * func, void * arg)
{ {
// Wait until threads are idle.
wait();
start(func, arg); start(func, arg);
if (useCallingThread) { if (useCallingThread) {

View File

@ -85,7 +85,9 @@ uint nv::processorCount() {
return count; return count;
#elif NV_OS_ORBIS #elif NV_OS_ORBIS
return 6; return 6;
#elif NV_OS_DURANGO
return 6;
#elif NV_OS_XBOX #elif NV_OS_XBOX
return 3; // or 6? return 3; // or 6?
#elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX #elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX

View File

@ -25,6 +25,7 @@
#include "BlockCompressor.h" #include "BlockCompressor.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "TaskDispatcher.h" #include "TaskDispatcher.h"
#include "CompressionOptions.h"
#include "nvimage/Image.h" #include "nvimage/Image.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
@ -33,6 +34,7 @@
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include "nvcore/Memory.h" #include "nvcore/Memory.h"
#include "nvcore/Array.inl"
#include <new> // placement new #include <new> // placement new
@ -40,85 +42,13 @@
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
/*
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false;
#else
bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{
nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs);
}
}
}
}
#if defined(HAVE_OPENMP)
else
{
const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < int(bw*bh); i++)
{
const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
}
#endif
}
*/
struct CompressorContext struct CompressorContext
{ {
nvtt::AlphaMode alphaMode; AlphaMode alphaMode;
uint w, h, d; uint w, h, d;
const float * data; const float * data;
const nvtt::CompressionOptions::Private * compressionOptions; const CompressionOptions::Private * compressionOptions;
uint bw, bh, bs; uint bw, bh, bs;
uint8 * mem; uint8 * mem;
@ -144,7 +74,7 @@ void ColorBlockCompressorTask(void * data, int i)
} }
} }
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(d == 1); nvDebugCheck(d == 1);
@ -182,66 +112,6 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
delete [] context.mem; delete [] context.mem;
} }
#if 0
// Each task compresses one block.
void ColorSetCompressorTask(void * data, int i)
{
CompressorContext * d = (CompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
//for (uint x = 0; x < d->bw; x++)
{
ColorSet set;
set.setColors(d->data, d->w, d->h, x * 4, y * 4);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
}
}
// Compresses a single 2D image: fills a CompressorContext, dispatches one task per
// block, then writes the whole compressed buffer through outputOptions.
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
// Only one depth slice is handled.
nvDebugCheck(d == 1);
CompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
// Number of blocks per row / column, rounding partial blocks up.
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
context.compressor = this;
SequentialTaskDispatcher sequential;
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
// Debug builds always run sequentially.
#if _DEBUG
dispatcher = &sequential;
#endif
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
// Temporary buffer receiving every compressed block; released after it is written out.
context.mem = new uint8[size];
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
}
#endif // 0
// Each task compresses one block. // Each task compresses one block.
void FloatColorCompressorTask(void * data, int i) void FloatColorCompressorTask(void * data, int i)
{ {
@ -262,8 +132,8 @@ void FloatColorCompressorTask(void * data, int i)
Vector4 colors[16]; Vector4 colors[16];
float weights[16]; float weights[16];
const uint block_w = min(d->w - block_x * 4U, 4U); const uint block_w = min(d->w - block_x * 4, 4U);
const uint block_h = min(d->h - block_y * 4U, 4U); const uint block_h = min(d->h - block_y * 4, 4U);
uint x, y; uint x, y;
for (y = 0; y < block_h; y++) { for (y = 0; y < block_h; y++) {
@ -274,7 +144,7 @@ void FloatColorCompressorTask(void * data, int i)
colors[dst_idx].y = g[src_idx]; colors[dst_idx].y = g[src_idx];
colors[dst_idx].z = b[src_idx]; colors[dst_idx].z = b[src_idx];
colors[dst_idx].w = a[src_idx]; colors[dst_idx].w = a[src_idx];
weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f; weights[dst_idx] = (d->alphaMode == AlphaMode_Transparency) ? saturate(a[src_idx]) : 1.0f;
} }
for (; x < 4; x++) { for (; x < 4; x++) {
uint dst_idx = 4 * y + x; uint dst_idx = 4 * y + x;
@ -296,7 +166,7 @@ void FloatColorCompressorTask(void * data, int i)
} }
void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures. nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures.
@ -308,7 +178,7 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
context.data = data; context.data = data;
context.compressionOptions = &compressionOptions; context.compressionOptions = &compressionOptions;
context.bs = blockSize(); context.bs = blockSize(compressionOptions);
context.bw = (w + 3) / 4; context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4; context.bh = (h + 3) / 4;
@ -333,3 +203,466 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
delete [] context.mem; delete [] context.mem;
} }
// BC1
#include "CompressorDXT1.h"
// Encodes one block with the fast DXT1 path, using the RGB part of the configured
// color weights. `output` receives the resulting BlockDXT1.
void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT1 * const block = static_cast<BlockDXT1 *>(output);

    compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), block);
}
// Encodes one block with the regular DXT1 compressor, using the RGB part of the
// configured color weights and with three-color mode enabled. `output` receives the
// resulting BlockDXT1.
void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    const bool three_color_mode = true;
    BlockDXT1 * const block = static_cast<BlockDXT1 *>(output);

    compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), three_color_mode, block);
}
// @@ BC1a
// @@ BC2
// @@ BC3
// BC3_RGBM
#include "CompressorDXT5_RGBM.h"
// Encodes one block as RGBM stored in a DXT5 block, forwarding the configured
// RGBM threshold. `output` receives the resulting BlockDXT5.
void CompressorBC3_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    const float rgbm_threshold = compressionOptions.rgbmThreshold;
    BlockDXT5 * const block = static_cast<BlockDXT5 *>(output);

    compress_dxt5_rgbm(colors, weights, rgbm_threshold, block);
}
// ETC
#include "CompressorETC.h"
// Encodes one block as ETC1 by forwarding to compress_etc1 with the RGB part of
// the configured color weights; the encoded block is written to `output`.
void CompressorETC1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc1(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
// Encodes one single-channel EAC block with fixed settings; compressionOptions is
// currently not consulted.
// NOTE(review): channel index 1 is passed for the R format - confirm this is intended.
void CompressorETC2_R::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    const int input_channel = 1;
    // @@ Change radius based on quality.
    const int search_radius = 1;
    const bool use_11bit_mode = true;

    compress_eac(colors, weights, input_channel, search_radius, use_11bit_mode, output);
}
// Placeholder for two-channel EAC encoding: the only statement is commented out,
// so this function currently does nothing.
// NOTE(review): `output` is left unwritten here, so callers would emit whatever the
// buffer already contained - confirm this format is not reachable until implemented.
void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
//compress_eac_rg(colors, weights, 1, 2, output);
}
// Encodes one block as ETC2 RGB by forwarding to compress_etc2 with the RGB part
// of the configured color weights; the encoded block is written to `output`.
void CompressorETC2_RGB::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
compress_etc2(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
// Encodes one block as ETC2 RGB plus EAC by forwarding to compress_etc2_eac with the
// RGB part of the configured color weights; the encoded block is written to `output`.
void CompressorETC2_RGBA::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
// @@ Change radius based on quality.
compress_etc2_eac(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
/*void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac_rg(colors, weights, compressionOptions.colorWeight.xyz(), output);
}*/
// Encodes one block as RGBM using the ETC2 encoder, forwarding the configured
// RGBM threshold; the encoded block is written to `output`.
void CompressorETC2_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
    const float rgbm_threshold = compressionOptions.rgbmThreshold;

    compress_etc2_rgbm(colors, weights, rgbm_threshold, output);
}
// External compressors.
#if defined(HAVE_ATITC)
// Windows-style type names declared by hand before including the ATI header below.
// NOTE(review): presumably these stand in for <windows.h> definitions that
// ATI_Compress.h expects - confirm; _W64 is a compiler-specific annotation.
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
// Compresses a whole 2D image to DXT1 with the ATI compression library and passes the
// result to the output handler, if one is set.
//   inputFormat - InputFormat_BGRA_8UB selects 8-bit ARGB input (4 bytes per pixel);
//                 any other value is treated as 32-bit float ARGB (16 bytes per pixel).
//   w, h, d     - image extent; only d == 1 is supported.
//   data        - source pixels, read in place.
// alphaMode and compressionOptions are not used.
void AtiCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    const bool is_bgra8 = (inputFormat == InputFormat_BGRA_8UB);

    // Describe the caller's pixels to the library.
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = w;
    srcTexture.dwHeight = h;
    // @@ Floating point input is not swizzled.
    srcTexture.dwPitch = is_bgra8 ? (w * 4) : (w * 16);
    srcTexture.format = is_bgra8 ? ATI_TC_FORMAT_ARGB_8888 : ATI_TC_FORMAT_ARGB_32F;
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) data;

    // Describe the DXT1 destination and allocate room for it.
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = w;
    destTexture.dwHeight = h;
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT1;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);

    // Plain settings: no weighting, no DXT1 alpha, normal speed, multi-threading allowed.
    ATI_TC_CompressOptions options;
    options.dwSize = sizeof(options);
    options.nCompressionSpeed = ATI_TC_Speed_Normal;
    options.bUseChannelWeighting = false;
    options.bUseAdaptiveWeighting = false;
    options.bDXT1UseAlpha = false;
    options.bDisableMultiThreading = false;
    //options.bDisableMultiThreading = true;

    // Run the conversion; the return value is not inspected.
    ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    mem::free(destTexture.pData);
}
// Compresses a whole 2D image to DXT5 with the ATI compression library, using the
// library's default options, and passes the result to the output handler, if one is set.
//   inputFormat - InputFormat_BGRA_8UB selects 8-bit ARGB input (4 bytes per pixel);
//                 any other value is treated as 32-bit float ARGB (16 bytes per pixel).
//   w, h, d     - image extent; only d == 1 is supported.
//   data        - source pixels, read in place.
// alphaMode and compressionOptions are not used.
void AtiCompressorDXT5::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    const bool is_bgra8 = (inputFormat == InputFormat_BGRA_8UB);

    // Describe the caller's pixels to the library.
    ATI_TC_Texture srcTexture;
    srcTexture.dwSize = sizeof(srcTexture);
    srcTexture.dwWidth = w;
    srcTexture.dwHeight = h;
    srcTexture.dwPitch = is_bgra8 ? (w * 4) : (w * 16);
    srcTexture.format = is_bgra8 ? ATI_TC_FORMAT_ARGB_8888 : ATI_TC_FORMAT_ARGB_32F;
    srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
    srcTexture.pData = (ATI_TC_BYTE*) data;

    // Describe the DXT5 destination and allocate room for it.
    ATI_TC_Texture destTexture;
    destTexture.dwSize = sizeof(destTexture);
    destTexture.dwWidth = w;
    destTexture.dwHeight = h;
    destTexture.dwPitch = 0;
    destTexture.format = ATI_TC_FORMAT_DXT5;
    destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
    destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);

    // Run the conversion with no options struct; the return value is not inspected.
    ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
    }

    mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
// Stub: the squish-based DXT1 path is disabled. The function only asserts (in debug
// builds) and emits a compile-time TODO message; no output is produced. The previous
// implementation is kept in the comment below for reference.
void SquishCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Always fails the debug check: this code path is not expected to be used.
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
// Compresses a whole 2D image to DXT1 by loading it into a DXT1 system-memory texture
// through D3DX (reference device) and passing the locked surface contents to the
// output handler, if one is set.
//   inputFormat - InputFormat_BGRA_8UB selects 8-bit A8R8G8B8 input (4 bytes per pixel);
//                 any other value is treated as 32-bit float input (16 bytes per pixel).
//   w, h, d     - image extent; only d == 1 is supported.
//   data        - source pixels, read in place.
// alphaMode and compressionOptions are not used.
//
// Every Direct3D call is checked; on failure nothing is written and all objects
// created so far are released. The texture is released as well, which the previous
// version leaked on every call.
void D3DXCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL) {
        return;
    }

    D3DPRESENT_PARAMETERS presentParams;
    ZeroMemory(&presentParams, sizeof(presentParams));
    presentParams.Windowed = TRUE;
    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
    presentParams.BackBufferWidth = 8;
    presentParams.BackBufferHeight = 8;
    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;

    IDirect3DDevice9 * device = NULL;
    IDirect3DTexture9 * texture = NULL;
    IDirect3DSurface9 * surface = NULL;

    HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);

    if (SUCCEEDED(err)) {
        err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
    }

    if (SUCCEEDED(err)) {
        err = texture->GetSurfaceLevel(0, &surface);
    }

    if (SUCCEEDED(err)) {
        // Source rectangle covering the whole input image.
        RECT srcRect;
        srcRect.left = 0;
        srcRect.top = 0;
        srcRect.bottom = h;
        srcRect.right = w;

        // Loading into the DXT1 surface is what performs the compression.
        if (inputFormat == InputFormat_BGRA_8UB)
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
        else
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
    }

    if (SUCCEEDED(err)) {
        D3DLOCKED_RECT locked;
        ZeroMemory(&locked, sizeof(locked));

        err = surface->LockRect(&locked, NULL, D3DLOCK_READONLY);

        // Only read pBits when the lock actually succeeded.
        if (SUCCEEDED(err)) {
            if (outputOptions.outputHandler != NULL) {
                // Pitch is the byte size of one row of blocks; there are (h + 3) / 4 rows.
                int size = locked.Pitch * ((h + 3) / 4);
                outputOptions.outputHandler->writeData(locked.pBits, size);
            }

            surface->UnlockRect();
        }
    }

    // Release in reverse order of creation; each object may be absent after a failure.
    if (surface != NULL) surface->Release();
    if (texture != NULL) texture->Release();
    if (device != NULL) device->Release();
    d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
// Encodes one block as DXT1 with stb_dxt. The block's red and blue channels are
// swapped in place first, so `rgba` is modified by this call.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
    // Swap R and B
    rgba.swizzle(2, 1, 0, 3);

    unsigned char * const dst = (unsigned char *)output;
    unsigned char * const src = (unsigned char *)rgba.colors();

    stb_compress_dxt_block(dst, src, 0, 0);
}
#endif // defined(HAVE_STB)
#if defined(HAVE_ETCLIB)
#include "Etc.h"
// Compresses an image with the external Etc library and passes the encoded data to
// the output handler, if one is set.
//   w, h              - image extent; only w * h pixels are read from `data`.
//   data              - planar float input: four consecutive planes of w * h values each.
//   compressionOptions.format selects the ETC variant; any format not listed below
//   fails nvCheck and returns without writing anything.
// alphaMode, d and dispatcher are not used.
// NOTE(review): out_data is not released here - confirm who owns the buffer that
// Etc::Encode returns.
void EtcLibCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    //nvCheck(d == 1); // Encode one layer at a time?

    // Map the requested format onto the library's image format.
    Etc::Image::Format format;
    switch (compressionOptions.format) {
        case Format_ETC1:
            format = Etc::Image::Format::ETC1;
            break;
        case Format_ETC2_R:
            format = Etc::Image::Format::R11;
            break;
        case Format_ETC2_RG:
            format = Etc::Image::Format::RG11;
            break;
        case Format_ETC2_RGB:
            format = Etc::Image::Format::RGB8;
            //format = Etc::Image::Format::SRGB8;
            break;
        case Format_ETC2_RGBA:
            format = Etc::Image::Format::RGBA8;
            //format = Etc::Image::Format::SRGBA8;
            break;
        case Format_ETC2_RGB_A1:
            format = Etc::Image::Format::RGB8A1;
            //format = Etc::Image::Format::SRGB8A1;
            break;
        default:
            nvCheck(false);
            return;
    }

    // @@ Use normal compression metric for normals?
    //if (compressionOptions.)
    Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBA;

    // @@ Adjust based on quality.
    int effort = ETCCOMP_DEFAULT_EFFORT_LEVEL;

    // @@ What are the defaults?
    uint jobs = 4;
    uint max_jobs = 4;

    // Results filled in by the encoder.
    uint8 * out_data = NULL;
    uint out_size = 0;
    uint out_width = 0;
    uint out_height = 0;
    int out_time = 0;

    // Swizzle color data: interleave the four input planes into per-pixel quadruples.
    const uint pixel_count = w * h;
    nv::Array<float> interleaved;
    interleaved.resize(4 * pixel_count);
    for (uint p = 0; p < pixel_count; p++) {
        for (uint c = 0; c < 4; c++) {
            interleaved[4*p + c] = data[pixel_count*c + p];
        }
    }

    Etc::Encode(interleaved.buffer(), w, h, format, error_metric, effort, jobs, max_jobs, &out_data, &out_size, &out_width, &out_height, &out_time);

    if (outputOptions.outputHandler != NULL) {
        outputOptions.outputHandler->writeData(out_data, I32(out_size));
    }
}
#endif
#if defined(HAVE_RGETC)
#include "rg_etc1.h"
NV_AT_STARTUP(rg_etc1::pack_etc1_block_init());
// Encodes one block as ETC1 with rg_etc1. The nvtt quality setting picks the rg_etc1
// quality level (medium when nothing else matches). The block's red and blue channels
// are swapped in place first, so `rgba` is modified by this call.
void RgEtcCompressor::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
    rg_etc1::etc1_pack_params pack_params;

    switch (compressionOptions.quality) {
        case Quality_Fastest:
            pack_params.m_quality = rg_etc1::cLowQuality;
            break;
        case Quality_Production:
        case Quality_Highest:
            pack_params.m_quality = rg_etc1::cHighQuality;
            break;
        case Quality_Normal:
        default:
            pack_params.m_quality = rg_etc1::cMediumQuality;
            break;
    }

    rgba.swizzle(2, 1, 0, 3);

    rg_etc1::pack_etc1_block(output, (uint *)rgba.colors(), pack_params);

    //Vector4 result[16];
    //nv::decompress_etc(output, result);
}
#endif
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTextureUtilities.h> // for CPVRTexture, CPVRTextureHeader, PixelType, Transcode
#include "nvmath/Color.inl"
// Compresses an image to PVRTC with PVRTexTool and passes the result to the output
// handler, if one is set.
//   w, h, d - image extent.
//   data    - planar float input: consecutive planes of w * h * d values; only the
//             first three planes (R, G, B) are read.
//   compressionOptions.format selects the 2bpp/4bpp RGB/RGBA target; any other value
//   falls back to 2bpp RGB.
// alphaMode and dispatcher are not used. If the transcode fails nothing is written.
//
// Input is clamped to [0, 1] before the 8-bit conversion: converting an out-of-range
// float (HDR or negative values) to uint8 is undefined behavior.
// NOTE(review): the source pixel type has no alpha channel, so the RGBA targets are
// encoded without the input alpha - confirm this is intended.
void CompressorPVR::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
    EPVRTColourSpace color_space = ePVRTCSpacelRGB;

    //pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('b','g','r','a',8,8,8,8);
    pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('r','g','b',0,8,8,8,0);

    pvrtexture::CPVRTextureHeader header(src_pixel_type.PixelTypeID, w, h, d, 1/*num mips*/, 1/*num array*/, 1/*num faces*/, color_space, ePVRTVarTypeUnsignedByteNorm);

    /*
    uint count = w * h * d;
    Array<Color32> tmp;
    tmp.resize(count);
    for (uint i = 0; i < count; i++) {
        tmp[i] = toColor32(Vector4(data[0*count + i], data[1*count + i], data[2*count + i], data[3*count + i]));
    }
    */

    // Interleave the planar float input into 8-bit RGB triples.
    uint count = w * h * d;
    Array<uint8> tmp;
    tmp.resize(3 * count);
    for (uint i = 0; i < count; i++) {
        // Clamp first so the float-to-uint8 conversion is always in range.
        tmp[3*i+0] = uint8(saturate(data[0*count + i]) * 255.0f);
        tmp[3*i+1] = uint8(saturate(data[1*count + i]) * 255.0f);
        tmp[3*i+2] = uint8(saturate(data[2*count + i]) * 255.0f);
    }

    pvrtexture::CPVRTexture texture(header, tmp.buffer());

    pvrtexture::PixelType dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);

    if (compressionOptions.format == Format_PVR_2BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
    else if (compressionOptions.format == Format_PVR_4BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGB);
    else if (compressionOptions.format == Format_PVR_2BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGBA);
    else if (compressionOptions.format == Format_PVR_4BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGBA);

    bool success = pvrtexture::Transcode(texture, dst_pixel_type, ePVRTVarTypeUnsignedByteNorm, color_space, pvrtexture::ePVRTCNormal, false);

    if (success) {
        // Size of the compressed data: at least 2x2 blocks per slice, times bytes per block.
        uint size = 0;
        if (compressionOptions.format == Format_PVR_2BPP_RGB || compressionOptions.format == Format_PVR_2BPP_RGBA) {
            // 2 bpp: blocks are 8x4 pixels.
            const uint bpp = 2u;
            const uint block_size = 8u * 4u;
            const uint size_factor = (block_size * bpp) >> 3u;
            const uint block_width = nv::max((w >> 3u), 2u);
            const uint block_height = nv::max((h >> 2u), 2u);
            size = d * block_width * block_height * size_factor;
        }
        else {
            // 4 bpp: blocks are 4x4 pixels.
            const uint bpp = 4u;
            const uint block_size = 4u * 4u;
            const uint size_factor = (block_size * bpp) >> 3u;
            const uint block_width = nv::max((w >> 2u), 2u);
            const uint block_height = nv::max((h >> 2u), 2u);
            size = d * block_width * block_height * size_factor;
        }

        if (outputOptions.outputHandler != NULL) {
            outputOptions.outputHandler->writeData(texture.getDataPtr(), I32(size));
        }
    }
}
#endif

View File

@ -27,7 +27,6 @@
#include "Compressor.h" #include "Compressor.h"
namespace nv namespace nv
{ {
struct ColorBlock; struct ColorBlock;
@ -45,10 +44,150 @@ namespace nv
{ {
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0; virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const = 0; virtual uint blockSize(const nvtt::CompressionOptions::Private & compressionOptions) const = 0;
}; };
// BC1
// Fast DXT1 (BC1) block compressor built on FloatColorCompressor.
struct FastCompressorDXT1 : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// Regular DXT1 (BC1) block compressor built on FloatColorCompressor.
struct CompressorDXT1 : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// BC3
// BC3 block compressor for RGBM data, built on FloatColorCompressor.
struct CompressorBC3_RGBM : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 16, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
// ETC
// ETC1 block compressor built on FloatColorCompressor.
struct CompressorETC1 : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// ETC2 single-channel (R) block compressor built on FloatColorCompressor.
struct CompressorETC2_R : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
// ETC2 two-channel (RG) block compressor built on FloatColorCompressor.
struct CompressorETC2_RG : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 16, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
// ETC2 RGB block compressor built on FloatColorCompressor.
struct CompressorETC2_RGB : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
// ETC2 RGBA block compressor built on FloatColorCompressor.
struct CompressorETC2_RGBA : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 16, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
// ETC2 block compressor for RGBM data, built on FloatColorCompressor.
struct CompressorETC2_RGBM : public FloatColorCompressor
{
// Encodes one block of 16 colors/weights into `output`.
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 16, regardless of the options.
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
// External compressors.
#if defined(HAVE_ATITC)
// DXT1 compressor backed by the ATI compression library (only built with HAVE_ATITC).
struct AtiCompressorDXT1 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
// DXT5 compressor backed by the ATI compression library (only built with HAVE_ATITC).
struct AtiCompressorDXT5 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
// DXT1 compressor backed by the squish library (only built with HAVE_SQUISH).
struct SquishCompressorDXT1 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
// DXT1 compressor backed by D3DX (only built with HAVE_D3DX).
struct D3DXCompressorDXT1 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
// DXT1 block compressor backed by stb_dxt (only built with HAVE_STB).
struct StbCompressorDXT1 : public ColorBlockCompressor
{
// Encodes one ColorBlock into `output`; the block is taken by non-const reference.
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8.
// NOTE(review): takes no options argument, unlike the FloatColorCompressor variants -
// confirm this still matches ColorBlockCompressor's declaration.
virtual uint blockSize() const { return 8; }
};
#endif
#if NV_USE_CRUNCH
// ETC1 compressor enabled by NV_USE_CRUNCH; its implementation is not part of this section.
struct CrunchCompressorETC1 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if NV_USE_INTEL_ISPC_TC
// BC1 compressor enabled by NV_USE_INTEL_ISPC_TC; its implementation is not part of this section.
struct IspcCompressorBC1 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
// BC3 compressor enabled by NV_USE_INTEL_ISPC_TC; its implementation is not part of this section.
struct IspcCompressorBC3 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
// BC7 compressor enabled by NV_USE_INTEL_ISPC_TC; its implementation is not part of this section.
struct IspcCompressorBC7 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
// ETC1 compressor enabled by NV_USE_INTEL_ISPC_TC; its implementation is not part of this section.
struct IspcCompressorETC1 : public CompressorInterface
{
// Compresses a whole image given as raw data in the specified input format.
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_ETCLIB)
// ETC compressor backed by the external Etc library (only built with HAVE_ETCLIB).
struct EtcLibCompressor : public CompressorInterface
{
// Compresses a whole image given as float data; the target format is read from compressionOptions.
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_RGETC)
// ETC1 block compressor backed by rg_etc1 (only built with HAVE_RGETC).
struct RgEtcCompressor : public ColorBlockCompressor
{
// Encodes one ColorBlock into `output`; the block is taken by non-const reference.
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
// Size of one encoded block: 8.
// NOTE(review): takes no options argument, unlike the FloatColorCompressor variants -
// confirm this still matches ColorBlockCompressor's declaration.
virtual uint blockSize() const { return 8; }
};
#endif
#if defined(HAVE_PVRTEXTOOL)
// PVRTC compressor backed by PVRTexTool (only built with HAVE_PVRTEXTOOL).
struct CompressorPVR : public CompressorInterface
{
// Compresses a whole image given as float data; the target format is read from compressionOptions.
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
} // nv namespace } // nv namespace

View File

@ -13,6 +13,7 @@ SET(NVTT_SRCS
CompressorDX11.h CompressorDX11.cpp CompressorDX11.h CompressorDX11.cpp
CompressorDXT1.h CompressorDXT1.cpp CompressorDXT1.h CompressorDXT1.cpp
CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp
CompressorETC.h CompressorETC.cpp
CompressorRGB.h CompressorRGB.cpp CompressorRGB.h CompressorRGB.cpp
Context.h Context.cpp Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp QuickCompressDXT.h QuickCompressDXT.cpp
@ -38,6 +39,7 @@ IF (CUDA_FOUND)
ENDIF (CUDA_FOUND) ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104)
ADD_DEFINITIONS(-DNVTT_EXPORTS) ADD_DEFINITIONS(-DNVTT_EXPORTS)
@ -47,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS}) ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED) ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath) TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath rg_etc1)
INSTALL(TARGETS nvtt INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin RUNTIME DESTINATION bin

View File

@ -38,79 +38,6 @@ ClusterFit::ClusterFit()
{ {
} }
#if 0 // @@ Deprecate. Do not use color set directly.
// Prepares the fitter for a new block described by a ColorSet: resets the best error,
// orders the colors along their principal axis and caches the weighted colors together
// with their running sums. Both the SIMD and the scalar build variants are handled.
void ClusterFit::setColorSet(const ColorSet * set)
{
// initialise the best error
#if NVTT_USE_SIMD
m_besterror = SimdVector( FLT_MAX );
Vector3 metric = m_metric.toVector3();
#else
m_besterror = FLT_MAX;
Vector3 metric = m_metric;
#endif
// cache some values
m_count = set->colorCount;
// Local copy of the RGB part of each color (at most 16 entries).
Vector3 values[16];
for (uint i = 0; i < m_count; i++)
{
values[i] = set->colors[i].xyz();
}
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric);
// build the list of values
// dps[i] is the projection of color i onto the principal axis; order[] tracks the
// original index of each entry while the projections are sorted.
int order[16];
float dps[16];
for (uint i = 0; i < m_count; ++i)
{
dps[i] = dot(values[i], principal);
order[i] = i;
}
// stable sort
// Insertion sort by ascending projection; equal elements keep their relative order.
for (uint i = 0; i < m_count; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
// weight all the points
#if NVTT_USE_SIMD
m_xxsum = SimdVector( 0.0f );
m_xsum = SimdVector( 0.0f );
#else
m_xxsum = Vector3(0.0f);
m_xsum = Vector3(0.0f);
m_wsum = 0.0f;
#endif
for (uint i = 0; i < m_count; ++i)
{
// Visit the colors in sorted order.
int p = order[i];
#if NVTT_USE_SIMD
// The color is extended with w = 1 before scaling, so the fourth lane carries the weight.
NV_ALIGN_16 Vector4 tmp(values[p], 1);
m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]);
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
#else
// Scalar path keeps the weights and their sum in separate members.
m_weighted[i] = values[p] * set->weights[p];
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
m_weights[i] = set->weights[p];
m_wsum += m_weights[i];
#endif
}
}
#endif // 0
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count) void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{ {
// initialise the best error // initialise the best error
@ -412,13 +339,13 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
#else #else
inline Vector3 round565(const Vector3 & v) { inline Vector3 round565(const Vector3 & v) {
uint r = ftoi_trunc(v.x * 31.0f); uint r = ftoi_trunc(v.x * 31.0f);
float r0 = float(((r+0) << 3) | ((r+0) >> 2)); float r0 = float(((r+0) << 3) | ((r+0) >> 2));
float r1 = float(((r+1) << 3) | ((r+1) >> 2)); float r1 = float(((r+1) << 3) | ((r+1) >> 2));
if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U); if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U);
r = (r << 3) | (r >> 2); r = (r << 3) | (r >> 2);
uint g = ftoi_trunc(v.y * 63.0f); uint g = ftoi_trunc(v.y * 63.0f);
float g0 = float(((g+0) << 2) | ((g+0) >> 4)); float g0 = float(((g+0) << 2) | ((g+0) >> 4));
float g1 = float(((g+1) << 2) | ((g+1) >> 4)); float g1 = float(((g+1) << 2) | ((g+1) >> 4));
if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U); if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U);
@ -429,7 +356,7 @@ inline Vector3 round565(const Vector3 & v) {
float b1 = float(((b+1) << 3) | ((b+1) >> 2)); float b1 = float(((b+1) << 3) | ((b+1) >> 2));
if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U); if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U);
b = (b << 3) | (b >> 2); b = (b << 3) | (b >> 2);
return Vector3(float(r)/255, float(g)/255, float(b)/255); return Vector3(float(r)/255, float(g)/255, float(b)/255);
} }

View File

@ -50,6 +50,7 @@ void CompressionOptions::reset()
m.format = Format_DXT1; m.format = Format_DXT1;
m.quality = Quality_Normal; m.quality = Quality_Normal;
m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f); m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f);
m.rgbmThreshold = 0.15f;
m.bitcount = 32; m.bitcount = 32;
m.bmask = 0x000000FF; m.bmask = 0x000000FF;
@ -102,6 +103,11 @@ void CompressionOptions::setColorWeights(float red, float green, float blue, flo
m.colorWeight.set(red, green, blue, alpha); m.colorWeight.set(red, green, blue, alpha);
} }
/// Set the threshold handed to the RGBM block compressors.
/// The value is stored unchecked. NOTE(review): the parameter name suggests it is the
/// minimum M value - confirm the accepted range.
void CompressionOptions::setRGBMThreshold(float min_m)
{
m.rgbmThreshold = min_m;
}
/// Set color mask to describe the RGB/RGBA format. /// Set color mask to describe the RGB/RGBA format.
void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask) void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask)
@ -162,7 +168,7 @@ void CompressionOptions::setPixelType(PixelType pixelType)
/// Set pitch alignment in bytes. /// Set pitch alignment in bytes.
void CompressionOptions::setPitchAlignment(int pitchAlignment) void CompressionOptions::setPitchAlignment(int pitchAlignment)
{ {
nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(pitchAlignment)); nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(U32(pitchAlignment)));
m.pitchAlignment = pitchAlignment; m.pitchAlignment = pitchAlignment;
} }
@ -194,6 +200,10 @@ void CompressionOptions::setTargetDecoder(Decoder decoder)
} }
/// Get the currently selected compression format.
Format CompressionOptions::format() const
{
return m.format;
}
// Translate to and from D3D formats. // Translate to and from D3D formats.
unsigned int CompressionOptions::d3d9Format() const unsigned int CompressionOptions::d3d9Format() const
@ -246,10 +256,20 @@ unsigned int CompressionOptions::d3d9Format() const
FOURCC_ATI2, // Format_BC5 FOURCC_ATI2, // Format_BC5
FOURCC_DXT1, // Format_DXT1n FOURCC_DXT1, // Format_DXT1n
0, // Format_CTX1 0, // Format_CTX1
MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6 FOURCC_BC6H, // Format_BC6
MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7 FOURCC_BC7L, // Format_BC7
//FOURCC_ATI2, // Format_BC5_Luma FOURCC_DXT5, // Format_BC3_RGBM
FOURCC_DXT5, // Format_BC3_RGBM NV_MAKEFOURCC('E', 'T', 'C', '1'), // Format_ETC1
0, // Format_ETC2_R
0, // Format_ETC2_RG
NV_MAKEFOURCC('E', 'T', 'C', '2'), // Format_ETC2_RGB
0, // Format_ETC2_RGBA
0, // Format_ETC2_RGB_A1
0, // Format_ETC2_RGBM
FOURCC_PVR0,
FOURCC_PVR1,
FOURCC_PVR2,
FOURCC_PVR3,
}; };
NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count); NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count);
@ -258,12 +278,80 @@ unsigned int CompressionOptions::d3d9Format() const
} }
} }
/* unsigned int CompressionOptions::dxgiFormat() const // @@ Add srgb flag.
bool CompressionOptions::setDirect3D9Format(unsigned int format)
{ {
// Translate the current options into a DXGI_FORMAT value. Returns 0 when no matching format is known.
if (m.format == Format_RGB) {
    if (m.pixelType == PixelType_UnsignedNorm) {

        uint bitcount = m.bitcount;
        uint rmask = m.rmask;
        uint gmask = m.gmask;
        uint bmask = m.bmask;
        uint amask = m.amask;

        if (bitcount == 0) {
            // No explicit masks: derive them from the channel sizes, packed R|G|B|A from the most significant bits down.
            // Unsigned literals avoid signed overflow for 31-bit channels.
            // NOTE(review): a channel size of 32 still shifts by the full width of uint - confirm callers never use that here.
            bitcount = m.rsize + m.gsize + m.bsize + m.asize;
            rmask = ((1U << m.rsize) - 1) << (m.asize + m.bsize + m.gsize);
            gmask = ((1U << m.gsize) - 1) << (m.asize + m.bsize);
            bmask = ((1U << m.bsize) - 1) << m.asize;
            amask = ((1U << m.asize) - 1) << 0;
        }

        if (bitcount <= 32) {
            return nv::findDXGIFormat(bitcount, rmask, gmask, bmask, amask);
        }
        else {
            // Wider than 32 bits: only the 16-bit-per-channel layouts below are recognized.
            if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_UNORM;
            if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_UNORM;
        }
    }
    else if (m.pixelType == PixelType_Float) {
        if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16_FLOAT;
        if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32_FLOAT;

        if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_FLOAT;
        if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32G32_FLOAT;

        if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_FLOAT;
        if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return DXGI_FORMAT_R32G32B32A32_FLOAT;
    }

    // Unsupported uncompressed layout.
    return 0;
}
else {
    // One entry per nvtt::Format value, in enum order; 0 marks formats without a DXGI equivalent.
    static const uint dxgi_formats[] = {
        0,                          // Format_RGB,
        DXGI_FORMAT_BC1_UNORM,      // Format_DXT1
        DXGI_FORMAT_BC1_UNORM,      // Format_DXT1a
        DXGI_FORMAT_BC2_UNORM,      // Format_DXT3
        DXGI_FORMAT_BC3_UNORM,      // Format_DXT5
        DXGI_FORMAT_BC3_UNORM,      // Format_DXT5n
        DXGI_FORMAT_BC4_UNORM,      // Format_BC4
        DXGI_FORMAT_BC5_UNORM,      // Format_BC5
        DXGI_FORMAT_BC1_UNORM,      // Format_DXT1n
        0,                          // Format_CTX1
        DXGI_FORMAT_BC6H_UF16,      // Format_BC6
        DXGI_FORMAT_BC7_UNORM,      // Format_BC7
        DXGI_FORMAT_BC3_UNORM,      // Format_BC3_RGBM (stored as DXT5 blocks, so BC3 rather than BC5)
        0,                          // Format_ETC1
        0,                          // Format_ETC2_R
        0,                          // Format_ETC2_RG
        0,                          // Format_ETC2_RGB
        0,                          // Format_ETC2_RGBA
        0,                          // Format_ETC2_RGB_A1
        0,                          // Format_ETC2_RGBM
        0,                          // Format_PVR_2BPP_RGB
        0,                          // Format_PVR_4BPP_RGB
        0,                          // Format_PVR_2BPP_RGBA
        0,                          // Format_PVR_4BPP_RGBA
    };

    NV_COMPILER_CHECK(NV_ARRAY_SIZE(dxgi_formats) == Format_Count);

    return dxgi_formats[m.format];
}
} }
unsigned int CompressionOptions::dxgiFormat() const /*
bool CompressionOptions::setDirect3D9Format(unsigned int format)
{ {
} }

View File

@ -39,6 +39,7 @@ namespace nvtt
Quality quality; Quality quality;
nv::Vector4 colorWeight; nv::Vector4 colorWeight;
float rgbmThreshold;
// Pixel format description. // Pixel format description.
uint bitcount; uint bitcount;

View File

@ -30,6 +30,7 @@
namespace nv namespace nv
{ {
struct CompressorInterface struct CompressorInterface
{ {
virtual ~CompressorInterface() {} virtual ~CompressorInterface() {}

View File

@ -39,7 +39,7 @@ using namespace nv;
using namespace nvtt; using namespace nvtt;
void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC6::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
// !!!UNDONE: support channel weights // !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
@ -77,7 +77,7 @@ void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[
ZOH::compress(zohTile, (char *)output); ZOH::compress(zohTile, (char *)output);
} }
void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC7::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{ {
// !!!UNDONE: support channel weights // !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...) // !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)

View File

@ -30,14 +30,14 @@ namespace nv
{ {
struct CompressorBC6 : public FloatColorCompressor struct CompressorBC6 : public FloatColorCompressor
{ {
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; } virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
}; };
struct CompressorBC7 : public FloatColorCompressor struct CompressorBC7 : public FloatColorCompressor
{ {
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; } virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
}; };
} // nv namespace } // nv namespace

View File

@ -28,7 +28,7 @@
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "OutputOptions.h" #include "OutputOptions.h"
#include "ClusterFit.h" #include "ClusterFit.h"
#include "CompressorDXT1.h" //#include "CompressorDXT1.h"
#include "CompressorDXT5_RGBM.h" #include "CompressorDXT5_RGBM.h"
// squish // squish
@ -48,45 +48,11 @@
#include <new> // placement new #include <new> // placement new
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
#endif
// d3dx
#if defined(HAVE_D3DX)
#include <d3dx9.h>
#endif
// stb
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
#endif
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
// Fast DXT1 path: placement-constructs a BlockDXT1 in the caller's output buffer and lets
// QuickCompress fill it from the given color block. alphaMode and compressionOptions are unused.
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    QuickCompress::compressDXT1(rgba, new(output) BlockDXT1);
}
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{ {
@ -115,39 +81,13 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
} }
// Two alternative implementations of CompressorDXT1::compressBlock; the '#if 1' selects the first one.
#if 1
// Float-color variant: forwards the 16 colors and weights to compress_dxt1, using the RGB part of
// the configured color weights and with three-color mode enabled. The result is written to 'output'
// interpreted as a BlockDXT1.
void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output);
}
#else
// Disabled squish-based variant operating on a ColorBlock. alphaMode is not used.
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
nvsquish::WeightedClusterFit fit;
// Use the configured per-channel color weights as the error metric.
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
if (rgba.isSingleColor())
{
// Single-color blocks go through the dedicated OptimalCompress routine instead of the cluster fit.
BlockDXT1 * block = new(output) BlockDXT1;
OptimalCompress::compressDXT1(rgba.color(0), block);
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, nvsquish::kDxt1);
fit.Compress(output);
}
}
#endif
void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{ {
uint alphaMask = 0; uint alphaMask = 0;
for (uint i = 0; i < 16; i++) for (uint i = 0; i < 16; i++)
{ {
if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color. if (rgba.color(i).a == 0) alphaMask |= (3U << (i * 2U)); // Set two bits for each color.
} }
const bool isSingleColor = rgba.isSingleColor(); const bool isSingleColor = rgba.isSingleColor();
@ -284,216 +224,6 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode
} }
// Compress 16 float colors as an RGBM block by forwarding to compress_dxt5_rgbm; 'output' is
// interpreted as a BlockDXT5. compressionOptions is not read: the minimum multiplier is
// hard-coded to 0.25f here (see the @@ note below).
void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
float min_m = 0.25f; // @@ Get from compression options.
compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output);
}
#if defined(HAVE_ATITC)
// Compress a whole w x h image to DXT1 through the ATI_Compress library and pass the result to
// the output handler, if one is set. Only single-slice input is expected (d == 1 is debug-checked).
// alphaMode and compressionOptions are not read.
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
// 4 bytes per pixel.
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// Any other input format is treated as 16 bytes per pixel (4 x 32-bit float).
// @@ Floating point input is not swizzled.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
// The caller's buffer is used directly as the source; it is not copied.
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
// Temporary destination buffer, freed at the end of this function.
// NOTE(review): the allocation result is not checked.
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
ATI_TC_CompressOptions options;
options.dwSize = sizeof(options);
options.bUseChannelWeighting = false;
options.bUseAdaptiveWeighting = false;
options.bDXT1UseAlpha = false;
options.nCompressionSpeed = ATI_TC_Speed_Normal;
options.bDisableMultiThreading = false;
//options.bDisableMultiThreading = true;
// Compress
// NOTE(review): the return value of ATI_TC_ConvertTexture is ignored.
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
// Compress a whole w x h image to DXT5 through the ATI_Compress library and pass the result to
// the output handler, if one is set. Only single-slice input is expected (d == 1 is debug-checked).
// Unlike the DXT1 variant, no ATI_TC_CompressOptions are supplied (NULL is passed).
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
// 4 bytes per pixel.
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// Any other input format is treated as 16 bytes per pixel (4 x 32-bit float).
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
// The caller's buffer is used directly as the source; it is not copied.
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT5;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
// Temporary destination buffer, freed at the end of this function.
// NOTE(review): the allocation result is not checked.
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
// NOTE(review): the return value of ATI_TC_ConvertTexture is ignored.
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
// Placeholder for a squish-based DXT1 compressor. The implementation is commented out, so the
// function currently produces no output; nvDebugCheck(false) flags any call in debug builds.
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
// Disabled implementation kept for reference: swaps R and B, compresses with squish and writes the blocks.
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
// Compress a single w x h image to DXT1 using D3DX on a reference device, then pass the
// compressed bits to the output handler, if one is set.
//
// Every Direct3D call is checked; on failure the function releases whatever was created and
// returns without writing any data. All COM objects, including the texture, are released
// before returning. alphaMode and compressionOptions are not read.
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
    nvDebugCheck(d == 1);

    IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
    if (d3d == NULL) {
        return;
    }

    D3DPRESENT_PARAMETERS presentParams;
    ZeroMemory(&presentParams, sizeof(presentParams));
    presentParams.Windowed = TRUE;
    presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
    presentParams.BackBufferWidth = 8;
    presentParams.BackBufferHeight = 8;
    presentParams.BackBufferFormat = D3DFMT_UNKNOWN;

    IDirect3DDevice9 * device = NULL;
    IDirect3DTexture9 * texture = NULL;
    IDirect3DSurface9 * surface = NULL;

    HRESULT err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);

    if (SUCCEEDED(err)) {
        err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
    }

    if (SUCCEEDED(err)) {
        err = texture->GetSurfaceLevel(0, &surface);
    }

    if (SUCCEEDED(err)) {
        RECT srcRect;
        srcRect.left = 0;
        srcRect.top = 0;
        srcRect.bottom = h;
        srcRect.right = w;

        // D3DX performs the actual DXT1 compression while loading into the DXT1 surface.
        if (inputFormat == nvtt::InputFormat_BGRA_8UB)
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
        else
        {
            err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &srcRect, D3DX_DEFAULT, 0);
        }
    }

    if (SUCCEEDED(err)) {
        D3DLOCKED_RECT lockedRect;
        ZeroMemory(&lockedRect, sizeof(lockedRect));

        err = surface->LockRect(&lockedRect, NULL, D3DLOCK_READONLY);

        if (SUCCEEDED(err)) {
            if (outputOptions.outputHandler != NULL) {
                // One pitch-sized row of data per row of 4x4 blocks.
                int size = lockedRect.Pitch * ((h + 3) / 4);
                outputOptions.outputHandler->writeData(lockedRect.pBits, size);
            }

            surface->UnlockRect();
        }
    }

    // Release in reverse order of creation; objects that were never created are skipped.
    if (surface != NULL) surface->Release();
    if (texture != NULL) texture->Release();
    if (device != NULL) device->Release();
    d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
// Compress one color block to DXT1 using stb_dxt. Note that the input block is modified in
// place: its R and B channels are swapped before being handed to stb_compress_dxt_block.
// The last two arguments (alpha, mode) are passed as 0. alphaMode and compressionOptions are unused.
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
rgba.swizzle(2, 1, 0, 3); // Swap R and B
stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
}
#endif // defined(HAVE_STB)

View File

@ -32,12 +32,6 @@ namespace nv
struct ColorBlock; struct ColorBlock;
// Fast CPU compressors. // Fast CPU compressors.
// Fast DXT1 compressor operating on 8-bit color blocks; emits 8 bytes per block.
struct FastCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct FastCompressorDXT1a : public ColorBlockCompressor struct FastCompressorDXT1a : public ColorBlockCompressor
{ {
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
@ -64,19 +58,6 @@ namespace nv
// Normal CPU compressors. // Normal CPU compressors.
// Two alternative declarations of CompressorDXT1; the '#if 1' selects the float-color one.
// Both report 8 bytes per block.
#if 1
// Active variant: receives the block as 16 float colors with per-texel weights.
struct CompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#else
// Disabled variant: receives the block as a ColorBlock.
struct CompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
struct CompressorDXT1a : public ColorBlockCompressor struct CompressorDXT1a : public ColorBlockCompressor
{ {
@ -108,47 +89,9 @@ namespace nv
virtual uint blockSize() const { return 16; } virtual uint blockSize() const { return 16; }
}; };
// RGBM compressor taking 16 float colors with per-texel weights; emits 16 bytes per block.
struct CompressorBC3_RGBM : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// External compressors.
// Each declaration below is only compiled when the corresponding HAVE_* macro is defined.
#if defined(HAVE_ATITC)
// Whole-image DXT1 compressor backed by the ATI_Compress library.
struct AtiCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
// Whole-image DXT5 compressor backed by the ATI_Compress library.
struct AtiCompressorDXT5 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
// Whole-image DXT1 compressor intended to be backed by squish.
struct SquishCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
// Whole-image DXT1 compressor backed by D3DX.
struct D3DXCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
// Per-block DXT1 compressor backed by stb_dxt; emits 8 bytes per block.
struct StbCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
} // nv namespace } // nv namespace

View File

@ -218,13 +218,13 @@ static int evaluate_mse(const Color32 & p, const Color32 & c) {
return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b)); return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b));
} }
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) { /*static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) {
float e0 = evaluate_mse(palette[0], c, w); float e0 = evaluate_mse(palette[0], c, w);
float e1 = evaluate_mse(palette[1], c, w); float e1 = evaluate_mse(palette[1], c, w);
float e2 = evaluate_mse(palette[2], c, w); float e2 = evaluate_mse(palette[2], c, w);
float e3 = evaluate_mse(palette[3], c, w); float e3 = evaluate_mse(palette[3], c, w);
return min(min(e0, e1), min(e2, e3)); return min(min(e0, e1), min(e2, e3));
} }*/
static int evaluate_mse(const Color32 palette[4], const Color32 & c) { static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
int e0 = evaluate_mse(palette[0], c); int e0 = evaluate_mse(palette[0], c);
@ -245,12 +245,12 @@ static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) {
// Returns weighted MSE error in [0-255] range. // Returns weighted MSE error in [0-255] range.
static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) { static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) {
float total = 0.0f; float total = 0.0f;
for (int i = 0; i < count; i++) { for (int i = 0; i < count; i++) {
total += weights[i] * evaluate_mse(palette, colors[i]); total += weights[i] * evaluate_mse(palette, colors[i]);
} }
return total; return total;
} }
#if 0 #if 0
@ -337,7 +337,7 @@ static void evaluate_palette(Color16 c0, Color16 c1, Vector3 palette[4]) {
} }
} }
static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) { /*static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
nvDebugCheck(c0.u > c1.u); nvDebugCheck(c0.u > c1.u);
Color32 palette32[4]; Color32 palette32[4];
@ -346,7 +346,7 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
for (int i = 0; i < 4; i++) { for (int i = 0; i < 4; i++) {
palette[i] = color_to_vector3(palette32[i]); palette[i] = color_to_vector3(palette32[i]);
} }
} }*/
@ -355,37 +355,37 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) {
uint indices = 0; uint indices = 0;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint b0 = d0 > d3; uint b0 = d0 > d3;
uint b1 = d1 > d2; uint b1 = d1 > d2;
uint b2 = d0 > d2; uint b2 = d0 > d2;
uint b3 = d1 > d3; uint b3 = d1 > d3;
uint b4 = d2 > d3; uint b4 = d2 > d3;
uint x0 = b1 & b2; uint x0 = b1 & b2;
uint x1 = b0 & b3; uint x1 = b0 & b3;
uint x2 = b0 & b4; uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i); indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
} }
return indices; return indices;
} }
static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) { static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) {
uint indices = 0; uint indices = 0;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights); float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights); float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights); float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights); float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint index; uint index;
if (d0 < d1 && d0 < d2 && d0 < d3) index = 0; if (d0 < d1 && d0 < d2 && d0 < d3) index = 0;
@ -491,7 +491,8 @@ float nv::compress_dxt1_single_color(const Vector3 * colors, const float * weigh
// Decompress block color. // Decompress block color.
Color32 palette[4]; Color32 palette[4];
output->evaluatePalette(palette, /*d3d9=*/false); evaluate_palette(output->col0, output->col1, palette);
//output->evaluatePalette(palette, /*d3d9=*/false);
Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]); Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]);
@ -668,7 +669,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// This is too expensive, even with a low threshold. // This is too expensive, even with a low threshold.
// If high quality: // If high quality:
if (0) { if (/* DISABLES CODE */ (0)) {
BlockDXT1 exhaustive_output; BlockDXT1 exhaustive_output;
float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output); float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output);
@ -720,7 +721,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// Least squares fitting of color end points for the given indices. @@ Take weights into account. // Least squares fitting of color end points for the given indices. @@ Take weights into account.
static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) static bool optimize_end_points4(uint indices, const Vector4 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{ {
float alpha2_sum = 0.0f; float alpha2_sum = 0.0f;
float beta2_sum = 0.0f; float beta2_sum = 0.0f;
@ -739,8 +740,8 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
alpha2_sum += alpha * alpha; alpha2_sum += alpha * alpha;
beta2_sum += beta * beta; beta2_sum += beta * beta;
alphabeta_sum += alpha * beta; alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i]; alphax_sum += alpha * colors[i].xyz();
betax_sum += beta * colors[i]; betax_sum += beta * colors[i].xyz();
} }
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum; float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
@ -756,7 +757,7 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
// Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account. // Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account.
static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b) static bool optimize_end_points3(uint indices, const Vector3 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{ {
float alpha2_sum = 0.0f; float alpha2_sum = 0.0f;
float beta2_sum = 0.0f; float beta2_sum = 0.0f;
@ -794,6 +795,90 @@ static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vec
// find minimum and maximum colors based on bounding box in color space
inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
*c0 = Vector3(0);
*c1 = Vector3(255);
for (int i = 0; i < count; i++) {
*c0 = max(*c0, colors[i]);
*c1 = min(*c1, colors[i]);
}
}
inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 center = (*c0 + *c1) * 0.5f;
Vector2 covariance = Vector2(0);
for (int i = 0; i < count; i++) {
Vector3 t = colors[i] - center;
covariance += t.xy() * t.z;
}
float x0 = c0->x;
float y0 = c0->y;
float x1 = c1->x;
float y1 = c1->y;
if (covariance.x < 0) {
swap(x0, x1);
}
if (covariance.y < 0) {
swap(y0, y1);
}
c0->set(x0, y0, c0->z);
c1->set(x1, y1, c1->z);
}
// Pull the two end points towards each other: each moves by 1/16 of (*c0 - *c1) minus a small
// constant bias, and the results are clamped to [0, 255].
// NOTE(review): the clamp range is 0..255 while the bias is written as 8/255 - confirm which
// color scale the callers use.
inline static void inset_bbox(Vector3 * restrict c0, Vector3 * restrict c1)
{
    const Vector3 extent = *c0 - *c1;
    const float bias = (8.0f / 255.0f) / 16.0f;
    const Vector3 inset = extent / 16.0f - bias;

    *c0 = clamp(*c0 - inset, 0.0f, 255.0f);
    *c1 = clamp(*c1 + inset, 0.0f, 255.0f);
}
// Fast DXT1 block compressor: quick bounding-box end point selection followed by one
// least-squares refinement step.
//
//  input_colors / input_weights : the 16 texels of the block and their weights.
//  color_weights                : per-channel weights for index selection and error evaluation.
//  output                       : receives the compressed block.
//
// Returns the error reported by compress_dxt1_single_color when that path is taken, otherwise
// the error computed by evaluate_mse for the final block; 0 for the trivial empty block.
float nv::compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output)
{
    Vector3 colors[16];
    float weights[16];
    const int count = reduce_colors(input_colors, input_weights, colors, weights);

    if (count == 0) {
        // Output trivial block.
        output->col0.u = 0;
        output->col1.u = 0;
        output->indices = 0;
        return 0;
    }

    // Try the single color encoding first; keep it if it is exact or only one color remains.
    const float single_color_error = compress_dxt1_single_color(colors, weights, count, color_weights, output);
    if (count == 1 || single_color_error == 0.0f) {
        return single_color_error;
    }

    // Quick end point selection.
    // NOTE(review): from here on the single color result is overwritten without comparing errors.
    Vector3 c0, c1;
    fit_colors_bbox(colors, count, &c0, &c1);
    select_diagonal(colors, count, &c0, &c1);
    inset_bbox(&c0, &c1);
    output_block4(input_colors, color_weights, c0, c1, output);

    // Refine the end points for the selected indices and re-emit the block when the fit succeeds.
    const bool refined = optimize_end_points4(output->indices, input_colors, 16, &c0, &c1);
    if (refined) {
        output_block4(input_colors, color_weights, c0, c1, output);
    }

    return evaluate_mse(input_colors, input_weights, color_weights, output);
}

View File

@ -13,11 +13,14 @@ namespace nv {
float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output); float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);
float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output); //float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output); float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output);
void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Cluster fit end point selection.
float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output); float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Quick end point selection followed by least squares refinement.
float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output);
} }

View File

@ -3,6 +3,7 @@
#include "OptimalCompressDXT.h" #include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
#include "CompressorETC.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h" #include "nvimage/BlockDXT.h"
@ -17,14 +18,9 @@
using namespace nv; using namespace nv;
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) { static void convert_to_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, Vector4 rgbm_colors[16], float rgb_weights[16]) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
float weight_sum = 0; float weight_sum = 0;
@ -41,7 +37,7 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
float b = B / M; float b = B / M;
float a = (M - min_m) / (1 - min_m); float a = (M - min_m) / (1 - min_m);
input_colors_rgbm[i] = Vector4(r, g, b, a); rgbm_colors[i] = Vector4(r, g, b, a);
rgb_weights[i] = input_weights[i] * M; rgb_weights[i] = input_weights[i] * M;
weight_sum += input_weights[i]; weight_sum += input_weights[i];
} }
@ -50,6 +46,18 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
for (uint i = 0; i < 16; i++) rgb_weights[i] = 1; for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
} }
}
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights);
// Compress RGB. // Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color); compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);
@ -138,291 +146,61 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
} }
float nv::compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output) {
// Convert to RGBM.
Vector4 rgbm_colors[16];
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, rgbm_colors, rgb_weights);
#if 0 void * etc_output = (uint8 *)output + 8;
void * eac_output = output;
BlockDXT5 * block = new(output)BlockDXT5; // Compress RGB.
compress_etc2(rgbm_colors, rgb_weights, Vector3(1), etc_output);
// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.
// Compress the resulting M values optimally.
// Repeat this several times until compression error does not improve?
//Vector3 rgb_block[16];
//float m_block[16];
// Init RGB/M block.
#if 0
nvsquish::WeightedClusterFit fit;
ColorBlock rgba;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgba.color(i) = toColor32(Vector4(r, g, b, a));
}
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}
#endif
#if 1
ColorSet rgb;
rgb.allocate(4, 4);
for (uint i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
// Decompress RGB/M block. // Decompress RGB/M block.
nv::ColorBlock RGB; decompress_etc(etc_output, rgbm_colors);
block->color.decodeBlock(&RGB);
#if 1 // Compute M values to compensate for RGB's error.
AlphaBlock4x4 M;
for (int i = 0; i < 16; i++) { for (int i = 0; i < 16; i++) {
const Vector4 & c = colors[i]; const Vector4 & c = input_colors[i];
float R = saturate(c.x); float R = saturate(c.x);
float G = saturate(c.y); float G = saturate(c.y);
float B = saturate(c.z); float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f; float rm = rgbm_colors[i].x;
float g = RGB.color(i).g / 255.0f; float gm = rgbm_colors[i].y;
float b = RGB.color(i).b / 255.0f; float bm = rgbm_colors[i].z;
float m = (R / r + G / g + B / b) / 3.0f; // compute m such that m * (r/M, g/M, b/M) == RGB
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m)); // Three equations, one unknown:
//float m = max(max(R, G), max(B, min_m)); // m * r/M == R
// m * g/M == G
// m * b/M == B
// Solve in the least squares sense!
// m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
// m == dot(rgb, RGB) / dot(rgb, rgb)
float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));
if (!isFinite(m)) {
m = 1;
}
m = (m - min_m) / (1 - min_m); m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f)); // Store M in alpha channel.
M.weights[i] = weights[i]; rgbm_colors[i].w = saturate(m); // @@ What it we don't saturate?
} }
// Compress M. // Compress M.
if (compressionOptions.quality == Quality_Fastest) { compress_eac(rgbm_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*11bit_mode*/false, eac_output);
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#else
OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif
#if 0 return 0; // @@ Compute error.
// Decompress M. }
block->alpha.decodeBlock(&M);
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
//float m = max(max(R, G), max(B, min_m));
float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
float r = R / m;
float g = G / m;
float b = B / m;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
#if 0
block->color.decodeBlock(&RGB);
//AlphaBlock4x4 M;
//M.initWeights(src);
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#endif
#if 0
src.fromRGBM(M, min_m);
src.createMinimalSet(/*ignoreTransparent=*/true);
if (src.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(src.color(0), &block->color);
}
else {
// @@ Use our improved compressor.
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&src);
Vector3 start, end;
fit.compress4(&start, &end);
if (fit.compress3(&start, &end)) {
QuickCompress::outputBlock3(src, start, end, block->color);
}
else {
QuickCompress::outputBlock4(src, start, end, block->color);
}
}
#endif // 0
// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?
// RGBM encoding.
// Maximize precision.
// - Number of possible grey levels:
// - Naive: 2^3 = 8
// - Better: 2^3 + 2^2 = 12
// - How to choose min_m?
// - Ideal = Adaptive per block, don't know where to store.
// - Adaptive per lightmap. How to compute optimal?
// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.
// - Optimal compressor: Interpolation artifacts.
// - Color transform.
// - Measure error in post-tone-mapping color space.
// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
// - Guess based on average lighmap color? Use fixed exposure, in scaled lightmap space.
// - Enhanced DXT compressor.
// - Typical RGBM encoding as follows:
// rgb -> M = max(rgb), RGB=rgb/M -> RGBM
// - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
// - We could ensure that M' >= M during compression.
// - We could clamp RGB anyway.
// - We could add a fixed scale value to take into account compression errors and avoid clamping.
// Compress color.
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}*/
#endif // 0

View File

@ -5,5 +5,5 @@ namespace nv {
class Vector4; class Vector4;
float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output); float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output);
float compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output);
} }

2307
src/nvtt/CompressorETC.cpp Normal file

File diff suppressed because it is too large Load Diff

20
src/nvtt/CompressorETC.h Normal file
View File

@ -0,0 +1,20 @@
// Block-level ETC1 / ETC2 / EAC entry points.
// Every function here operates on one block of 16 texels passed as an array of Vector4,
// and reads or writes a single encoded block through an untyped pointer.
#include "nvcore/nvcore.h"
namespace nv {
class Vector3;
class Vector4;
// Decoders: read one encoded block from input_block and store the decoded texels in output_colors.
// NOTE(review): the value range of the decoded components is not visible from this header;
// compress_etc2_rgbm compares the decoded x/y/z against saturated [0,1] inputs, so presumably [0,1] -- confirm.
void decompress_etc(const void * input_block, Vector4 output_colors[16]);
// output_channel presumably selects which Vector4 component receives the decoded EAC value -- TODO confirm.
void decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel);
void decompress_etc_eac(const void * input_block, Vector4 output_colors[16]);
// Encoders: compress the 16 input colors (with per-texel weights) into the block at output.
// color_weights is a per-channel weighting of the RGB error metric (callers in this commit pass Vector3(1)).
// The float return value is presumably the resulting compression error, as for the DXT compressors -- TODO confirm.
float compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
// input_channel picks the Vector4 component to encode (compress_etc2_rgbm passes 3 to encode .w).
// search_radius and use_11bit_mode tune the EAC encoder; their exact semantics live in CompressorETC.cpp.
float compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output);
float compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
}

View File

@ -250,6 +250,8 @@ namespace
// Compute shared exponent. // Compute shared exponent.
int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B; int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B;
nvDebugCheck(exp_shared_p <= Emax);
nvDebugCheck(exp_shared_p >= 0);
int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N))); int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N)));
@ -279,7 +281,7 @@ namespace
{ {
float v = max3(r, g, b); float v = max3(r, g, b);
uint rgbe; uint rgbe = 0;
if (v < 1e-32) { if (v < 1e-32) {
rgbe = 0; rgbe = 0;
@ -534,6 +536,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
} }
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) { else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
// @@ // @@
ir = ig = ib = ia = 0;
} }
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) { else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
ir = iround(clamp(r, 0.0f, 65535.0f)); ir = iround(clamp(r, 0.0f, 65535.0f));
@ -543,6 +546,11 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
} }
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) { else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
// @@ // @@
ir = ig = ib = ia = 0;
}
else {
// @@
ir = ig = ib = ia = 0;
} }
uint p = 0; uint p = 0;

View File

@ -39,6 +39,7 @@
#include "cuda/CudaCompressorDXT.h" #include "cuda/CudaCompressorDXT.h"
#include "nvimage/DirectDrawSurface.h" #include "nvimage/DirectDrawSurface.h"
#include "nvimage/KtxFile.h"
#include "nvimage/ColorBlock.h" #include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h" #include "nvimage/BlockDXT.h"
#include "nvimage/Image.h" #include "nvimage/Image.h"
@ -51,6 +52,7 @@
#include "nvcore/Memory.h" #include "nvcore/Memory.h"
#include "nvcore/Ptr.h" #include "nvcore/Ptr.h"
#include "nvcore/Array.inl"
using namespace nv; using namespace nv;
using namespace nvtt; using namespace nvtt;
@ -222,11 +224,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
return false; return false;
} }
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
const int faceCount = inputOptions.faceCount; const int faceCount = inputOptions.faceCount;
int width = inputOptions.width; int width = inputOptions.width;
int height = inputOptions.height; int height = inputOptions.height;
@ -244,97 +241,230 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel); if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel);
} }
if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions)) { if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, inputOptions.isNormalMap, compressionOptions, outputOptions)) {
return false; return false;
} }
// Output images. if (outputOptions.container != Container_KTX)
for (int f = 0; f < faceCount; f++)
{ {
int w = width; nvtt::Surface img;
int h = height; img.setWrapMode(inputOptions.wrapMode);
int d = depth; img.setAlphaMode(inputOptions.alphaMode);
bool canUseSourceImagesForThisFace = canUseSourceImages; img.setNormalMap(inputOptions.isNormalMap);
img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]); // Output each face from the largest mipmap to the smallest.
for (int f = 0; f < faceCount; f++)
{
int w = width;
int h = height;
int d = depth;
bool canUseSourceImagesForThisFace = canUseSourceImages;
// To normal map. img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
if (inputOptions.convertToNormalMap) {
img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
img.packNormals();
}
// To linear space. // To normal map.
if (!img.isNormalMap()) { if (inputOptions.convertToNormalMap) {
img.toLinear(inputOptions.inputGamma); img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
} img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
// Resize input.
img.resize(w, h, d, ResizeFilter_Box);
nvtt::Surface tmp = img;
if (!img.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
for (int m = 1; m < mipmapCount; m++) {
w = max(1, w/2);
h = max(1, h/2);
d = max(1, d/2);
int idx = m * faceCount + f;
bool useSourceImages = false;
if (canUseSourceImagesForThisFace) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
} }
if (useSourceImages) { // To linear space.
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]); if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
// For already generated mipmaps, we need to convert to linear. // Resize input.
if (!img.isNormalMap()) { img.resize(w, h, d, ResizeFilter_Box);
img.toLinear(inputOptions.inputGamma);
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) { nvtt::Surface tmp = img;
if (inputOptions.normalizeMipmaps) { if (!img.isNormalMap()) {
img.expandNormals();
img.normalizeNormalMap();
img.packNormals();
}
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma); tmp.toGamma(inputOptions.outputGamma);
} }
quantize(tmp, compressionOptions); quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions); compress(tmp, f, 0, compressionOptions, outputOptions);
for (int m = 1; m < mipmapCount; m++) {
w = max(1, w/2);
h = max(1, h/2);
d = max(1, d/2);
int idx = m * faceCount + f;
bool useSourceImages = false;
if (canUseSourceImagesForThisFace) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
}
if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
// For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.expandNormals();
img.normalizeNormalMap();
img.packNormals();
}
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions);
}
}
}
else
{
    // KTX container. Unlike the other containers, KTX stores the data level by level:
    // for every mipmap level a 32-bit imageSize is written, followed by that level of every
    // face, followed by padding up to a 4-byte boundary. One Surface per face is therefore
    // kept alive so that each face's mipmap chain can be advanced one level at a time.
    Array<nvtt::Surface> images(faceCount);
    Array<bool> mipChainBroken(faceCount);  // true once a face is missing a source image for some level.

    int w = width;
    int h = height;
    int d = depth;

    static const unsigned char padding[3] = {0, 0, 0};

    // https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
    // The base level is written with depth d (see resize below), so its size must account for d too.
    // NOTE(review): the KTX spec defines imageSize for non-array cubemaps as the size of ONE face;
    // multiplying by faceCount looks wrong for that case -- confirm against the KTX reader before changing.
    uint imageSize = estimateSize(w, h, d, 1, compressionOptions) * faceCount;
    outputOptions.writeData(&imageSize, sizeof(uint32));

    // Level 0 of every face.
    for (int f = 0; f < faceCount; f++)
    {
        nvtt::Surface s;
        s.setWrapMode(inputOptions.wrapMode);
        s.setAlphaMode(inputOptions.alphaMode);
        s.setNormalMap(inputOptions.isNormalMap);

        s.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);

        // To normal map.
        if (inputOptions.convertToNormalMap) {
            s.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
            s.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
        }

        // To linear space.
        if (!s.isNormalMap()) {
            s.toLinear(inputOptions.inputGamma);
        }

        // Resize input.
        s.resize(w, h, d, ResizeFilter_Box);

        // Compress a gamma-corrected copy; the linear surface is kept for mipmap generation.
        nvtt::Surface tmp = s;
        if (!s.isNormalMap()) {
            tmp.toGamma(inputOptions.outputGamma);
        }

        quantize(tmp, compressionOptions);
        compress(tmp, f, 0, compressionOptions, outputOptions);

        images.push_back(s);
        mipChainBroken.push_back(false);
    }

    // mipPadding applies to every level, including the base level.
    int baseLevelPadding = 3 - ((imageSize + 3) % 4);
    if (baseLevelPadding != 0) {
        outputOptions.writeData(padding, baseLevelPadding);
    }

    for (int m = 1; m < mipmapCount; m++)
    {
        w = max(1, w/2);
        h = max(1, h/2);
        d = max(1, d/2);

        // https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
        imageSize = estimateSize(w, h, d, 1, compressionOptions) * faceCount;
        outputOptions.writeData(&imageSize, sizeof(uint32));

        nvtt::Surface tmp;

        for (int f = 0; f < faceCount; f++)
        {
            nvtt::Surface& img = images[f];
            int idx = m * faceCount + f;

            bool useSourceImages = false;
            if (!mipChainBroken[f]) {
                if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
                    mipChainBroken[f] = true; // If one level is missing, ignore the following source images.
                }
                else {
                    useSourceImages = true;
                }
            }

            if (useSourceImages) {
                img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);

                // For already generated mipmaps, we need to convert to linear.
                if (!img.isNormalMap()) {
                    img.toLinear(inputOptions.inputGamma);
                }
            }
            else {
                if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
                    // Same parameter order as the non-KTX path: alpha first, then stretch.
                    float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
                    img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
                }
                else {
                    img.buildNextMipmap(inputOptions.mipmapFilter);
                }
            }
            nvDebugCheck(img.width() == w);
            nvDebugCheck(img.height() == h);
            nvDebugCheck(img.depth() == d);

            if (img.isNormalMap()) {
                if (inputOptions.normalizeMipmaps) {
                    // NOTE(review): the non-KTX path wraps this call in expandNormals()/packNormals();
                    // confirm whether the KTX path should do the same.
                    img.normalizeNormalMap();
                }
                tmp = img;
            }
            else {
                tmp = img;
                tmp.toGamma(inputOptions.outputGamma);
            }

            quantize(tmp, compressionOptions);
            compress(tmp, f, m, compressionOptions, outputOptions);

            //cube padding
            if (faceCount == 6 && arraySize == 1)
            {
                //TODO calc offset for uncompressed images
            }
        }

        // Pad the level to a multiple of 4 bytes.
        int mipPadding = 3 - ((imageSize + 3) % 4);
        if (mipPadding != 0) {
            outputOptions.writeData(padding, mipPadding);
        }
    }
}
@ -673,6 +803,131 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
return writeSucceed; return writeSucceed;
} }
// KTX container: fill in a KtxHeader from the texture description and the requested
// compression format, then write it out as a single 64-byte record.
else if (outputOptions.container == Container_KTX)
{
KtxHeader header;
// TODO cube arrays
// Texture layout. Only TextureType_Array may have arraySize != 1.
// NOTE(review): there is no final else; for any other textureType the array/face/depth
// fields keep whatever KtxHeader's default state is -- confirm that is intended.
if (textureType == TextureType_2D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = 0;
}
else if (textureType == TextureType_Cube) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 6;
header.pixelDepth = 0;
}
else if (textureType == TextureType_3D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = d;
}
else if (textureType == TextureType_Array) {
header.numberOfArrayElements = arraySize;
header.numberOfFaces = 1;
header.pixelDepth = 0; // Is it?
}
header.pixelWidth = w;
header.pixelHeight = h;
header.numberOfMipmapLevels = mipmapCount;
// Cleared below when the requested format has no KTX mapping in this function.
bool supported = true;
// TODO non-compressed formats
if (compressionOptions.format == Format_RGBA)
{
// NOTE(review): nothing is assigned here and 'supported' stays true, so the header is
// written with KtxHeader's default gl* fields -- confirm those defaults describe RGBA data.
//header.glType = ?;
//header.glTypeSize = ?;
//header.glFormat = ?;
}
else
{
// Block-compressed formats: glType and glFormat are 0 and glTypeSize is 1; only the
// internal format and base internal format vary. The sRGB variant is chosen from
// outputOptions.srgb where one is mapped.
header.glType = 0;
header.glTypeSize = 1;
header.glFormat = 0;
if (compressionOptions.format == Format_DXT1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_DXT1a) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT3) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
// BC3_RGBM is stored as a regular DXT5 block, so it shares the DXT5 mapping.
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_BC4) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_RGTC1; // KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_BC5) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_RGTC2; // KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_BC6) {
if (compressionOptions.pixelType == PixelType_Float) header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
else /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; // By default we assume unsigned.
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_BC7) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM : KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_ETC1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC1 : KTX_INTERNAL_COMPRESSED_RGB_ETC1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_R) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_ETC2_RG) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_ETC2_RGB) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC2 : KTX_INTERNAL_COMPRESSED_RGB_ETC2;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_RGBA) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC : KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else {
// Any other format has no mapping here and is reported as unsupported below.
supported = false;
}
//TODO compressionOptions.format == Format_DXT1n, Format_DXT5n ? There seems to be no way to indicate a normal map using ktx. Maybe via key value data?
}
if (!supported)
{
// This container does not support the requested format.
outputOptions.error(Error_UnsupportedOutputFormat);
return false;
}
// The header is written verbatim from memory; the static check pins its size to the 64 bytes written.
const uint headerSize = 64;
nvStaticCheck(sizeof(KtxHeader) == 64);
bool writeSucceed = outputOptions.writeData(&header, headerSize);
if (!writeSucceed)
{
outputOptions.error(Error_FileWrite);
}
return writeSucceed;
}
return true; return true;
} }
@ -788,15 +1043,34 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
{ {
return new CompressorBC7; return new CompressorBC7;
} }
/*else if (compressionOptions.format == Format_BC5_Luma)
{
return new ProductionCompressorBC5_Luma;
}*/
else if (compressionOptions.format == Format_BC3_RGBM) else if (compressionOptions.format == Format_BC3_RGBM)
{ {
return new CompressorBC3_RGBM; return new CompressorBC3_RGBM;
} }
// ETC family. External encoders are tried first, and only when they were compiled in
// (HAVE_RGETC / HAVE_ETCLIB) and requested by name through externalCompressor; otherwise
// the built-in compressor for the format is returned.
// Formats in this range without a built-in compressor (Format_ETC2_RG is commented out,
// and nothing is listed for Format_ETC2_RGB_A1) fall through to the final 'return NULL'.
else if (compressionOptions.format >= Format_ETC1 && compressionOptions.format <= Format_ETC2_RGB_A1)
{
#if defined(HAVE_RGETC)
if (compressionOptions.format == Format_ETC1 && compressionOptions.externalCompressor == "rg_etc") return new RgEtcCompressor;
#endif
#if defined(HAVE_ETCLIB)
if (compressionOptions.externalCompressor == "etclib") return new EtcLibCompressor;
#endif
if (compressionOptions.format == Format_ETC1) return new CompressorETC1;
else if (compressionOptions.format == Format_ETC2_R) return new CompressorETC2_R;
//else if (compressionOptions.format == Format_ETC2_RG) return new CompressorETC2_RG;
else if (compressionOptions.format == Format_ETC2_RGB) return new CompressorETC2_RGB;
else if (compressionOptions.format == Format_ETC2_RGBA) return new CompressorETC2_RGBA;
}
else if (compressionOptions.format == Format_ETC2_RGBM)
{
return new CompressorETC2_RGBM;
}
// PVR formats are only available when built with HAVE_PVRTEXTOOL; without it this branch
// is empty and control reaches the final 'return NULL'.
else if (compressionOptions.format >= Format_PVR_2BPP_RGB && compressionOptions.format <= Format_PVR_4BPP_RGBA)
{
#if defined(HAVE_PVRTEXTOOL)
return new CompressorPVR;
#endif
}
return NULL; return NULL;
} }
@ -860,3 +1134,24 @@ CompressorInterface * Compressor::Private::chooseGpuCompressor(const Compression
return NULL; return NULL;
} }
// Sums the size, as reported by computeImageSize, of 'mipmapCount' consecutive mipmap
// levels. The first level has extents w x h x d; each following level halves every
// extent, never going below 1. Returns 0 when mipmapCount is not positive.
int Compressor::Private::estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const
{
    const Format format = compressionOptions.format;
    const uint bitCount = compressionOptions.bitcount;
    const uint pitchAlignment = compressionOptions.pitchAlignment;

    int totalSize = 0;

    for (int levelsLeft = mipmapCount; levelsLeft > 0; levelsLeft--)
    {
        totalSize += computeImageSize(w, h, d, bitCount, pitchAlignment, format);

        // Extents of the next, smaller level.
        w = max(1, w / 2);
        h = max(1, h / 2);
        d = max(1, d / 2);
    }

    return totalSize;
}

View File

@ -56,6 +56,7 @@ namespace nvtt
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const; nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const; nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const;
bool cudaSupported; bool cudaSupported;
bool cudaEnabled; bool cudaEnabled;

View File

@ -34,59 +34,59 @@
namespace nvtt namespace nvtt
{ {
struct DefaultOutputHandler : public nvtt::OutputHandler struct DefaultOutputHandler : public nvtt::OutputHandler
{ {
DefaultOutputHandler(const char * fileName) : stream(fileName) {} DefaultOutputHandler(const char * fileName) : stream(fileName) {}
DefaultOutputHandler(FILE * fp) : stream(fp, false) {} DefaultOutputHandler(FILE * fp) : stream(fp, false) {}
virtual ~DefaultOutputHandler() {} virtual ~DefaultOutputHandler() {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel) virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{ {
// ignore. // ignore.
} }
// Output data. // Output data.
virtual bool writeData(const void * data, int size) virtual bool writeData(const void * data, int size)
{ {
stream.serialize(const_cast<void *>(data), size); stream.serialize(const_cast<void *>(data), size);
//return !stream.isError(); //return !stream.isError();
return true; return true;
} }
virtual void endImage() virtual void endImage()
{ {
// ignore. // ignore.
} }
nv::StdOutputStream stream; nv::StdOutputStream stream;
}; };
struct OutputOptions::Private struct OutputOptions::Private
{ {
nv::Path fileName; nv::Path fileName;
FILE * fileHandle; FILE * fileHandle;
OutputHandler * outputHandler; OutputHandler * outputHandler;
ErrorHandler * errorHandler; ErrorHandler * errorHandler;
bool outputHeader; bool outputHeader;
Container container; Container container;
int version; int version;
bool srgb; bool srgb;
bool deleteOutputHandler; bool deleteOutputHandler;
void * wrapperProxy; // For the C/C# wrapper. void * wrapperProxy; // For the C/C# wrapper.
bool hasValidOutputHandler() const; bool hasValidOutputHandler() const;
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const; void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
bool writeData(const void * data, int size) const; bool writeData(const void * data, int size) const;
void endImage() const; void endImage() const;
void error(Error e) const; void error(Error e) const;
}; };
} // nvtt namespace } // nvtt namespace

View File

@ -39,21 +39,21 @@ namespace nv
struct AlphaBlockDXT5; struct AlphaBlockDXT5;
class Vector3; class Vector3;
namespace QuickCompress namespace QuickCompress
{ {
void compressDXT1(const ColorBlock & src, BlockDXT1 * dst); void compressDXT1(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst); void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT3(const ColorBlock & src, BlockDXT3 * dst); void compressDXT3(const ColorBlock & src, BlockDXT3 * dst);
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8); void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8); void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8); void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8);
void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block); void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
} }
} // nv namespace } // nv namespace
#endif // NV_TT_QUICKCOMPRESSDXT_H #endif // NV_TT_QUICKCOMPRESSDXT_H

View File

@ -23,12 +23,14 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include "Surface.h" #include "Surface.h"
#include "CompressorETC.h" // for ETC decoder.
#include "nvmath/Vector.inl" #include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl" #include "nvmath/Matrix.inl"
#include "nvmath/Color.h" #include "nvmath/Color.h"
#include "nvmath/Half.h" #include "nvmath/Half.h"
#include "nvmath/ftoi.h" #include "nvmath/ftoi.h"
#include "nvmath/PackedFloat.h"
#include "nvimage/Filter.h" #include "nvimage/Filter.h"
#include "nvimage/ImageIO.h" #include "nvimage/ImageIO.h"
@ -39,8 +41,13 @@
#include "nvimage/ErrorMetric.h" #include "nvimage/ErrorMetric.h"
#include "nvimage/DirectDrawSurface.h" #include "nvimage/DirectDrawSurface.h"
#include "nvthread/ParallelFor.h"
#include "nvcore/Array.inl"
#include <float.h> #include <float.h>
#include <string.h> // memset, memcpy #include <string.h> // memset, memcpy
//#include <stdio.h> // printf?
#if NV_CC_GNUC #if NV_CC_GNUC
#include <math.h> // exp2f and log2f #include <math.h> // exp2f and log2f
@ -123,6 +130,18 @@ namespace
else if (format == Format_BC7) { else if (format == Format_BC7) {
return 16; return 16;
} }
else if (format == Format_ETC1 || format == Format_ETC2_R || format == Format_ETC2_RGB) {
return 8;
}
else if (format == Format_ETC2_RG || format == Format_ETC2_RGBA || format == Format_ETC2_RGBM) {
return 16;
}
else if (format == Format_PVR_2BPP_RGB || format == Format_PVR_2BPP_RGBA) {
return 4;
}
else if (format == Format_PVR_4BPP_RGB || format == Format_PVR_4BPP_RGBA) {
return 8;
}
return 0; return 0;
} }
@ -197,7 +216,7 @@ uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlign
} }
} }
void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType) { void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType, nvtt::ShapeRestriction shapeRestriction /*= nvtt::ShapeRestriction_None*/) {
nvDebugCheck(width != NULL && *width > 0); nvDebugCheck(width != NULL && *width > 0);
nvDebugCheck(height != NULL && *height > 0); nvDebugCheck(height != NULL && *height > 0);
nvDebugCheck(depth != NULL && *depth > 0); nvDebugCheck(depth != NULL && *depth > 0);
@ -234,21 +253,21 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
// Round to power of two. // Round to power of two.
if (roundMode == RoundMode_ToNextPowerOfTwo) if (roundMode == RoundMode_ToNextPowerOfTwo)
{ {
w = nextPowerOfTwo(w); w = nextPowerOfTwo(U32(w));
h = nextPowerOfTwo(h); h = nextPowerOfTwo(U32(h));
d = nextPowerOfTwo(d); d = nextPowerOfTwo(U32(d));
} }
else if (roundMode == RoundMode_ToNearestPowerOfTwo) else if (roundMode == RoundMode_ToNearestPowerOfTwo)
{ {
w = nearestPowerOfTwo(w); w = nearestPowerOfTwo(U32(w));
h = nearestPowerOfTwo(h); h = nearestPowerOfTwo(U32(h));
d = nearestPowerOfTwo(d); d = nearestPowerOfTwo(U32(d));
} }
else if (roundMode == RoundMode_ToPreviousPowerOfTwo) else if (roundMode == RoundMode_ToPreviousPowerOfTwo)
{ {
w = previousPowerOfTwo(w); w = previousPowerOfTwo(U32(w));
h = previousPowerOfTwo(h); h = previousPowerOfTwo(U32(h));
d = previousPowerOfTwo(d); d = previousPowerOfTwo(U32(d));
} }
else if (roundMode == RoundMode_ToNextMultipleOfFour) else if (roundMode == RoundMode_ToNextMultipleOfFour)
{ {
@ -269,6 +288,38 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
d = previousMultipleOfFour(d); d = previousMultipleOfFour(d);
} }
if(shapeRestriction == ShapeRestriction_Square)
{
if (textureType == TextureType_2D)
{
int md = nv::min(w,h);
w = md;
h = md;
d = 1;
}
else if (textureType == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
else if (textureType == TextureType_Cube)
{
int md = nv::min(w, h);
w = md;
h = md;
d = 1;
}
}
else
{
if (textureType == TextureType_2D || textureType == TextureType_Cube)
{
d = 1;
}
}
*width = w; *width = w;
*height = h; *height = h;
*depth = d; *depth = d;
@ -509,8 +560,8 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c
} }
} }
*rangeMin = range.x; if (rangeMin) *rangeMin = range.x;
*rangeMax = range.y; if (rangeMax) *rangeMax = range.y;
} }
bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
@ -583,7 +634,7 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
} }
// @@ Have loadFloat allocate the image with the desired number of channels. // @@ Have loadFloat allocate the image with the desired number of channels.
img->resizeChannelCount(4); //img->resizeChannelCount(4);
delete m->image; delete m->image;
m->image = img.release(); m->image = img.release();
@ -601,7 +652,8 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c
return ImageIO::saveFloat(fileName, m->image, 0, 4); return ImageIO::saveFloat(fileName, m->image, 0, 4);
} }
else { else {
AutoPtr<Image> image(m->image->createImage(0, 4)); uint c = min<uint>(m->image->componentCount(), 4);
AutoPtr<Image> image(m->image->createImage(0, c));
nvCheck(image != NULL); nvCheck(image != NULL);
if (hasAlpha) { if (hasAlpha) {
@ -829,16 +881,35 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
return true; return true;
} }
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTDecompress.h>
#endif
// @@ Add support for compressed 3D textures. // @@ Add support for compressed 3D textures.
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data) bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
{ {
if (format != nvtt::Format_BC1 && if (format != nvtt::Format_BC1 &&
format != nvtt::Format_BC2 && format != nvtt::Format_BC2 &&
format != nvtt::Format_BC3 && format != nvtt::Format_BC3 &&
format != nvtt::Format_BC3n &&
format != nvtt::Format_BC3_RGBM &&
format != nvtt::Format_BC4 && format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 && format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6 && format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7) format != nvtt::Format_BC7 &&
format != nvtt::Format_ETC1 &&
format != nvtt::Format_ETC2_R &&
format != nvtt::Format_ETC2_RG &&
format != nvtt::Format_ETC2_RGB &&
format != nvtt::Format_ETC2_RGBA &&
format != nvtt::Format_ETC2_RGBM
#if defined(HAVE_PVRTEXTOOL)
&& format != nvtt::Format_PVR_2BPP_RGB
&& format != nvtt::Format_PVR_4BPP_RGB
&& format != nvtt::Format_PVR_2BPP_RGBA
&& format != nvtt::Format_PVR_4BPP_RGBA
#endif
)
{ {
return false; return false;
} }
@ -851,7 +922,7 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
m->image->allocate(4, w, h, 1); m->image->allocate(4, w, h, 1);
m->type = TextureType_2D; m->type = TextureType_2D;
const int bw = (w + 3) / 4; const int bw = (w + 3) / 4; // @@ Not if PVR 2bpp!
const int bh = (h + 3) / 4; const int bh = (h + 3) / 4;
const uint bs = blockSize(format); const uint bs = blockSize(format);
@ -859,130 +930,166 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
const uint8 * ptr = (const uint8 *)data; const uint8 * ptr = (const uint8 *)data;
TRY { TRY {
if (format == nvtt::Format_BC6) #if defined(HAVE_PVRTEXTOOL)
{ if (format >= nvtt::Format_PVR_2BPP_RGB && format <= nvtt::Format_PVR_4BPP_RGBA)
// BC6 format - decode directly to float {
bool two_bit_mode = (format == nvtt::Format_PVR_2BPP_RGB || format == nvtt::Format_PVR_2BPP_RGBA);
for (int y = 0; y < bh; y++) uint8 * output = new uint8[4 * w * h];
{
for (int x = 0; x < bw; x++)
{
Vector3 colors[16];
const BlockBC6 * block = (const BlockBC6 *)ptr;
block->decodeBlock(colors);
for (int yy = 0; yy < 4; yy++) PVRTDecompressPVRTC(ptr, two_bit_mode, w, h, output);
{
for (int xx = 0; xx < 4; xx++)
{
Vector3 rgb = colors[yy*4 + xx];
if (x * 4 + xx < w && y * 4 + yy < h) for (int y = 0; y < h; y++) {
{ for (int x = 0; x < w; x++) {
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x; m->image->pixel(0, x, y, 0) = output[4*(y*w + x) + 0] / 255.0f;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y; m->image->pixel(1, x, y, 0) = output[4*(y*w + x) + 1] / 255.0f;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z; m->image->pixel(2, x, y, 0) = output[4*(y*w + x) + 2] / 255.0f;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f; m->image->pixel(3, x, y, 0) = output[4*(y*w + x) + 3] / 255.0f;
} }
} }
}
ptr += bs; delete [] output;
} }
} else
} #endif
else if (format == nvtt::Format_BC6 || (format >= nvtt::Format_ETC1 && format <= nvtt::Format_ETC2_RGBM))
{ {
// Non-BC6 - decode to 8-bit, then convert to float // Some formats we decode directly to float:
for (int y = 0; y < bh; y++) for (int y = 0; y < bh; y++) {
{ for (int x = 0; x < bw; x++) {
for (int x = 0; x < bw; x++) Vector4 colors[16];
{
ColorBlock colors;
if (format == nvtt::Format_BC1) if (format == nvtt::Format_BC6) {
{ const BlockBC6 * block = (const BlockBC6 *)ptr;
const BlockDXT1 * block = (const BlockDXT1 *)ptr; block->decodeBlock(colors);
}
else if (format == nvtt::Format_ETC1 || format == nvtt::Format_ETC2_RGB) {
nv::decompress_etc(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RGBA || format == nvtt::Format_ETC2_RGBM) {
nv::decompress_etc_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_R) {
// @@ Not implemented.
//nv::decompress_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RG) {
// @@ Not implemented.
//nv::decompress_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RGB_A1) {
// @@ Not implemented?
//nv::decompress_etc(ptr, colors);
}
if (decoder == Decoder_D3D10) { for (int yy = 0; yy < 4; yy++) {
block->decodeBlock(&colors, false); for (int xx = 0; xx < 4; xx++) {
} Vector4 c = colors[yy*4 + xx];
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC2)
{
const BlockDXT3 * block = (const BlockDXT3 *)ptr;
if (decoder == Decoder_D3D10) { if (x * 4 + xx < w && y * 4 + yy < h) {
block->decodeBlock(&colors, false); m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = c.x;
} m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = c.y;
else if (decoder == Decoder_D3D9) { m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = c.z;
block->decodeBlock(&colors, false); m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = c.w;
} }
else if (decoder == Decoder_NV5x) { }
block->decodeBlockNV5x(&colors); }
}
}
else if (format == nvtt::Format_BC3)
{
const BlockDXT5 * block = (const BlockDXT5 *)ptr;
if (decoder == Decoder_D3D10) { ptr += bs;
block->decodeBlock(&colors, false); }
} }
else if (decoder == Decoder_D3D9) { }
block->decodeBlock(&colors, false); else
} {
else if (decoder == Decoder_NV5x) { // Others, we decode to 8-bit, then convert to float
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC4)
{
const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC7)
{
const BlockBC7 * block = (const BlockBC7 *)ptr;
block->decodeBlock(&colors);
}
else
{
nvDebugCheck(false);
}
for (int yy = 0; yy < 4; yy++) for (int y = 0; y < bh; y++) {
{ for (int x = 0; x < bw; x++) {
for (int xx = 0; xx < 4; xx++) ColorBlock colors;
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < w && y * 4 + yy < h) if (format == nvtt::Format_BC1)
{ {
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; const BlockDXT1 * block = (const BlockDXT1 *)ptr;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
}
}
}
ptr += bs; if (decoder == Decoder_D3D10) {
} block->decodeBlock(&colors, false);
} }
} else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC2)
{
const BlockDXT3 * block = (const BlockDXT3 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC3 || format == nvtt::Format_BC3n || format == nvtt::Format_BC3_RGBM)
{
const BlockDXT5 * block = (const BlockDXT5 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC4)
{
const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC7)
{
const BlockBC7 * block = (const BlockBC7 *)ptr;
block->decodeBlock(&colors);
}
else
{
nvDebugCheck(false);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < w && y * 4 + yy < h)
{
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
}
}
}
ptr += bs;
}
}
}
} }
CATCH { CATCH {
return false; return false;
@ -1092,7 +1199,7 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth
m->image = img; m->image = img;
} }
void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter) void Surface::resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter)
{ {
if (isNull()) return; if (isNull()) return;
@ -1104,27 +1211,17 @@ void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilte
int h = m->image->height(); int h = m->image->height();
int d = m->image->depth(); int d = m->image->depth();
getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type); getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type, nvtt::ShapeRestriction_Square);
if (m->type == TextureType_2D) if (m->type == TextureType_2D)
{ {
nvDebugCheck(d==1); nvDebugCheck(d==1);
int md = nv::min(w,h);
w = md;
h = md;
} }
else if (m->type == TextureType_Cube) else if (m->type == TextureType_Cube)
{ {
nvDebugCheck(d==1); nvDebugCheck(d==1);
nvDebugCheck(w==h); nvDebugCheck(w==h);
} }
else if (m->type == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
resize(w, h, d, filter, filterWidth, params); resize(w, h, d, filter, filterWidth, params);
} }
@ -1151,6 +1248,63 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl
resize(w, h, d, filter, filterWidth, params); resize(w, h, d, filter, filterWidth, params);
} }
// Error between `original` and a `resized` version of it, as reported by
// nv::rmsBilinearColorError. The wrap mode and the "alpha mode is
// transparency" flag are both taken from `original`.
float rmsBilinearError(nvtt::Surface original, nvtt::Surface resized) {
    const FloatImage::WrapMode wrap = (FloatImage::WrapMode)original.wrapMode();
    const bool transparentAlpha = (original.alphaMode() == AlphaMode_Transparency);

    return nv::rmsBilinearColorError(original.m->image, resized.m->image, wrap, transparentAlpha);
}
void Surface::autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter)
{
Surface original = *this;
Surface resized = original;
int w = width();
int h = height();
int d = depth();
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
while (w >= 4 && h >= 4 && d >= 1) {
// Resize always from original? This is more expensive, but should produce higher quality.
//resized = original;
resized.resize(w, h, d, filter);
#if 0
// Scale back up to original size. @@ Upscaling not implemented!
Surface restored = resized;
restored.resize(original.width(), original.height(), original.depth(), ResizeFilter_Triangle);
float error;
if (isNormalMap()) {
error = nvtt::angularError(original, restored);
}
else {
error = nvtt::rmsError(original, restored);
}
#else
float error = rmsBilinearError(original, resized);
#endif
if (error < errorTolerance) {
*this = resized;
nvDebug("image resized %dx%d -> %dx%d (error=%f)\n", original.width(), original.height(), w, h, error);
}
else {
nvDebug("image can't be resized further (error=%f)\n", error);
break;
}
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
}
bool Surface::canMakeNextMipmap(int min_size /*= 1*/) bool Surface::canMakeNextMipmap(int min_size /*= 1*/)
{ {
if (isNull()) return false; if (isNull()) return false;
@ -1196,7 +1350,7 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa
{ {
nvDebugCheck(filter == MipmapFilter_Kaiser); nvDebugCheck(filter == MipmapFilter_Kaiser);
KaiserFilter filter(filterWidth); KaiserFilter filter(filterWidth);
if (params != NULL) filter.setParameters(params[0], params[1]); if (params != NULL) filter.setParameters(/*alpha=*/params[0], /*stretch=*/params[1]);
img = img->downSample(filter, wrapMode, 3); img = img->downSample(filter, wrapMode, 3);
} }
} }
@ -1357,8 +1511,9 @@ void Surface::toSrgb()
for (uint c = 0; c < 3; c++) { for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c); float * channel = img->channel(c);
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::toSrgb(channel[i]); channel[i] = ::toSrgb(channel[i]);
} }//);
} }
} }
@ -1382,8 +1537,9 @@ void Surface::toLinearFromSrgb()
for (uint c = 0; c < 3; c++) { for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c); float * channel = img->channel(c);
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::fromSrgb(channel[i]); channel[i] = ::fromSrgb(channel[i]);
} }//);
} }
} }
@ -2827,6 +2983,78 @@ Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1)
return s; return s;
} }
// Create a new w x h surface by resampling this one through `warp_function`.
//
// For every destination pixel (x, y) the function is called with the pixel
// position divided by the destination extents (x / w, y / h) and a third
// coordinate of 0. The coordinates it writes back are used to sample this
// surface with sampleLinearClamp; the third coordinate is ignored. The result
// has the same number of channels as this surface.
//
// Returns a null surface if this surface has no image or if `warp_function`
// is NULL.
Surface Surface::warp(int w, int h, WarpFunction * warp_function) const
{
    Surface s;

    // Nothing to sample from, or nothing to map coordinates with: bail out
    // before dereferencing a null pointer below.
    if (m->image == NULL || warp_function == NULL) return s;

    FloatImage * img = s.m->image = new FloatImage;

    const int C = m->image->componentCount();
    img->allocate(C, w, h, 1);

#define USE_PARALLEL_FOR 0

#if USE_PARALLEL_FOR
    nv::parallel_for(h, 1, [=](int y) {
#else
    for (int y = 0; y < h; y++) {
#endif
        for (int x = 0; x < w; x++) {
            // Destination pixel position, relative to the destination extents.
            float fx = (float(x) + 0.0f) / w;
            float fy = (float(y) + 0.0f) / h;
            float fz = 0;

            // The callback rewrites the coordinates in place.
            warp_function(fx, fy, fz);

            for (int c = 0; c < C; c++) {
                img->pixel(c, x, y, 0) = m->image->sampleLinearClamp(c, fx, fy);
            }
        }
    }
#if USE_PARALLEL_FOR
    );
#endif

    return s;
}
// Create a new w x h x d surface by resampling this one through
// `warp_function`.
//
// For every destination voxel (x, y, z) the function is called with the voxel
// position divided by the destination extents (x / w, y / h, z / d). The
// coordinates it writes back are used to sample this surface with
// sampleLinearClamp. The result has the same number of channels as this
// surface.
//
// Returns a null surface if this surface has no image or if `warp_function`
// is NULL.
Surface Surface::warp(int w, int h, int d, WarpFunction * warp_function) const
{
    Surface s;

    // Nothing to sample from, or nothing to map coordinates with: bail out
    // before dereferencing a null pointer below.
    if (m->image == NULL || warp_function == NULL) return s;

    FloatImage * img = s.m->image = new FloatImage;

    const int C = m->image->componentCount();
    img->allocate(C, w, h, d);

    for (int z = 0; z < d; z++) {
#define USE_PARALLEL_FOR 0

#if USE_PARALLEL_FOR
        nv::parallel_for(h, 1, [=](int y) {
#else
        for (int y = 0; y < h; y++) {
#endif
            for (int x = 0; x < w; x++) {
                // Destination voxel position, relative to the destination extents.
                float fx = (float(x) + 0.0f) / w;
                float fy = (float(y) + 0.0f) / h;
                float fz = (float(z) + 0.0f) / d;

                // The callback rewrites the coordinates in place.
                warp_function(fx, fy, fz);

                for (int c = 0; c < C; c++) {
                    img->pixel(c, x, y, z) = m->image->sampleLinearClamp(c, fx, fy, fz); // @@ 2D only.
                }
            }
        }
#if USE_PARALLEL_FOR
        );
#endif
    }

    return s;
}
bool Surface::copyChannel(const Surface & srcImage, int srcChannel) bool Surface::copyChannel(const Surface & srcImage, int srcChannel)
{ {
return copyChannel(srcImage, srcChannel, srcChannel); return copyChannel(srcImage, srcChannel, srcChannel);
@ -2953,7 +3181,7 @@ void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a)
} }
// Vertical lines: // Vertical lines:
for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width) for (uint i = 0, x = 0; i < uint(aw); i++, x += tile_width)
{ {
for (uint y = 0; y < h; y++) for (uint y = 0; y < h; y++)
{ {
@ -3083,9 +3311,9 @@ Surface nvtt::histogram(const Surface & img, int width, int height)
return histogram(img, /*minRange*/0, maxRange, width, height); return histogram(img, /*minRange*/0, maxRange, width, height);
} }
#include "nvcore/Array.inl" //#include "nvcore/Array.inl"
#include "nvmath/PackedFloat.h" //#include "nvmath/PackedFloat.h"
#include <stdio.h> //#include <stdio.h>
nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height) nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height)
{ {
@ -3234,7 +3462,7 @@ nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRang
maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z)); maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z));
} }
printf("maxh = %f\n", maxh); //printf("maxh = %f\n", maxh);
//maxh = 80; //maxh = 80;
maxh = 256; maxh = 256;

View File

@ -83,7 +83,7 @@ namespace nv {
uint countMipmaps(uint w, uint h, uint d); uint countMipmaps(uint w, uint h, uint d);
uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size); uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size);
uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format); uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format);
void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType); void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType, nvtt::ShapeRestriction shapeRestriction = nvtt::ShapeRestriction_None);
} }

View File

@ -10,8 +10,8 @@
// Gran Central Dispatch (GCD/libdispatch) // Gran Central Dispatch (GCD/libdispatch)
// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html // http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) #if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
#define HAVE_GCD 1 //#define HAVE_GCD 1
#include <dispatch/dispatch.h> //#include <dispatch/dispatch.h>
#endif #endif
// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime: // Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime:
@ -64,7 +64,7 @@ namespace nvtt {
#endif #endif
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H) #if HAVE_GCD
// Task dispatcher using Apple's Grand Central Dispatch. // Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher struct AppleTaskDispatcher : public TaskDispatcher

View File

@ -47,9 +47,9 @@ const char * nvtt::errorString(Error e)
return "Error writing through output handler"; return "Error writing through output handler";
case Error_UnsupportedOutputFormat: case Error_UnsupportedOutputFormat:
return "The container file does not support the selected output format"; return "The container file does not support the selected output format";
default:
return "Invalid error";
} }
return "Invalid error";
} }
// Return NVTT version. // Return NVTT version.

View File

@ -105,7 +105,21 @@ namespace nvtt
Format_BC6, Format_BC6,
Format_BC7, Format_BC7,
Format_BC3_RGBM, // Format_BC3_RGBM,
Format_ETC1,
Format_ETC2_R,
Format_ETC2_RG,
Format_ETC2_RGB,
Format_ETC2_RGBA,
Format_ETC2_RGB_A1,
Format_ETC2_RGBM,
Format_PVR_2BPP_RGB, // Using PVR textools.
Format_PVR_4BPP_RGB,
Format_PVR_2BPP_RGBA,
Format_PVR_4BPP_RGBA,
Format_Count Format_Count
}; };
@ -155,6 +169,7 @@ namespace nvtt
NVTT_API void setFormat(Format format); NVTT_API void setFormat(Format format);
NVTT_API void setQuality(Quality quality); NVTT_API void setQuality(Quality quality);
NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f); NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f);
NVTT_API void setRGBMThreshold(float min_m);
NVTT_API void setExternalCompressor(const char * name); NVTT_API void setExternalCompressor(const char * name);
@ -173,9 +188,10 @@ namespace nvtt
NVTT_API void setTargetDecoder(Decoder decoder); NVTT_API void setTargetDecoder(Decoder decoder);
// Translate to and from D3D formats. // Translate to and from D3D formats.
NVTT_API Format format() const;
NVTT_API unsigned int d3d9Format() const; NVTT_API unsigned int d3d9Format() const;
NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setD3D9Format(unsigned int format); //NVTT_API bool setD3D9Format(unsigned int format);
//NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setDxgiFormat(unsigned int format); //NVTT_API bool setDxgiFormat(unsigned int format);
}; };
@ -253,6 +269,14 @@ namespace nvtt
AlphaMode_Premultiplied, AlphaMode_Premultiplied,
}; };
// Extents shape restrictions. Passed to nv::getTargetExtent to constrain the
// shape of the extents it computes.
enum ShapeRestriction
{
// No shape constraint. This is the default argument of getTargetExtent.
ShapeRestriction_None,
// Equal extents: width and height (and depth, for 3D textures) are all set
// to the smallest of them.
ShapeRestriction_Square,
};
// Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1) // Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1)
struct InputOptions struct InputOptions
{ {
@ -344,7 +368,7 @@ namespace nvtt
{ {
Container_DDS, Container_DDS,
Container_DDS10, Container_DDS10,
// Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/ Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
// Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format // Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format
}; };
@ -439,6 +463,9 @@ namespace nvtt
ToneMapper_Lightmap, ToneMapper_Lightmap,
}; };
// Transform the given x, y and d coordinates in place.
// Surface::warp calls this once per destination pixel, passing the pixel
// position divided by the destination extents, and then samples the source
// image at the coordinates the function wrote back. The 2D overload of warp
// passes 0 for d and ignores the value written to it.
typedef void WarpFunction(float & x, float & y, float & d);
// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1) // A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
// @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression. // @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
@ -486,7 +513,8 @@ namespace nvtt
NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter); NVTT_API void resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter);
NVTT_API void autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1); NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1);
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1); NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1);
@ -554,6 +582,10 @@ namespace nvtt
NVTT_API void flipZ(); NVTT_API void flipZ();
NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const; NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const;
NVTT_API Surface warp(int w, int h, WarpFunction * f) const;
NVTT_API Surface warp(int w, int h, int d, WarpFunction * f) const;
// Copy image data. // Copy image data.
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel); NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel);
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel); NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel);

View File

@ -146,9 +146,16 @@ static const char * s_witnessImageSet[] = {
}; };
static const char * s_witnessLmapImageSet[] = { static const char * s_witnessLmapImageSet[] = {
"specruin.dds", "hallway.dds",
"cottage.dds", "windmill.dds",
"tunnel.dds",
"theater.dds",
"tower.dds", "tower.dds",
"hub.dds",
"mine.dds",
"archway.dds",
"hut.dds",
"shaft.dds",
}; };
static const char * s_normalMapImageSet[] = { static const char * s_normalMapImageSet[] = {
@ -187,8 +194,14 @@ enum Mode {
Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Paraboloid,
Mode_BC5_Normal_Quartic, Mode_BC5_Normal_Quartic,
//Mode_BC5_Normal_DualParaboloid, //Mode_BC5_Normal_DualParaboloid,
Mode_BC6, Mode_BC6,
Mode_BC7, Mode_BC7,
Mode_ETC1_IC,
Mode_ETC1_EtcLib,
Mode_ETC2_EtcLib,
Mode_ETC1_RgEtc,
Mode_ETC2_RGBM,
Mode_PVR,
Mode_Count Mode_Count
}; };
static const char * s_modeNames[] = { static const char * s_modeNames[] = {
@ -207,8 +220,14 @@ static const char * s_modeNames[] = {
"BC5-Normal-Paraboloid", // Mode_BC5_Normal_Paraboloid, "BC5-Normal-Paraboloid", // Mode_BC5_Normal_Paraboloid,
"BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic, "BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic,
//"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid, //"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid,
"BC6", // Mode_BC6, "BC6", // Mode_BC6,
"BC7", // Mode_BC7, "BC7", // Mode_BC7,
"ETC1-IC",
"ETC1-EtcLib",
"ETC2-EtcLib",
"ETC1-RgEtc",
"ETC2-RGBM",
"PVR",
}; };
nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count); nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count);
@ -218,14 +237,16 @@ struct Test {
Mode modes[6]; Mode modes[6];
}; };
// Compression test table. Each entry gives a display name, a count, and the Mode values to run;
// every count here matches the number of modes listed and fits the Mode modes[6] array of Test.
// The /*n*/ prefixes number the entries.
// NOTE(review): disabled entry 9 names Mode_ETC_RGBM, which is not among the Mode values visible here (Mode_ETC2_RGBM is) - fix before re-enabling.
static Test s_imageTests[] = { static Test s_imageTests[] = {
{"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}}, /*0*/ {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
{"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}}, /*1*/ {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
//{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}}, /*2*/ {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
{"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}}, /*3*/ {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
{"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}}, /*4*/ {"HDR", 3, {Mode_ETC2_RGBM, Mode_BC3_RGBM, Mode_BC6}},
{"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}}, /*5*/ {"BC6", 1, {Mode_BC6}},
{"BC6", 1, {Mode_BC6}}, /*6*/ {"BC7", 1, {Mode_BC7}},
{"BC7", 1, {Mode_BC7}}, /*7*/ {"ETC", 3, {Mode_ETC1_IC, Mode_ETC1_RgEtc, Mode_ETC2_EtcLib}},
/*8*/ {"Color Mobile", 4, {Mode_PVR, Mode_ETC1_IC, Mode_ETC2_EtcLib, Mode_BC1}},
/*9*/ //{"ETC-Lightmap", 2, {Mode_BC3_RGBM, Mode_ETC_RGBM}},
}; };
const int s_imageTestCount = ARRAY_SIZE(s_imageTests); const int s_imageTestCount = ARRAY_SIZE(s_imageTests);
@ -404,10 +425,10 @@ int main(int argc, char *argv[])
i++; i++;
} }
} }
else else
{ {
printf("Warning: unrecognized option \"%s\"\n", argv[i]); printf("Warning: unrecognized option \"%s\"\n", argv[i]);
} }
} }
// Validate inputs. // Validate inputs.
@ -462,7 +483,8 @@ int main(int argc, char *argv[])
} }
else else
{ {
compressionOptions.setQuality(nvtt::Quality_Production); compressionOptions.setQuality(nvtt::Quality_Normal);
//compressionOptions.setQuality(nvtt::Quality_Production);
} }
//compressionOptions.setExternalCompressor("ati"); //compressionOptions.setExternalCompressor("ati");
//compressionOptions.setExternalCompressor("squish"); //compressionOptions.setExternalCompressor("squish");
@ -515,13 +537,13 @@ int main(int argc, char *argv[])
// Labels on the left side. // Labels on the left side.
if (errorMode == ErrorMode_RMSE) { if (errorMode == ErrorMode_RMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01"; graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.03,0.01";
} }
else if (errorMode == ErrorMode_CieLab) { else if (errorMode == ErrorMode_CieLab) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1"; graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1";
} }
else if (errorMode == ErrorMode_AngularRMSE) { else if (errorMode == ErrorMode_AngularRMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01"; graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.2,0.02"; // 0.05,0.01
} }
// Labels at the bottom. // Labels at the bottom.
@ -583,14 +605,13 @@ int main(int argc, char *argv[])
} }
Timer timer; Timer timer;
//int failedTests = 0; //int failedTests = 0;
//float totalDiff = 0; //float totalDiff = 0;
nvtt::Surface img; nvtt::Surface img;
printf("Running Test: %s with Set: %s\n", test.name, set.name); printf("Running test '%s' with set '%s'\n", test.name, set.name);
graphWriter << "&chd=t:"; graphWriter << "&chd=t:";
@ -602,10 +623,11 @@ int main(int argc, char *argv[])
Mode mode = test.modes[t]; Mode mode = test.modes[t];
nvtt::Format format; nvtt::Format format;
const char * compressor_name = NULL;
if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) { if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) {
format = nvtt::Format_BC1; format = nvtt::Format_BC1;
} }
else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW) { else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_LUVW) {
format = nvtt::Format_BC3; format = nvtt::Format_BC3;
} }
else if (mode == Mode_BC3_Normal) { else if (mode == Mode_BC3_Normal) {
@ -614,20 +636,51 @@ int main(int argc, char *argv[])
else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) { else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) {
format = nvtt::Format_BC5; format = nvtt::Format_BC5;
} }
else if (mode == Mode_BC6) else if (mode == Mode_BC3_RGBM) {
{ format = nvtt::Format_BC3_RGBM;
format = nvtt::Format_BC6; }
} else if (mode == Mode_BC6)
else if (mode == Mode_BC7) {
{ format = nvtt::Format_BC6;
format = nvtt::Format_BC7; }
} else if (mode == Mode_BC7)
else {
{ format = nvtt::Format_BC7;
nvDebugCheck(false); }
} else if (mode == Mode_ETC1_IC)
{
format = nvtt::Format_ETC1;
}
else if (mode == Mode_ETC1_EtcLib)
{
format = nvtt::Format_ETC1;
compressor_name = "etclib";
}
else if (mode == Mode_ETC2_EtcLib)
{
format = nvtt::Format_ETC2_RGB;
compressor_name = "etclib";
}
else if (mode == Mode_ETC1_RgEtc)
{
format = nvtt::Format_ETC1;
compressor_name = "rg_etc";
}
else if (mode == Mode_ETC2_RGBM)
{
format = nvtt::Format_ETC2_RGBM;
}
else if (mode == Mode_PVR)
{
format = nvtt::Format_PVR_4BPP_RGB;
}
else
{
nvUnreachable();
}
compressionOptions.setFormat(format); compressionOptions.setFormat(format);
// NOTE(review): the external compressor is only ever set here, never cleared. If compressionOptions is shared across modes, a name chosen for an earlier mode presumably stays in effect for later modes whose compressor_name is NULL - confirm.
if (compressor_name) compressionOptions.setExternalCompressor(compressor_name);
if (set.type == ImageType_RGBA) { if (set.type == ImageType_RGBA) {
img.setAlphaMode(nvtt::AlphaMode_Transparency); img.setAlphaMode(nvtt::AlphaMode_Transparency);
@ -653,6 +706,7 @@ int main(int argc, char *argv[])
printf("Input image '%s' not found.\n", set.fileNames[i]); printf("Input image '%s' not found.\n", set.fileNames[i]);
return EXIT_FAILURE; return EXIT_FAILURE;
} }
float color_range = 0.0f;
if (img.isNormalMap()) { if (img.isNormalMap()) {
img.normalizeNormalMap(); img.normalizeNormalMap();
@ -693,16 +747,34 @@ int main(int argc, char *argv[])
tmp.clamp(2); tmp.clamp(2);
tmp.clamp(3); tmp.clamp(3);
} }
else if (mode == Mode_BC3_RGBM) { else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
tmp.setAlphaMode(nvtt::AlphaMode_None); float r, g, b;
if (set.type == ImageType_HDR) { tmp.range(0, NULL, &r);
// Transform to gamma-2.0 space before applying RGBM - helps a lot with banding in the darks. tmp.range(1, NULL, &g);
tmp.toGamma(2.0f); tmp.range(2, NULL, &b);
tmp.toRGBM(3.0f); // range of 3.0 in gamma-2.0 space == range of 9.0 in linear space color_range = max3(r, g, b);
printf("color range = %f\n", color_range);
tmp.setAlphaMode(nvtt::AlphaMode_Transparency);
const float max_color_range = 16.0f;
if (color_range > max_color_range) {
color_range = max_color_range;
} }
else {
tmp.toRGBM(); for (int i = 0; i < 3; i++) {
tmp.scaleBias(i, 1.0f / color_range, 0.0f);
} }
tmp.toneMap(nvtt::ToneMapper_Linear, /*parameters=*/NULL); // Clamp without changing the hue.
// Clamp alpha.
tmp.clamp(3);
// To gamma.
tmp.toGamma(2);
compressionOptions.setRGBMThreshold(0.2f);
} }
else if (mode == Mode_BC3_LUVW) { else if (mode == Mode_BC3_LUVW) {
tmp.setAlphaMode(nvtt::AlphaMode_None); tmp.setAlphaMode(nvtt::AlphaMode_None);
@ -781,14 +853,25 @@ int main(int argc, char *argv[])
}*/ }*/
} }
} }
else if (mode == Mode_BC3_RGBM) { else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
if (set.type == ImageType_HDR) { /*if (set.type == ImageType_HDR) {
img_out.fromRGBM(3.0f); //img_out.fromRGBM(3.0f);
img_out.toLinear(2.0f); img_out.fromRGBM(range);
img_out.toLinear(2.0f);
} }
else { else {
img_out.fromRGBM(); img_out.fromRGBM();
}*/
img_out.fromRGBM(1.0f, 0.2f);
img_out.toLinear(2);
for (int i = 0; i < 3; i++) {
img_out.scaleBias(i, color_range, 0.0f);
} }
img_out.copyChannel(img, 3); // Copy alpha channel from source.
img_out.setAlphaMode(nvtt::AlphaMode_Transparency);
} }
else if (mode == Mode_BC3_LUVW) { else if (mode == Mode_BC3_LUVW) {
if (set.type == ImageType_HDR) { if (set.type == ImageType_HDR) {

View File

@ -61,6 +61,9 @@ struct MyAssertHandler : public nv::AssertHandler {
// Assertion callback: writes the failed expression and its file/line to stderr, then calls nv::debug::dumpInfo().
// The func, msg and arg parameters are accepted but not used in this body.
// Returns NV_ABORT_DEBUG when nv::debug::isDebuggerPresent() reports true; otherwise the process exits with status 1.
virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) { virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) {
fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line); fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
nv::debug::dumpInfo(); nv::debug::dumpInfo();
// With a debugger attached, return instead of terminating (presumably so the caller can break at the failure site - confirm against NV_ABORT_DEBUG handling).
if (nv::debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
exit(1); exit(1);
} }
}; };

View File

@ -154,11 +154,13 @@ int main(int argc, char *argv[])
bool loadAsFloat = false; bool loadAsFloat = false;
bool rgbm = false; bool rgbm = false;
bool rangescale = false; bool rangescale = false;
bool srgb = false;
const char * externalCompressor = NULL; const char * externalCompressor = NULL;
bool silent = false; bool silent = false;
bool dds10 = false; bool dds10 = false;
bool ktx = false;
nv::Path input; nv::Path input;
nv::Path output; nv::Path output;
@ -285,6 +287,31 @@ int main(int argc, char *argv[])
format = nvtt::Format_BC3_RGBM; format = nvtt::Format_BC3_RGBM;
rgbm = true; rgbm = true;
} }
else if (strcmp("-etc1", argv[i]) == 0)
{
format = nvtt::Format_ETC1;
}
else if (strcmp("-etc2", argv[i]) == 0 || strcmp("-etc2_rgb", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGB;
}
else if (strcmp("-etc2_eac", argv[i]) == 0 || strcmp("-etc2_rgba", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBA;
}
else if (strcmp("-eac", argv[i]) == 0 || strcmp("-etc2_r", argv[i]) == 0)
{
format = nvtt::Format_ETC2_R;
}
else if (strcmp("-etc2_rg", argv[i]) == 0)
{
// Two-channel option: "-eac" / "-etc2_r" in the previous branch already select Format_ETC2_R,
// so mapping "-etc2_rg" to the same value made the two options indistinguishable.
// NOTE(review): assumes nvtt::Format_ETC2_RG is declared in nvtt.h - confirm.
format = nvtt::Format_ETC2_RG;
}
else if (strcmp("-etc2_rgbm", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBM;
rgbm = true;
}
// Undocumented option. Mainly used for testing. // Undocumented option. Mainly used for testing.
else if (strcmp("-ext", argv[i]) == 0) else if (strcmp("-ext", argv[i]) == 0)
@ -309,6 +336,14 @@ int main(int argc, char *argv[])
{ {
dds10 = true; dds10 = true;
} }
else if (strcmp("-ktx", argv[i]) == 0)
{
ktx = true;
}
else if (strcmp("-srgb", argv[i]) == 0)
{
srgb = true;
}
else if (argv[i][0] != '-') else if (argv[i][0] != '-')
{ {
@ -321,15 +356,23 @@ int main(int argc, char *argv[])
{ {
output.copy(input.str()); output.copy(input.str());
output.stripExtension(); output.stripExtension();
output.append(".dds");
if (ktx)
{
output.append(".ktx");
}
else
{
output.append(".dds");
}
} }
break; break;
} }
else else
{ {
printf("Warning: unrecognized option \"%s\"\n", argv[i]); printf("Warning: unrecognized option \"%s\"\n", argv[i]);
} }
} }
const uint version = nvtt::version(); const uint version = nvtt::version();
@ -380,7 +423,9 @@ int main(int argc, char *argv[])
printf("Output options:\n"); printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n"); printf(" -silent \tDo not output progress messages\n");
printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7)\n\n"); printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7, unless ktx is being used)\n");
printf(" -ktx \tUse KTX container format\n");
printf(" -srgb \tIf the requested format allows it, output will be in sRGB color space\n\n");
return EXIT_FAILURE; return EXIT_FAILURE;
} }
@ -398,7 +443,7 @@ int main(int argc, char *argv[])
bool useSurface = false; // @@ use Surface API in all cases! bool useSurface = false; // @@ use Surface API in all cases!
nvtt::Surface image; nvtt::Surface image;
if (format == nvtt::Format_BC3_RGBM || rgbm) { if (format == nvtt::Format_BC3_RGBM || format == nvtt::Format_ETC2_RGBM || rgbm) {
useSurface = true; useSurface = true;
if (!image.load(input.str())) { if (!image.load(input.str())) {
@ -440,7 +485,7 @@ int main(int argc, char *argv[])
// To gamma. // To gamma.
image.toGamma(2); image.toGamma(2);
// Manual RGBM packing is skipped for the native RGBM formats (presumably the compressor packs those itself - confirm).
// Both formats must be excluded, so the tests are joined with &&; with || the condition was always true and the manual path ran for every format.
if (format != nvtt::Format_BC3_RGBM) { if (format != nvtt::Format_BC3_RGBM && format != nvtt::Format_ETC2_RGBM) {
image.setAlphaMode(nvtt::AlphaMode_None); image.setAlphaMode(nvtt::AlphaMode_None);
image.toRGBM(1, 0.15f); image.toRGBM(1, 0.15f);
} }
@ -494,7 +539,7 @@ int main(int argc, char *argv[])
nvDebugCheck(dds.isTextureArray()); nvDebugCheck(dds.isTextureArray());
inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize()); inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize());
faceCount = dds.arraySize(); faceCount = dds.arraySize();
dds10 = true; dds10 = ktx ? false : true;
} }
uint mipmapCount = dds.mipmapCount(); uint mipmapCount = dds.mipmapCount();
@ -569,11 +614,12 @@ int main(int argc, char *argv[])
inputOptions.setAlphaMode(nvtt::AlphaMode_None); inputOptions.setAlphaMode(nvtt::AlphaMode_None);
} }
// IC: Do not enforce D3D9 restrictions anymore.
// Block compressed textures with mipmaps must be powers of two. // Block compressed textures with mipmaps must be powers of two.
if (!noMipmaps && format != nvtt::Format_RGB) /*if (!noMipmaps && format != nvtt::Format_RGB)
{ {
inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo); inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo);
} }*/
if (normal) if (normal)
{ {
@ -720,15 +766,27 @@ int main(int argc, char *argv[])
outputOptions.setOutputHandler(&outputHandler); outputOptions.setOutputHandler(&outputHandler);
outputOptions.setErrorHandler(&errorHandler); outputOptions.setErrorHandler(&errorHandler);
// Automatically use dds10 if compressing to BC6 or BC7 if (ktx)
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7)
{
dds10 = true;
}
if (dds10)
{ {
outputOptions.setContainer(nvtt::Container_DDS10); outputOptions.setContainer(nvtt::Container_KTX);
}
else
{
// Automatically use dds10 if compressing to BC6 or BC7
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) {
dds10 = true;
}
if (dds10) {
outputOptions.setContainer(nvtt::Container_DDS10);
}
else {
outputOptions.setContainer(nvtt::Container_DDS);
}
}
if (srgb) {
outputOptions.setSrgbFlag(true);
} }
// printf("Press ENTER.\n"); // printf("Press ENTER.\n");

View File

@ -99,8 +99,8 @@ int main(int argc, char *argv[])
return 1; return 1;
} }
break; break;
} }
} }
if (input.isNull() || output.isNull()) if (input.isNull() || output.isNull())
@ -136,21 +136,21 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image); nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma); fimage.toLinear(0, 3, gamma);
uint thumbW, thumbH; uint thumbW, thumbH;
if (image.width() > image.height()) if (image.width() > image.height())
{ {
thumbW = size; thumbW = size;
thumbH = uint ((float (image.height()) / float (image.width())) * size); thumbH = uint ((float (image.height()) / float (image.width())) * size);
} }
else else
{ {
thumbW = uint ((float (image.width()) / float (image.height())) * size); thumbW = uint ((float (image.width()) / float (image.height())) * size);
thumbH = size; thumbH = size;
} }
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp)); nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma)); nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB); result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output.str()); nv::StdOutputStream stream(output.str());
nv::ImageIO::save(output.str(), stream, result.ptr(), metaData.buffer()); nv::ImageIO::save(output.str(), stream, result.ptr(), metaData.buffer());