Merge changes from The Witness.

pull/276/head
Ignacio 6 years ago
parent 2075d740c9
commit 9489aed825

@ -31,7 +31,7 @@ MESSAGE(STATUS " Processor: ${NV_SYSTEM_PROCESSOR}")
MESSAGE(STATUS " Compiler Flags: ${CMAKE_CXX_FLAGS}")
IF(CMAKE_BUILD_TYPE MATCHES "debug")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.")
SET(CMAKE_DEBUG_POSTFIX "_d" CACHE STRING "Postfix for debug build libraries.")
ADD_DEFINITIONS(-D_DEBUG=1)
ENDIF()

@ -1,6 +1,6 @@
NVIDIA Texture Tools is licensed under the MIT license.
Copyright (c) 2009-2016 Ignacio Castano
Copyright (c) 2009-2017 Ignacio Castaño
Copyright (c) 2007-2009 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person

@ -6,7 +6,7 @@ manipulation tools, designed to be integrated in game tools and asset
processing pipelines.
The primary features of the library are mipmap and normal map generation, format
conversion and DXT compression.
conversion, and DXT compression.
### How to build (Windows)
@ -42,5 +42,5 @@ src/nvtt/tools/compress.cpp
Detailed documentation of the API can be found at:
http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation
https://github.com/castano/nvidia-texture-tools/wiki/ApiDocumentation
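For reference, a minimal sketch of how the high-level nvtt compression API described above is typically driven. This is illustrative only: the header path, enum values and option setters are assumptions based on typical nvtt 2.x usage, so check the wiki linked above for the authoritative interface.

#include <nvtt/nvtt.h>
#include <vector>

int main()
{
    const int w = 256, h = 256;
    std::vector<unsigned char> pixels(w * h * 4, 255);    // dummy opaque BGRA image

    nvtt::InputOptions input;                              // input defaults to 8-bit BGRA
    input.setTextureLayout(nvtt::TextureType_2D, w, h);
    input.setMipmapData(pixels.data(), w, h);

    nvtt::CompressionOptions compression;
    compression.setFormat(nvtt::Format_BC1);               // classic DXT1 compression

    nvtt::OutputOptions output;
    output.setFileName("output.dds");

    nvtt::Compressor compressor;
    return compressor.process(input, compression, output) ? 0 : 1;
}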

@ -36,4 +36,6 @@ do
#./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds
#./nvimgdiff -alpha $file.$EXT $file.bc6.dds
# ETC2-EAC
./nvcompress -silent -alpha -nomips -etc_rgbm
done

@ -349,9 +349,18 @@ LLVM:
# define POSH_OS_STRING "UNICOS"
#endif
#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
# define POSH_OS_OSX 1
# define POSH_OS_STRING "MacOS X"
//ACS if we're in xcode, look at the target conditionals to figure out if this is ios or osx
#if defined __APPLE__
# include "TargetConditionals.h"
#endif
#if TARGET_OS_IPHONE
# define POSH_OS_IOS 1
# define POSH_OS_STRING "iOS"
#else
# if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
# define POSH_OS_OSX 1
# define POSH_OS_STRING "MacOS X"
# endif
#endif
#if defined __sun__ || defined sun || defined __sun || defined __solaris__

@ -1808,7 +1808,7 @@ typedef unsigned long uint64;
{
if (block_inten[0] > m_pSorted_luma[n - 1])
{
const uint min_error = labs(int(block_inten[0] - m_pSorted_luma[n - 1]));
const uint min_error = abs(int(block_inten[0] - m_pSorted_luma[n - 1]));
if (min_error >= trial_solution.m_error)
continue;
}
@ -1822,7 +1822,7 @@ typedef unsigned long uint64;
{
if (m_pSorted_luma[0] > block_inten[3])
{
const uint min_error = labs(int(m_pSorted_luma[0] - block_inten[3]));
const uint min_error = abs(int(m_pSorted_luma[0] - block_inten[3]));
if (min_error >= trial_solution.m_error)
continue;
}
@ -1914,7 +1914,7 @@ done:
for (uint packed_c = 0; packed_c < limit; packed_c++)
{
int v = etc1_decode_value(diff, inten, selector, packed_c);
uint err = labs(v - static_cast<int>(color));
uint err = abs(v - static_cast<int>(color));
if (err < best_error)
{
best_error = err;

@ -14,6 +14,7 @@ SET(BC6H_SRCS
zohtwo.cpp)
ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS})
TARGET_LINK_LIBRARIES(bc6h nvcore nvmath)
IF(NOT WIN32)
IF(CMAKE_COMPILER_IS_GNUCXX)

@ -37,7 +37,7 @@ int Utils::lerp(int a, int b, int i, int denom)
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break;
default: nvDebugCheck(0);
default: nvUnreachable();
}
return (a*weights[denom-i] +b*weights[i] + round) >> shift;

@ -584,7 +584,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
//float err = 0;
for (int region=0; region<NREGIONS_ONE; ++region)
{

@ -672,7 +672,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
//float err = 0;
for (int region=0; region<NREGIONS_TWO; ++region)
{

@ -22,6 +22,7 @@ SET(BC7_SRCS
avpcl_utils.h)
ADD_LIBRARY(bc7 STATIC ${BC7_SRCS})
TARGET_LINK_LIBRARIES(bc7 nvcore nvmath)
TARGET_LINK_LIBRARIES(bc7 nvmath)

@ -243,7 +243,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
{
int mode = AVPCL::getmode(in);
//int mode = AVPCL::getmode(in);
pat_index = 0;
nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
@ -580,7 +580,7 @@ static float exhaustive(const Vector4 colors[], const float importance[], int np
int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
//bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
int amin, bmin;

@ -148,7 +148,7 @@ namespace nv
NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
#if NV_CC_MSVC
#if NV_NEED_PSEUDOINDEX_WRAPPER
NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
return m_buffer[i(this)];
}

@ -27,7 +27,7 @@
#define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE __attribute__((always_inline)) inline
#define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
#define NV_THREAD_LOCAL __thread
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))

@ -31,11 +31,6 @@ bool FileSystem::exists(const char * path)
// PathFileExists requires linking to shlwapi.lib
//return PathFileExists(path) != 0;
return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
#elif NV_OS_ORBIS
const int BUFFER_SIZE = 2048;
char file_fullpath[BUFFER_SIZE];
snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path);
return sceFiosExistsSync(NULL, file_fullpath);
#else
if (FILE * fp = fopen(path, "r"))
{
@ -78,3 +73,31 @@ bool FileSystem::removeFile(const char * path)
// @@ Use unlink or remove?
return remove(path) == 0;
}
#include "StdStream.h" // for fileOpen
bool FileSystem::copyFile(const char * src, const char * dst) {
FILE * fsrc = fileOpen(src, "rb");
if (fsrc == NULL) return false;
NV_ON_RETURN(fclose(fsrc));
FILE * fdst = fileOpen(dst, "wb");
if (fdst == NULL) return false;
NV_ON_RETURN(fclose(fdst));
char buffer[1024];
size_t n;
while ((n = fread(buffer, sizeof(char), sizeof(buffer), fsrc)) > 0) {
if (fwrite(buffer, sizeof(char), n, fdst) != n) {
return false;
}
}
return true;
}

@ -15,7 +15,7 @@ namespace nv
NVCORE_API bool createDirectory(const char * path);
NVCORE_API bool changeDirectory(const char * path);
NVCORE_API bool removeFile(const char * path);
NVCORE_API bool copyFile(const char * src, const char * dst);
} // FileSystem namespace
} // nv namespace

@ -33,6 +33,8 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
#else // If typeof not available:
#define NV_NEED_PSEUDOINDEX_WRAPPER 1
#include <new> // placement new
struct PseudoIndexWrapper {

@ -2,6 +2,7 @@
#include "Memory.h"
#include "Debug.h"
#include "Utils.h"
#include <stdlib.h>
@ -56,6 +57,7 @@ void * realloc(void * ptr, size_t size)
#endif
}
/* No need to override this unless we want line info.
void * operator new (size_t size) throw()
{
@ -116,4 +118,32 @@ void operator delete(void* p, const std::nothrow_t&) throw()
#endif // NV_OVERRIDE_ALLOC
void * nv::aligned_malloc(size_t size, size_t alignment)
{
// alignment must be a power of two, multiple of sizeof(void*)
nvDebugCheck(isPowerOfTwo(alignment));
nvDebugCheck((alignment & (sizeof(void*) - 1)) == 0);
#if NV_OS_WIN32 || NV_OS_DURANGO
return _aligned_malloc(size, alignment);
#elif NV_OS_DARWIN && !NV_OS_IOS
void * ptr = NULL;
posix_memalign(&ptr, alignment, size);
return ptr;
#elif NV_OS_LINUX
return memalign(alignment, size);
#else // NV_OS_ORBIS || NV_OS_IOS
// @@ IC: iOS appears to be 16 byte aligned, should we check alignment and assert if we request a higher alignment factor?
return ::malloc(size);
#endif
}
void nv::aligned_free(void * ptr)
{
#if NV_OS_WIN32 || NV_OS_DURANGO
_aligned_free(ptr);
#else
::free(ptr);
#endif
}
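A small usage sketch for the aligned allocation helpers added above; nv::aligned_malloc and nv::aligned_free are declared in nvcore/Memory.h later in this diff, and the include path and 16-byte alignment below are illustrative assumptions.

#include "nvcore/Memory.h"   // assumed include path for nv::aligned_malloc / nv::aligned_free

void alignedBufferExample()
{
    // Per the implementation above, the alignment must be a power of two and a
    // multiple of sizeof(void*); on iOS/Orbis it silently falls back to malloc.
    float * samples = (float *)nv::aligned_malloc(1024 * sizeof(float), 16);
    if (samples != NULL) {
        // ... fill and use the 16-byte aligned buffer ...
        nv::aligned_free(samples);
    }
}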

@ -7,10 +7,16 @@
#include "nvcore.h"
#include <stdlib.h> // malloc(), realloc() and free()
#include <string.h> // memset
//#include <stddef.h> // size_t
//#include <new> // new and delete
#define TRACK_MEMORY_LEAKS 0
#if TRACK_MEMORY_LEAKS
#include <vld.h>
#endif
#if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
@ -41,6 +47,8 @@ extern "C" {
#endif
namespace nv {
NVCORE_API void * aligned_malloc(size_t size, size_t alignment);
NVCORE_API void aligned_free(void * );
// C++ helpers.
template <typename T> NV_FORCEINLINE T * malloc(size_t count) {

@ -113,7 +113,7 @@ namespace nv
public:
// BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType;
typedef SmartPtr<BaseClass> ThisType;
/// Default ctor.
SmartPtr() : m_ptr(NULL)

@ -213,9 +213,12 @@ namespace nv
#elif NV_OS_LINUX
return (uint)fread_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN
// @@ No error checking, always returns len.
// This is rather lame. Not sure if it's faster than the locked version.
for (uint i = 0; i < len; i++) {
((char *)data)[i] = getc_unlocked(m_fp);
if (feof_unlocked(m_fp) != 0) {
return i;
}
}
return len;
#else

@ -347,26 +347,36 @@ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
}
/** Append a string. */
StringBuilder & StringBuilder::append( const char * s )
// Append a character.
StringBuilder & StringBuilder::append( char c )
{
return append(s, U32(strlen( s )));
return append(&c, 1);
}
// Append a string.
StringBuilder & StringBuilder::append( const char * s )
{
return append(s, U32(strlen( s )));
}
/** Append a string. */
// Append a string.
StringBuilder & StringBuilder::append(const char * s, uint len)
{
nvDebugCheck(s != NULL);
uint offset = length();
const uint size = offset + len + 1;
reserve(size);
strCpy(m_str + offset, len + 1, s, len);
uint offset = length();
const uint size = offset + len + 1;
reserve(size);
strCpy(m_str + offset, len + 1, s, len);
return *this;
}
StringBuilder & StringBuilder::append(const StringBuilder & str)
{
return append(str.m_str, str.length());
}
/** Append a formatted string. */
StringBuilder & StringBuilder::appendFormat( const char * fmt, ... )
@ -516,6 +526,19 @@ StringBuilder & StringBuilder::copy( const StringBuilder & s )
return *this;
}
void StringBuilder::removeChar(char c)
{
char * src = strchr(m_str, c);
if (src) {
char * dst = src;
src++;
while (*src) {
*dst++ = *src++;
}
*dst = '\0';
}
}
bool StringBuilder::endsWith(const char * str) const
{
uint l = uint(strlen(str));
@ -530,7 +553,7 @@ bool StringBuilder::beginsWith(const char * str) const
return strncmp(m_str, str, l) == 0;
}
// Find given char starting from the end.
// Find given char starting from the end. Why not use strrchr!?
char * StringBuilder::reverseFind(char c)
{
int length = (int)strlen(m_str) - 1;
@ -563,6 +586,19 @@ char * StringBuilder::release()
return str;
}
// Take ownership of string.
void StringBuilder::acquire(char * str)
{
if (str) {
m_size = strLen(str) + 1;
m_str = str;
}
else {
m_size = 0;
m_str = NULL;
}
}
// Swap strings.
void nv::swap(StringBuilder & a, StringBuilder & b) {
swap(a.m_size, b.m_size);
@ -585,19 +621,20 @@ const char * Path::extension() const
/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
nvCheck(path != NULL);
for (int i = 0;; i++) {
if (path[i] == '\0') break;
if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator;
if (path != NULL) {
for (int i = 0;; i++) {
if (path[i] == '\0') break;
if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator;
}
}
}
/// Toggles path separators (ie. \\ into /).
void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
{
nvCheck(!isNull());
translatePath(m_str, pathSeparator);
if (!isNull()) {
translatePath(m_str, pathSeparator);
}
}
void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)

@ -105,8 +105,10 @@ namespace nv
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder & formatList( const char * format, va_list arg );
StringBuilder & append(char c);
StringBuilder & append(const char * str);
StringBuilder & append(const char * str, uint len);
StringBuilder & append(const char * str, uint len);
StringBuilder & append(const StringBuilder & str);
StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
StringBuilder & appendFormatList(const char * format, va_list arg);
@ -122,6 +124,8 @@ namespace nv
StringBuilder & toLower();
StringBuilder & toUpper();
void removeChar(char c);
bool endsWith(const char * str) const;
bool beginsWith(const char * str) const;
@ -129,15 +133,16 @@ namespace nv
char * reverseFind(char c);
void reset();
bool isNull() const { return m_size == 0; }
NV_FORCEINLINE bool isNull() const { return m_size == 0; }
// const char * accessors
//operator const char * () const { return m_str; }
//operator char * () { return m_str; }
const char * str() const { return m_str; }
char * str() { return m_str; }
NV_FORCEINLINE const char * str() const { return m_str; }
NV_FORCEINLINE char * str() { return m_str; }
char * release();
char * release(); // Release ownership of string.
void acquire(char *); // Take ownership of string.
/// Implement value semantics.
StringBuilder & operator=( const StringBuilder & s ) {
@ -280,25 +285,25 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
return strMatch(str.data, data);
return strEqual(str.data, data);
}
/// Equal operator.
bool operator==( const char * str ) const
{
return strMatch(str, data);
return strEqual(str, data);
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
return !strMatch(str.data, data);
return !strEqual(str.data, data);
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
return !strMatch(str, data);
return !strEqual(str, data);
}
/// Returns true if this string is the null string.

@ -76,13 +76,13 @@ namespace nv
void advance(uint offset) { seek(tell() + offset); }
// friends
// friends
friend Stream & operator<<( Stream & s, bool & c ) {
#if NV_OS_DARWIN && !NV_CC_CPP11
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
c = (b != 0);
#else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );

@ -39,6 +39,28 @@ namespace nv
// These intentionally look like casts.
// uint64 casts:
template <typename T> inline uint64 U64(T x) { return x; }
//template <> inline uint64 U64<uint64>(uint64 x) { return x; }
template <> inline uint64 U64<int64>(int64 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U32<uint32>(uint32 x) { return x; }
template <> inline uint64 U64<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint16>(uint16 x) { return x; }
template <> inline uint64 U64<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint8>(uint8 x) { return x; }
template <> inline uint64 U64<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint64)x; }
// int64 casts:
template <typename T> inline int64 I64(T x) { return x; }
template <> inline int64 I64<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT64_MAX); return (int64)x; }
//template <> inline uint64 U64<int64>(int64 x) { return x; }
//template <> inline uint64 U32<uint32>(uint32 x) { return x; }
//template <> inline uint64 U64<int32>(int32 x) { return x; }
//template <> inline uint64 U64<uint16>(uint16 x) { return x; }
//template <> inline uint64 U64<int16>(int16 x) { return x; }
//template <> inline uint64 U64<uint8>(uint8 x) { return x; }
//template <> inline uint64 U64<int8>(int8 x) { return x; }
// uint32 casts:
template <typename T> inline uint32 U32(T x) { return x; }
template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
@ -50,6 +72,11 @@ namespace nv
//template <> inline uint32 U32<uint8>(uint8 x) { return x; }
template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
#if NV_OS_DARWIN
template <> inline uint32 U32<unsigned long>(unsigned long x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
template <> inline uint32 U32<long>(long x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
#endif
// int32 casts:
template <typename T> inline int32 I32(T x) { return x; }
template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
@ -182,7 +209,7 @@ namespace nv
* @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
* @note nextPowerOfTwo(x) = 2 << log2(x-1)
*/
inline uint nextPowerOfTwo( uint x )
inline uint32 nextPowerOfTwo(uint32 x)
{
nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
@ -202,8 +229,19 @@ namespace nv
#endif
}
/// Return true if @a n is a power of two.
inline bool isPowerOfTwo( uint n )
inline uint64 nextPowerOfTwo(uint64 x)
{
nvDebugCheck(x != 0);
uint p = 1;
while (x > p) {
p += p;
}
return p;
}
// @@ Should I just use a macro instead?
template <typename T>
inline bool isPowerOfTwo(T n)
{
return (n & (n-1)) == 0;
}
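A tiny sanity sketch of the semantics documented above (isPowerOfTwo(x) implies nextPowerOfTwo(x) == x), exercising a stand-alone copy of the 64-bit loop variant added in this hunk; the harness is illustrative and not part of the library.

#include <cassert>
#include <cstdint>

// Stand-alone copy of the 64-bit variant above, for illustration only (x must be non-zero).
static uint64_t nextPowerOfTwo64(uint64_t x)
{
    uint64_t p = 1;
    while (x > p) {
        p += p;
    }
    return p;
}

int main()
{
    assert(nextPowerOfTwo64(1) == 1);        // already a power of two: unchanged
    assert(nextPowerOfTwo64(4) == 4);
    assert(nextPowerOfTwo64(5) == 8);        // rounded up to the next power of two
    assert(nextPowerOfTwo64(1000) == 1024);
    return 0;
}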

@ -56,6 +56,7 @@
# define NV_OS_MINGW 1
# define NV_OS_WIN32 1
#elif defined POSH_OS_OSX
# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too.
# define NV_OS_DARWIN 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_IOS
@ -78,9 +79,9 @@
// Threading:
// some platforms don't implement __thread or similar for thread-local-storage
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS
# define NV_OS_USE_PTHREAD 1
# if NV_OS_DARWIN || NV_OS_IOS
# if 0 //Apple finally added TLS support to iOS!// NV_OS_IOS
# define NV_OS_HAS_TLS_QUALIFIER 0
# else
# define NV_OS_HAS_TLS_QUALIFIER 1
@ -96,7 +97,7 @@
// NV_CPU_X86_64
// NV_CPU_PPC
// NV_CPU_ARM
// NV_CPU_AARCH64
// NV_CPU_ARM_64
#define NV_CPU_STRING POSH_CPU_STRING
@ -110,7 +111,7 @@
#elif defined POSH_CPU_STRONGARM
# define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64
# define NV_CPU_AARCH64 1
# define NV_CPU_ARM_64 1
#else
# error "Unsupported CPU"
#endif
@ -148,10 +149,16 @@
#endif
// Endianness:
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
#define NV_BIG_ENDIAN POSH_BIG_ENDIAN
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING
// @@ POSH endian detection is broken for arm64 on iOS. They are bi-endian and iOS sets all their processors to little endian by default.
#if NV_OS_IOS
# define NV_LITTLE_ENDIAN 1
# define NV_BIG_ENDIAN 0
# define NV_ENDIAN_STRING "little"
#else
# define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
# define NV_BIG_ENDIAN POSH_BIG_ENDIAN
# define NV_ENDIAN_STRING POSH_ENDIAN_STRING
#endif
// Define the right printf prefix for size_t arguments:
#if POSH_64BIT_POINTER
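To illustrate why the NV_LITTLE_ENDIAN / NV_BIG_ENDIAN overrides above matter, serialization code typically branches on them to decide whether multi-byte values need swapping. The helper below is a hedged sketch, not an existing library function, and assumes nvcore.h has been included so the macros are defined.

#include <cstdint>

// Illustrative only: return v in little-endian byte order, swapping on big-endian targets,
// so data written to disk has the same layout on every platform.
static uint32_t toLittleEndian32(uint32_t v)
{
#if NV_BIG_ENDIAN
    return (v >> 24) | ((v >> 8) & 0x0000FF00u) |
           ((v << 8) & 0x00FF0000u) | (v << 24);
#else
    return v;   // NV_LITTLE_ENDIAN: already in the desired order.
#endif
}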
@ -164,6 +171,28 @@
// cmake config
#include "nvconfig.h"
#if NV_OS_DARWIN
#include <stdint.h>
//#include <inttypes.h>
// Type definitions:
typedef uint8_t uint8;
typedef int8_t int8;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
// POSH gets this wrong due to __LP64__
#undef POSH_I64_PRINTF_PREFIX
#define POSH_I64_PRINTF_PREFIX "ll"
#else
// Type definitions:
typedef posh_u8_t uint8;
@ -175,8 +204,23 @@ typedef posh_i16_t int16;
typedef posh_u32_t uint32;
typedef posh_i32_t int32;
//#if NV_OS_DARWIN
// OSX-64 is supposed to be LP64 (longs and pointers are 64 bits), thus uint64 is defined as
// unsigned long. However, some OSX headers define it as unsigned long long, producing errors,
// even though both types are 64 bit. Ideally posh should handle that, but it has not been
// updated in ages, so here I'm just falling back to the standard C99 types defined in inttypes.h
//#include <inttypes.h>
//typedef posh_u64_t uint64_t;
//typedef posh_i64_t int64_t;
//#else
typedef posh_u64_t uint64;
typedef posh_i64_t int64;
//#endif
#if NV_OS_DARWIN
// To avoid duplicate definitions.
#define _UINT64
#endif
#endif
// Aliases
typedef uint32 uint;
@ -246,8 +290,10 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#include <stddef.h> // for size_t
template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
#define NV_ARRAY_SIZE(x) sizeof(ArraySizeHelper(x))
//#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 0 // Disabled in The Witness.
#if NV_CC_MSVC
@ -269,8 +315,38 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
}
namespace nv {
template <typename F>
struct ScopeExit {
ScopeExit(F f) : f(f) {}
~ScopeExit() { f(); }
F f;
};
template <typename F>
ScopeExit<F> MakeScopeExit(F f) {
return ScopeExit<F>(f);
};
}
#define NV_ON_RETURN(code) \
auto NV_STRING_JOIN2(scope_exit_, __LINE__) = nv::MakeScopeExit([=](){code;})
// Tell the compiler that the parameter is not used, to suppress compiler warnings.
#if NV_CC_MSVC
#define NV_UNUSED(a) ((a)=(a))
#else
#define NV_UNUSED(a) _Pragma(NV_STRING(unused(a)))
#endif
#if NV_CC_GNUC || NV_CC_CLANG
#define NV_LIKELY(x) __builtin_expect(!!(x), 1)
#define NV_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define NV_LIKELY(x) x
#define NV_UNLIKELY(x) x
#endif
// Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0);
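The ScopeExit/NV_ON_RETURN helper introduced in this hunk is what the new FileSystem::copyFile above uses to close both files on every exit path. Below is a minimal, hedged sketch of the idiom on its own; the file name, function and include path are illustrative.

#include "nvcore/nvcore.h"   // assumed include path; defines NV_ON_RETURN per this hunk
#include <cstdio>

bool readHeader(char * header, size_t size)
{
    FILE * fp = fopen("input.bin", "rb");   // illustrative file name
    if (fp == NULL) return false;

    // fclose(fp) runs when the scope is left, on every return path below.
    NV_ON_RETURN(fclose(fp));

    return fread(header, 1, size, fp) == size;
}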

@ -632,44 +632,45 @@ void BlockCTX1::setIndices(int * idx)
/// Decode BC6 block.
void BlockBC6::decodeBlock(Vector3 colors[16]) const
{
ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile);
// Convert ZOH's tile struct to Vector3, and convert half to float.
for (uint y = 0; y < 4; ++y)
{
for (uint x = 0; x < 4; ++x)
{
uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
colors[y * 4 + x].x = to_float(rHalf);
colors[y * 4 + x].y = to_float(gHalf);
colors[y * 4 + x].z = to_float(bHalf);
}
}
void BlockBC6::decodeBlock(Vector4 colors[16]) const
{
ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile);
// Convert ZOH's tile struct to Vector3, and convert half to float.
for (uint y = 0; y < 4; ++y)
{
for (uint x = 0; x < 4; ++x)
{
uint16 rHalf = ZOH::Tile::float2half(tile.data[y][x].x);
uint16 gHalf = ZOH::Tile::float2half(tile.data[y][x].y);
uint16 bHalf = ZOH::Tile::float2half(tile.data[y][x].z);
colors[y * 4 + x].x = to_float(rHalf);
colors[y * 4 + x].y = to_float(gHalf);
colors[y * 4 + x].z = to_float(bHalf);
colors[y * 4 + x].w = 1.0f;
}
}
}
/// Decode BC7 block.
void BlockBC7::decodeBlock(ColorBlock * block) const
{
AVPCL::Tile tile(4, 4);
AVPCL::decompress((const char *)data, tile);
// Convert AVPCL's tile struct back to NVTT's.
for (uint y = 0; y < 4; ++y)
{
for (uint x = 0; x < 4; ++x)
{
Vector4 rgba = tile.data[y][x];
// Note: decoded rgba values are in [0, 255] range and should be an integer,
// because BC7 never uses more than 8 bits per channel. So no need to round.
block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w));
}
}
AVPCL::Tile tile(4, 4);
AVPCL::decompress((const char *)data, tile);
// Convert AVPCL's tile struct back to NVTT's.
for (uint y = 0; y < 4; ++y)
{
for (uint x = 0; x < 4; ++x)
{
Vector4 rgba = tile.data[y][x];
// Note: decoded rgba values are in [0, 255] range and should be an integer,
// because BC7 never uses more than 8 bits per channel. So no need to round.
block->color(x, y).setRGBA(uint8(rgba.x), uint8(rgba.y), uint8(rgba.z), uint8(rgba.w));
}
}
}

@ -36,6 +36,7 @@ namespace nv
struct AlphaBlock4x4;
class Stream;
class Vector3;
class Vector4;
/// DXT1 block.
@ -220,7 +221,7 @@ namespace nv
struct BlockBC6
{
uint8 data[16]; // Not even going to try to write a union for this thing.
void decodeBlock(Vector3 colors[16]) const;
void decodeBlock(Vector4 colors[16]) const;
};
/// BC7 block.

@ -14,7 +14,8 @@ SET(IMAGE_SRCS
NormalMap.h NormalMap.cpp
PixelFormat.h
PsdFile.h
TgaFile.h)
TgaFile.h
KtxFile.h KtxFile.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

@ -454,7 +454,8 @@ namespace
{ D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } },
{ D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } },
{ D3DFMT_A8L8, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0, 0, 0xFF00 } },
{ D3DFMT_A8L8, 0, { 16, 0xFF, 0, 0, 0xFF00 } },
{ 0, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0xFF00, 0, 0 } },
};
static const uint s_formatCount = NV_ARRAY_SIZE(s_formats);
@ -635,7 +636,7 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
// set fourcc pixel format.
this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);
this->pf.fourcc = NV_MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0;
this->pf.rmask = 0;
@ -659,7 +660,7 @@ void DDSHeader::setFormatCode(uint32 code)
void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = NV_MAKEFOURCC(c0, c1, c2, c3);
}
@ -1445,7 +1446,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{
BlockBC6 block;
*stream << block;
Vector3 colors[16];
Vector4 colors[16];
block.decodeBlock(colors);
// Clamp to [0, 1] and round to 8-bit
@ -1453,7 +1454,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{
for (int x = 0; x < 4; ++x)
{
Vector3 px = colors[y*4 + x];
Vector4 px = colors[y*4 + x];
rgba->color(x, y).setRGBA(
ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f),
ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f),
@ -1535,7 +1536,7 @@ uint DirectDrawSurface::surfaceSize(uint mipmap) const
else {
w = (w + 3) / 4;
h = (h + 3) / 4;
d = d; // @@ How are 3D textures aligned?
//d = d; // @@ How are 3D textures aligned?
return blockSize * w * h * d;
}
}

@ -27,11 +27,9 @@
#include "nvimage.h"
#if !defined(MAKEFOURCC)
#define MAKEFOURCC(ch0, ch1, ch2, ch3) \
#define NV_MAKEFOURCC(ch0, ch1, ch2, ch3) \
(uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \
(uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 ))
#endif
namespace nv
{
@ -101,19 +99,26 @@ namespace nv
enum FOURCC
{
FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'),
FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '),
FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'),
FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'),
FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'),
FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'),
FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'),
FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'),
FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'),
FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'),
FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'),
FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'),
FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'),
FOURCC_NVTT = NV_MAKEFOURCC('N', 'V', 'T', 'T'),
FOURCC_DDS = NV_MAKEFOURCC('D', 'D', 'S', ' '),
FOURCC_DXT1 = NV_MAKEFOURCC('D', 'X', 'T', '1'),
FOURCC_DXT2 = NV_MAKEFOURCC('D', 'X', 'T', '2'),
FOURCC_DXT3 = NV_MAKEFOURCC('D', 'X', 'T', '3'),
FOURCC_DXT4 = NV_MAKEFOURCC('D', 'X', 'T', '4'),
FOURCC_DXT5 = NV_MAKEFOURCC('D', 'X', 'T', '5'),
FOURCC_RXGB = NV_MAKEFOURCC('R', 'X', 'G', 'B'),
FOURCC_ATI1 = NV_MAKEFOURCC('A', 'T', 'I', '1'),
FOURCC_ATI2 = NV_MAKEFOURCC('A', 'T', 'I', '2'),
FOURCC_A2XY = NV_MAKEFOURCC('A', '2', 'X', 'Y'),
FOURCC_DX10 = NV_MAKEFOURCC('D', 'X', '1', '0'),
FOURCC_UVER = NV_MAKEFOURCC('U', 'V', 'E', 'R'),
FOURCC_BC6H = NV_MAKEFOURCC('B', 'C', '6', 'H'),
FOURCC_BC7L = NV_MAKEFOURCC('B', 'C', '7', 'L'),
FOURCC_PVR0 = NV_MAKEFOURCC('P', 'V', 'R', '0'),
FOURCC_PVR1 = NV_MAKEFOURCC('P', 'V', 'R', '1'),
FOURCC_PVR2 = NV_MAKEFOURCC('P', 'V', 'R', '2'),
FOURCC_PVR3 = NV_MAKEFOURCC('P', 'V', 'R', '3'),
};

@ -1,460 +1,513 @@
#include "ErrorMetric.h"
#include "FloatImage.h"
#include "Filter.h"
#include "nvmath/Matrix.h"
#include "nvmath/Vector.inl"
#include <float.h> // FLT_MAX
using namespace nv;
float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
{
if (!sameLayout(img, ref)) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mse = 0;
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++)
{
float r0 = ref->pixel(i + count * 0);
float g0 = ref->pixel(i + count * 1);
float b0 = ref->pixel(i + count * 2);
float a0 = ref->pixel(i + count * 3);
float r1 = img->pixel(i + count * 0);
float g1 = img->pixel(i + count * 1);
float b1 = img->pixel(i + count * 2);
//float a1 = img->pixel(i + count * 3);
float r = r0 - r1;
float g = g0 - g1;
float b = b0 - b1;
float a = 1;
if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ?
mse += (r * r) * a;
mse += (g * g) * a;
mse += (b * b) * a;
}
return float(sqrt(mse / count));
}
float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img)
{
if (!sameLayout(img, ref)) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
double mse = 0;
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++)
{
float a0 = img->pixel(i + count * 3);
float a1 = ref->pixel(i + count * 3);
float a = a0 - a1;
mse += a * a;
}
return float(sqrt(mse / count));
}
float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
{
if (!sameLayout(img, ref)) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mae = 0;
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++)
{
float r0 = img->pixel(i + count * 0);
float g0 = img->pixel(i + count * 1);
float b0 = img->pixel(i + count * 2);
//float a0 = img->pixel(i + count * 3);
float r1 = ref->pixel(i + count * 0);
float g1 = ref->pixel(i + count * 1);
float b1 = ref->pixel(i + count * 2);
float a1 = ref->pixel(i + count * 3);
float r = fabs(r0 - r1);
float g = fabs(g0 - g1);
float b = fabs(b0 - b1);
float a = 1;
if (alphaWeight) a = a1;
mae += r * a;
mae += g * a;
mae += b * a;
}
return float(mae / count);
}
float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img)
{
if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
double mae = 0;
const uint count = img->width() * img->height();
for (uint i = 0; i < count; i++)
{
float a0 = img->pixel(i + count * 3);
float a1 = ref->pixel(i + count * 3);
float a = a0 - a1;
mae += fabs(a);
}
return float(mae / count);
}
// Color space conversions based on:
// http://www.brucelindbloom.com/
// Assumes input is in *linear* sRGB color space.
static Vector3 rgbToXyz(Vector3::Arg c)
{
Vector3 xyz;
xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z;
xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z;
xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z;
return xyz;
}
static Vector3 xyzToRgb(Vector3::Arg c)
{
Vector3 rgb;
rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z;
rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z;
rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z;
return rgb;
}
static float toLinear(float f)
{
return powf(f, 2.2f);
}
static float toGamma(float f)
{
// @@ Use sRGB space?
return powf(f, 1.0f/2.2f);
}
static Vector3 toLinear(Vector3::Arg c)
{
return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z));
}
static Vector3 toGamma(Vector3::Arg c)
{
return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z));
}
static float f(float t)
{
const float epsilon = powf(6.0f/29.0f, 3);
if (t > epsilon) {
return powf(t, 1.0f/3.0f);
}
else {
return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f;
}
}
static float finv(float t)
{
if (t > 6.0f / 29.0f) {
return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f);
}
else {
return powf(t, 3.0f);
}
}
static Vector3 xyzToCieLab(Vector3::Arg c)
{
// Normalized white point.
const float Xn = 0.950456f;
const float Yn = 1.0f;
const float Zn = 1.088754f;
float Xr = c.x / Xn;
float Yr = c.y / Yn;
float Zr = c.z / Zn;
float fx = f(Xr);
float fy = f(Yr);
float fz = f(Zr);
float L = 116 * fx - 16;
float a = 500 * (fx - fy);
float b = 200 * (fy - fz);
return Vector3(L, a, b);
}
static Vector3 rgbToCieLab(Vector3::Arg c)
{
return xyzToCieLab(rgbToXyz(toLinear(c)));
}
// h is hue-angle in radians
static Vector3 cieLabToLCh(Vector3::Arg c)
{
return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.y, c.z));
}
static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
{
nvDebugCheck(rgbImage != NULL && LabImage != NULL);
nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
const uint w = rgbImage->width();
const uint h = LabImage->height();
const float * R = rgbImage->channel(0);
const float * G = rgbImage->channel(1);
const float * B = rgbImage->channel(2);
float * L = LabImage->channel(0);
float * a = LabImage->channel(1);
float * b = LabImage->channel(2);
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i]));
L[i] = Lab.x;
a[i] = Lab.y;
b[i] = Lab.z;
}
}
// Assumes input images are in linear sRGB space.
float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
{
if (!sameLayout(img0, img1)) return FLT_MAX;
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
const float * r0 = img0->channel(0);
const float * g0 = img0->channel(1);
const float * b0 = img0->channel(2);
const float * r1 = img1->channel(0);
const float * g1 = img1->channel(1);
const float * b1 = img1->channel(2);
double error = 0.0f;
const uint count = img0->pixelCount();
for (uint i = 0; i < count; i++)
{
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
// @@ Measure Delta E.
Vector3 delta = lab0 - lab1;
error += length(delta);
}
return float(error / count);
}
// Assumes input images are in linear sRGB space.
float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1)
{
if (!sameLayout(img0, img1)) return FLT_MAX;
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
const float kL = 1;
const float kC = 1;
const float kH = 1;
const float k1 = 0.045f;
const float k2 = 0.015f;
const float sL = 1;
const float * r0 = img0->channel(0);
const float * g0 = img0->channel(1);
const float * b0 = img0->channel(2);
const float * r1 = img1->channel(0);
const float * g1 = img1->channel(1);
const float * b1 = img1->channel(2);
double error = 0.0f;
const uint count = img0->pixelCount();
for (uint i = 0; i < count; ++i)
{
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
Vector3 lch0 = cieLabToLCh(lab0);
Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
Vector3 lch1 = cieLabToLCh(lab1);
const float sC = 1 + k1*lch0.x;
const float sH = 1 + k2*lch0.x;
// @@ Measure Delta E using the 1994 definition
Vector3 labDelta = lab0 - lab1;
Vector3 lchDelta = lch0 - lch1;
double deltaLsq = powf(lchDelta.x / (kL*sL), 2);
double deltaCsq = powf(lchDelta.y / (kC*sC), 2);
// avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2
double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2);
deltaHsq /= powf(kH*sH, 2);
error += sqrt(deltaLsq + deltaCsq + deltaHsq);
}
return float(error / count);
}
float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
uint d = img0->depth();
FloatImage lab0, lab1; // Original images in CIE-Lab space.
lab0.allocate(3, w, h, d);
lab1.allocate(3, w, h, d);
// Convert input images to CIE-Lab.
rgbToCieLab(img0, &lab0);
rgbToCieLab(img1, &lab1);
// @@ Convolve each channel by the corresponding filter.
/*
GaussianFilter LFilter(5);
GaussianFilter aFilter(5);
GaussianFilter bFilter(5);
lab0.convolve(0, LFilter);
lab0.convolve(1, aFilter);
lab0.convolve(2, bFilter);
lab1.convolve(0, LFilter);
lab1.convolve(1, aFilter);
lab1.convolve(2, bFilter);
*/
// @@ Measure Delta E between lab0 and lab1.
return 0.0f;
}
// Assumes input images are normal maps.
float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
const float * x0 = img0->channel(0);
const float * y0 = img0->channel(1);
const float * z0 = img0->channel(2);
const float * x1 = img1->channel(0);
const float * y1 = img1->channel(1);
const float * z1 = img1->channel(2);
double error = 0.0f;
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
n0 = 2.0f * n0 - Vector3(1);
n1 = 2.0f * n1 - Vector3(1);
n0 = normalizeSafe(n0, Vector3(0), 0.0f);
n1 = normalizeSafe(n1, Vector3(0), 0.0f);
error += acos(clamp(dot(n0, n1), -1.0f, 1.0f));
}
return float(error / count);
}
float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
const float * x0 = img0->channel(0);
const float * y0 = img0->channel(1);
const float * z0 = img0->channel(2);
const float * x1 = img1->channel(0);
const float * y1 = img1->channel(1);
const float * z1 = img1->channel(2);
double error = 0.0f;
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
n0 = 2.0f * n0 - Vector3(1);
n1 = 2.0f * n1 - Vector3(1);
n0 = normalizeSafe(n0, Vector3(0), 0.0f);
n1 = normalizeSafe(n1, Vector3(0), 0.0f);
float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
error += angle * angle;
}
return float(sqrt(error / count));
}
#include "ErrorMetric.h"
#include "FloatImage.h"
#include "Filter.h"
#include "nvmath/Matrix.h"
#include "nvmath/Vector.inl"
#include <float.h> // FLT_MAX
using namespace nv;
float nv::rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
{
if (!sameLayout(img, ref)) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mse = 0;
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++)
{
float r0 = ref->pixel(i + count * 0);
float g0 = ref->pixel(i + count * 1);
float b0 = ref->pixel(i + count * 2);
float a0 = ref->pixel(i + count * 3);
float r1 = img->pixel(i + count * 0);
float g1 = img->pixel(i + count * 1);
float b1 = img->pixel(i + count * 2);
//float a1 = img->pixel(i + count * 3);
float r = r0 - r1;
float g = g0 - g1;
float b = b0 - b1;
float a = 1;
if (alphaWeight) a = a0 * a0; // @@ a0*a1 or a0*a0 ?
mse += (r * r) * a;
mse += (g * g) * a;
mse += (b * b) * a;
}
return float(sqrt(mse / count));
}
float nv::rmsAlphaError(const FloatImage * ref, const FloatImage * img)
{
if (!sameLayout(img, ref)) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
double mse = 0;
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++)
{
float a0 = img->pixel(i + count * 3);
float a1 = ref->pixel(i + count * 3);
float a = a0 - a1;
mse += a * a;
}
return float(sqrt(mse / count));
}
float nv::averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight)
{
if (!sameLayout(img, ref)) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mae = 0;
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++)
{
float r0 = img->pixel(i + count * 0);
float g0 = img->pixel(i + count * 1);
float b0 = img->pixel(i + count * 2);
//float a0 = img->pixel(i + count * 3);
float r1 = ref->pixel(i + count * 0);
float g1 = ref->pixel(i + count * 1);
float b1 = ref->pixel(i + count * 2);
float a1 = ref->pixel(i + count * 3);
float r = fabs(r0 - r1);
float g = fabs(g0 - g1);
float b = fabs(b0 - b1);
float a = 1;
if (alphaWeight) a = a1;
mae += r * a;
mae += g * a;
mae += b * a;
}
return float(mae / count);
}
float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img)
{
if (img == NULL || ref == NULL || img->width() != ref->width() || img->height() != ref->height()) {
return FLT_MAX;
}
nvDebugCheck(img->componentCount() == 4 && ref->componentCount() == 4);
double mae = 0;
const uint count = img->width() * img->height();
for (uint i = 0; i < count; i++)
{
float a0 = img->pixel(i + count * 3);
float a1 = ref->pixel(i + count * 3);
float a = a0 - a1;
mae += fabs(a);
}
return float(mae / count);
}
float nv::rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight)
{
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mse = 0;
const uint w0 = ref->width();
const uint h0 = ref->height();
const uint d0 = ref->depth();
const uint w1 = img->width();
const uint h1 = img->height();
const uint d1 = img->depth();
for (uint z = 0; z < d0; z++) {
for (uint y = 0; y < h0; y++) {
for (uint x = 0; x < w0; x++) {
float r0 = ref->pixel(0, x, y, z);
float g0 = ref->pixel(1, x, y, z);
float b0 = ref->pixel(2, x, y, z);
float a0 = ref->pixel(3, x, y, z);
float fx = float(x) / w0;
float fy = float(y) / h0;
float fz = float(z) / d0;
float r1 = img->sampleLinear(0, fx, fy, fz, wm);
float g1 = img->sampleLinear(1, fx, fy, fz, wm);
float b1 = img->sampleLinear(2, fx, fy, fz, wm);
float a1 = img->sampleLinear(3, fx, fy, fz, wm); // alpha is channel 3
float dr = r0 - r1;
float dg = g0 - g1;
float db = b0 - b1;
float da = a0 - a1;
float w = 1;
if (alphaWeight) w = a0 * a0; // @@ a0*a1 or a0*a0 ?
mse += (dr * dr) * w;
mse += (dg * dg) * w;
mse += (db * db) * w;
mse += (da * da);
}
}
}
int count = w0 * h0 * d0;
return float(sqrt(mse / count));
}
// Color space conversions based on:
// http://www.brucelindbloom.com/
// Assumes input is in *linear* sRGB color space.
static Vector3 rgbToXyz(Vector3::Arg c)
{
Vector3 xyz;
xyz.x = 0.412453f * c.x + 0.357580f * c.y + 0.180423f * c.z;
xyz.y = 0.212671f * c.x + 0.715160f * c.y + 0.072169f * c.z;
xyz.z = 0.019334f * c.x + 0.119193f * c.y + 0.950227f * c.z;
return xyz;
}
static Vector3 xyzToRgb(Vector3::Arg c)
{
Vector3 rgb;
rgb.x = 3.2404542f * c.x - 1.5371385f * c.y - 0.4985314f * c.z;
rgb.y = -0.9692660f * c.x + 1.8760108f * c.y + 0.0415560f * c.z;
rgb.z = 0.0556434f * c.x - 0.2040259f * c.y + 1.0572252f * c.z;
return rgb;
}
static float toLinear(float f)
{
return powf(f, 2.2f);
}
static float toGamma(float f)
{
// @@ Use sRGB space?
return powf(f, 1.0f/2.2f);
}
static Vector3 toLinear(Vector3::Arg c)
{
return Vector3(toLinear(c.x), toLinear(c.y), toLinear(c.z));
}
static Vector3 toGamma(Vector3::Arg c)
{
return Vector3(toGamma(c.x), toGamma(c.y), toGamma(c.z));
}
static float f(float t)
{
const float epsilon = powf(6.0f/29.0f, 3);
if (t > epsilon) {
return powf(t, 1.0f/3.0f);
}
else {
return 1.0f/3.0f * powf(29.0f/6.0f, 2) * t + 4.0f / 29.0f;
}
}
static float finv(float t)
{
if (t > 6.0f / 29.0f) {
return 3.0f * powf(6.0f / 29.0f, 2) * (t - 4.0f / 29.0f);
}
else {
return powf(t, 3.0f);
}
}
static Vector3 xyzToCieLab(Vector3::Arg c)
{
// Normalized white point.
const float Xn = 0.950456f;
const float Yn = 1.0f;
const float Zn = 1.088754f;
float Xr = c.x / Xn;
float Yr = c.y / Yn;
float Zr = c.z / Zn;
float fx = f(Xr);
float fy = f(Yr);
float fz = f(Zr);
float L = 116 * fx - 16;
float a = 500 * (fx - fy);
float b = 200 * (fy - fz);
return Vector3(L, a, b);
}
static Vector3 rgbToCieLab(Vector3::Arg c)
{
return xyzToCieLab(rgbToXyz(toLinear(c)));
}
// h is hue-angle in radians
static Vector3 cieLabToLCh(Vector3::Arg c)
{
return Vector3(c.x, sqrtf(c.y*c.y + c.z*c.z), atan2f(c.y, c.z));
}
static void rgbToCieLab(const FloatImage * rgbImage, FloatImage * LabImage)
{
nvDebugCheck(rgbImage != NULL && LabImage != NULL);
nvDebugCheck(rgbImage->width() == LabImage->width() && rgbImage->height() == LabImage->height());
nvDebugCheck(rgbImage->componentCount() >= 3 && LabImage->componentCount() >= 3);
const uint w = rgbImage->width();
const uint h = LabImage->height();
const float * R = rgbImage->channel(0);
const float * G = rgbImage->channel(1);
const float * B = rgbImage->channel(2);
float * L = LabImage->channel(0);
float * a = LabImage->channel(1);
float * b = LabImage->channel(2);
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 Lab = rgbToCieLab(Vector3(R[i], G[i], B[i]));
L[i] = Lab.x;
a[i] = Lab.y;
b[i] = Lab.z;
}
}
// Assumes input images are in linear sRGB space.
float nv::cieLabError(const FloatImage * img0, const FloatImage * img1)
{
if (!sameLayout(img0, img1)) return FLT_MAX;
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
const float * r0 = img0->channel(0);
const float * g0 = img0->channel(1);
const float * b0 = img0->channel(2);
const float * r1 = img1->channel(0);
const float * g1 = img1->channel(1);
const float * b1 = img1->channel(2);
double error = 0.0f;
const uint count = img0->pixelCount();
for (uint i = 0; i < count; i++)
{
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
// @@ Measure Delta E.
Vector3 delta = lab0 - lab1;
error += length(delta);
}
return float(error / count);
}
// Assumes input images are in linear sRGB space.
float nv::cieLab94Error(const FloatImage * img0, const FloatImage * img1)
{
if (!sameLayout(img0, img1)) return FLT_MAX;
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
const float kL = 1;
const float kC = 1;
const float kH = 1;
const float k1 = 0.045f;
const float k2 = 0.015f;
const float sL = 1;
const float * r0 = img0->channel(0);
const float * g0 = img0->channel(1);
const float * b0 = img0->channel(2);
const float * r1 = img1->channel(0);
const float * g1 = img1->channel(1);
const float * b1 = img1->channel(2);
double error = 0.0f;
const uint count = img0->pixelCount();
for (uint i = 0; i < count; ++i)
{
Vector3 lab0 = rgbToCieLab(Vector3(r0[i], g0[i], b0[i]));
Vector3 lch0 = cieLabToLCh(lab0);
Vector3 lab1 = rgbToCieLab(Vector3(r1[i], g1[i], b1[i]));
Vector3 lch1 = cieLabToLCh(lab1);
const float sC = 1 + k1*lch0.x;
const float sH = 1 + k2*lch0.x;
// @@ Measure Delta E using the 1994 definition
Vector3 labDelta = lab0 - lab1;
Vector3 lchDelta = lch0 - lch1;
double deltaLsq = powf(lchDelta.x / (kL*sL), 2);
double deltaCsq = powf(lchDelta.y / (kC*sC), 2);
// avoid possible sqrt of negative value by computing (deltaH/(kH*sH))^2
double deltaHsq = powf(labDelta.y, 2) + powf(labDelta.z, 2) - powf(lchDelta.y, 2);
deltaHsq /= powf(kH*sH, 2);
error += sqrt(deltaLsq + deltaCsq + deltaHsq);
}
return float(error / count);
}
float nv::spatialCieLabError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
uint d = img0->depth();
FloatImage lab0, lab1; // Original images in CIE-Lab space.
lab0.allocate(3, w, h, d);
lab1.allocate(3, w, h, d);
// Convert input images to CIE-Lab.
rgbToCieLab(img0, &lab0);
rgbToCieLab(img1, &lab1);
// @@ Convolve each channel by the corresponding filter.
/*
GaussianFilter LFilter(5);
GaussianFilter aFilter(5);
GaussianFilter bFilter(5);
lab0.convolve(0, LFilter);
lab0.convolve(1, aFilter);
lab0.convolve(2, bFilter);
lab1.convolve(0, LFilter);
lab1.convolve(1, aFilter);
lab1.convolve(2, bFilter);
*/
// @@ Measure Delta E between lab0 and lab1.
return 0.0f;
}
// Assumes input images are normal maps.
float nv::averageAngularError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
const float * x0 = img0->channel(0);
const float * y0 = img0->channel(1);
const float * z0 = img0->channel(2);
const float * x1 = img1->channel(0);
const float * y1 = img1->channel(1);
const float * z1 = img1->channel(2);
double error = 0.0f;
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
n0 = 2.0f * n0 - Vector3(1);
n1 = 2.0f * n1 - Vector3(1);
n0 = normalizeSafe(n0, Vector3(0), 0.0f);
n1 = normalizeSafe(n1, Vector3(0), 0.0f);
error += acos(clamp(dot(n0, n1), -1.0f, 1.0f));
}
return float(error / count);
}
float nv::rmsAngularError(const FloatImage * img0, const FloatImage * img1)
{
if (img0 == NULL || img1 == NULL || img0->width() != img1->width() || img0->height() != img1->height()) {
return FLT_MAX;
}
nvDebugCheck(img0->componentCount() == 4 && img1->componentCount() == 4);
uint w = img0->width();
uint h = img0->height();
const float * x0 = img0->channel(0);
const float * y0 = img0->channel(1);
const float * z0 = img0->channel(2);
const float * x1 = img1->channel(0);
const float * y1 = img1->channel(1);
const float * z1 = img1->channel(2);
double error = 0.0f;
const uint count = w*h;
for (uint i = 0; i < count; i++)
{
Vector3 n0 = Vector3(x0[i], y0[i], z0[i]);
Vector3 n1 = Vector3(x1[i], y1[i], z1[i]);
n0 = 2.0f * n0 - Vector3(1);
n1 = 2.0f * n1 - Vector3(1);
n0 = normalizeSafe(n0, Vector3(0), 0.0f);
n1 = normalizeSafe(n1, Vector3(0), 0.0f);
float angle = acosf(clamp(dot(n0, n1), -1.0f, 1.0f));
error += angle * angle;
}
return float(sqrt(error / count));
}

@ -1,5 +1,6 @@
#include "nvimage.h"
#include "FloatImage.h" // For FloatImage::WrapMode
namespace nv
@ -9,13 +10,15 @@ namespace nv
float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float rmsAlphaError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight);
float cieLabError(const FloatImage * ref, const FloatImage * img);
float cieLab94Error(const FloatImage * ref, const FloatImage * img);
float spatialCieLabError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float averageAngularError(const FloatImage * img0, const FloatImage * img1);
float rmsAngularError(const FloatImage * img0, const FloatImage * img1);

File diff suppressed because it is too large.

@ -35,6 +35,7 @@ namespace nv
};
NVIMAGE_API FloatImage();
NVIMAGE_API FloatImage(const FloatImage & img);
NVIMAGE_API FloatImage(const Image * img);
NVIMAGE_API virtual ~FloatImage();
@ -92,10 +93,10 @@ namespace nv
NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const;

@ -42,13 +42,21 @@ const Image & Image::operator=(const Image & img)
void Image::allocate(uint w, uint h, uint d/*= 1*/)
{
free();
m_width = w;
m_height = h;
m_depth = d;
m_data = realloc<Color32>(m_data, w * h * d);
}
void Image::acquire(Color32 * data, uint w, uint h, uint d/*= 1*/)
{
free();
m_width = w;
m_height = h;
m_depth = d;
m_data = data;
}
void Image::resize(uint w, uint h, uint d/*= 1*/) {
Image img;

@ -34,6 +34,7 @@ namespace nv
void allocate(uint w, uint h, uint d = 1);
void acquire(Color32 * data, uint w, uint h, uint d = 1);
bool load(const char * name);
void resize(uint w, uint h, uint d = 1);

@ -8,6 +8,8 @@
#include "DirectDrawSurface.h"
#include "PixelFormat.h"
#include "nvthread/ParallelFor.h"
#include "nvmath/Color.h"
#include "nvmath/Half.h"
@ -19,31 +21,31 @@
#include "nvcore/TextWriter.h"
// Extern
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
# include <FreeImage.h>
// If FreeImage is available, do not use the individual libraries, since that produces link conflicts on some platforms.
# undef HAVE_JPEG
# undef HAVE_PNG
# undef HAVE_TIFF
# undef HAVE_OPENEXR
# undef NV_HAVE_JPEG
# undef NV_HAVE_PNG
# undef NV_HAVE_TIFF
# undef NV_HAVE_OPENEXR
#endif
#if defined(HAVE_JPEG)
#if defined(NV_HAVE_JPEG)
extern "C" {
# include <jpeglib.h>
}
#endif
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
# include <png.h>
#endif
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
# define _TIFF_DATA_TYPEDEFS_
# include <tiffio.h>
#endif
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
# include <ImfIO.h>
# include <ImathBox.h>
# include <ImfChannelList.h>
@ -52,7 +54,7 @@ extern "C" {
# include <ImfArray.h>
#endif
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
# define STBI_NO_STDIO
# include <stb_image.h>
#endif
@ -303,6 +305,51 @@ static bool saveTGA(Stream & s, const Image * img)
return true;
}
#pragma optimize("", off)
// Save BMP image.
static bool saveBMP(Stream & s, const Image * img)
{
int w = img->width();
int h = img->height();
int image_size = w * h * 3;
BmpFileHeader header;
zero(header);
header.type = BM_TYPE;
header.size = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE + image_size;
header.offBits = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE;
BmpInfoHeader info;
zero(info);
info.size = BITMAPINFOHEADER_SIZE;
info.width = w;
info.height = h;
info.planes = 1;
info.bitCount = 24;
info.sizeImage = image_size;
info.xPelsPerMeter = 2000;
info.yPelsPerMeter = 2000;
s << header;
s << info;
nv::Array<uint8> data;
data.resize(3 * w);
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
data[x * 3 + 0] = img->pixel(x, h - y - 1).b;
data[x * 3 + 1] = img->pixel(x, h - y - 1).g;
data[x * 3 + 2] = img->pixel(x, h - y - 1).r;
}
s.serialize(data.buffer(), data.size());
}
return true;
}
/*static Image * loadPPM(Stream & s)
{
// @@
@ -324,7 +371,10 @@ static bool savePPM(Stream & s, const Image * img)
writer.writeString("255\n");
for (uint i = 0; i < w * h; i++) {
Color32 c = img->pixel(i);
s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b;
uint8 r = c.r; // current version of apple's llvm compiling for arm64 doesn't like taking the address of a bit-field. Workaround by using the stack
uint8 g = c.g;
uint8 b = c.b;
s << r << g << b;
}
return true;
@ -653,7 +703,7 @@ static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component
}
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
{
@ -902,9 +952,9 @@ static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/)
return true;
}
#endif // defined(HAVE_PNG)
#endif // defined(NV_HAVE_PNG)
#if defined(HAVE_JPEG)
#if defined(NV_HAVE_JPEG)
static void init_source (j_decompress_ptr /*cinfo*/){
}
@ -1011,9 +1061,9 @@ static Image * loadJPG(Stream & s)
return img.release();
}
#endif // defined(HAVE_JPEG)
#endif // defined(NV_HAVE_JPEG)
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
/*
static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size)
@ -1207,9 +1257,9 @@ static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint
return true;
}
#endif // defined(HAVE_TIFF)
#endif // defined(NV_HAVE_TIFF)
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
namespace
{
@ -1348,10 +1398,10 @@ static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint
return true;
}
#endif // defined(HAVE_OPENEXR)
#endif // defined(NV_HAVE_OPENEXR)
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle)
{
@ -1688,10 +1738,10 @@ bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Fl
return result;
}
#endif // defined(HAVE_FREEIMAGE)
#endif // defined(NV_HAVE_FREEIMAGE)
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
static Image * loadSTB(Stream & s)
{
@ -1704,28 +1754,22 @@ static Image * loadSTB(Stream & s)
int w, h, n;
uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4);
// @@ Hack: STB is returning n=4, because we request 4 components, even when input only has 3.
n = 3;
delete [] buffer;
if (data != NULL) {
Image * img = new Image;
img->allocate(w, h);
img->acquire((Color32 *)data, w, h);
img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB);
for (int y = 0; y < h; ++y)
{
nv::Color32* dest = img->scanline(y);
uint8* src = data + y * w * 4;
for (int x = 0; x < w; ++x)
{
dest[x].r = src[x * 4 + 0];
dest[x].g = src[x * 4 + 1];
dest[x].b = src[x * 4 + 2];
dest[x].a = src[x * 4 + 3];
}
}
free(data);
int count = w * h;
for (int i = 0; i < count; ++i) {
//parallel_for(count, 128, [&](int i) {
Color32 & pixel = img->pixel(i);
swap(pixel.r, pixel.b);
}//);
return img;
}
@ -1766,7 +1810,7 @@ static FloatImage * loadFloatSTB(Stream & s)
return NULL;
}
#endif // defined(HAVE_STBIMAGE)
#endif // defined(NV_HAVE_STBIMAGE)
@ -1804,32 +1848,33 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s)
return loadPPM(s);
}*/
#if defined(HAVE_JPEG)
#if defined(NV_HAVE_JPEG)
if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) {
return loadJPG(s);
}
#endif
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) {
return loadPNG(s);
}
#endif
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFreeImage(fif, s);
}
#endif
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
return loadSTB(s);
#endif
return NULL;
}
bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/)
{
nvDebugCheck(fileName != NULL);
@ -1838,6 +1883,10 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
const char * extension = Path::extension(fileName);
if (strCaseDiff(extension, ".bmp") == 0) {
return saveBMP(s, img);
}
if (strCaseDiff(extension, ".tga") == 0) {
return saveTGA(s, img);
}
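// With the .bmp branch wired into the dispatch above, the BMP writer is reachable through
// the regular ImageIO::save call. A minimal usage sketch; StdOutputStream and its isError()
// check are assumptions taken from nvcore/StdStream.h, which is not shown in this diff.
//
// #include "nvimage/ImageIO.h"
// #include "nvimage/Image.h"
// #include "nvcore/StdStream.h"   // StdOutputStream assumed to live here.
//
// static bool writeBmp(const nv::Image * img, const char * fileName /* e.g. "out.bmp" */)
// {
//     nv::StdOutputStream stream(fileName);
//     if (stream.isError()) return false;               // assumed Stream interface
//     return nv::ImageIO::save(fileName, stream, img, /*tags=*/NULL);
// }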
@ -1846,13 +1895,13 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
return savePPM(s, img);
}
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) {
return savePNG(s, img, tags);
}
#endif
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFreeImage(fif, s, img, tags);
@ -1899,27 +1948,27 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
return loadFloatPFM(s);
}*/
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
#pragma NV_MESSAGE("TODO: Load TIFF from stream.")
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return loadFloatTIFF(fileName, s);
}
#endif
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
#pragma NV_MESSAGE("TODO: Load EXR from stream.")
if (strCaseDiff(extension, ".exr") == 0) {
return loadFloatEXR(fileName, s);
}
#endif
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
if (strCaseDiff(extension, ".hdr") == 0) {
return loadFloatSTB(s);
}
#endif
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFloatFreeImage(fif, s);
@ -1961,7 +2010,7 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
return saveFloatPFM(s, fimage, baseComponent, componentCount);
}*/
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount);
@ -2005,14 +2054,15 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui
}
const char * extension = Path::extension(fileName);
NV_UNUSED(extension);
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
if (strCaseDiff(extension, ".exr") == 0) {
return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
}
#endif
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return saveFloatTIFF(fileName, fimage, baseComponent, componentCount);
}

@ -1,6 +1,7 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "KtxFile.h"
#include "nvcore/StdStream.h"
using namespace nv;
@ -10,6 +11,8 @@ static const uint8 fileIdentifier[12] = {
0x0D, 0x0A, 0x1A, 0x0A
};
namespace nv
{
KtxHeader::KtxHeader() {
memcpy(identifier, fileIdentifier, 12);
@ -19,8 +22,8 @@ KtxHeader::KtxHeader() {
glType = 0;
glTypeSize = 1;
glFormat = 0;
glInternalFormat = KTX_RGBA;
glBaseInternalFormat = KTX_RGBA;
glInternalFormat = KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1;
glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
pixelWidth = 0;
pixelHeight = 0;
pixelDepth = 0;
@ -31,9 +34,9 @@ KtxHeader::KtxHeader() {
}
Stream & operator<< (Stream & s, DDSHeader & header) {
Stream & operator<< (Stream & s, KtxHeader & header) {
s.serialize(header.identifier, 12);
s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
s << header.endianness << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
s << header.pixelWidth << header.pixelHeight << header.pixelDepth;
s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels;
s << header.bytesOfKeyValueData;
@ -41,7 +44,7 @@ Stream & operator<< (Stream & s, DDSHeader & header) {
}
KtxFile::KtxFile() {
/*KtxFile::KtxFile() {
}
KtxFile::~KtxFile() {
}
@ -49,7 +52,7 @@ KtxFile::~KtxFile() {
void KtxFile::addKeyValue(const char * key, const char * value) {
keyArray.append(key);
valueArray.append(value);
bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
header.bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
}
@ -77,7 +80,8 @@ Stream & operator<< (Stream & s, KtxFile & file) {
}
return s;
}
}*/
} // nv

@ -6,6 +6,7 @@
#include "nvimage.h"
#include "nvcore/StrLib.h"
#include "nvcore/Array.h"
// KTX File format specification:
// http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key
@ -14,22 +15,99 @@ namespace nv
{
class Stream;
// GL types (Table 3.2)
const uint KTX_UNSIGNED_BYTE;
const uint KTX_UNSIGNED_SHORT_5_6_5;
// ...
// GL formats (Table 3.3)
// ...
// GL internal formats (Table 3.12, 3.13)
// ...
// GL base internal format. (Table 3.11)
const uint KTX_RGB;
const uint KTX_RGBA;
const uint KTX_ALPHA;
// ...
// GL types
const uint KTX_UNSIGNED_BYTE = 0x1401;
const uint KTX_BYTE = 0x1400;
const uint KTX_UNSIGNED_SHORT = 0x1403;
const uint KTX_SHORT = 0x1402;
const uint KTX_UNSIGNED_INT = 0x1405;
const uint KTX_INT = 0x1404;
const uint KTX_FLOAT = 0x1406;
const uint KTX_UNSIGNED_BYTE_3_3_2 = 0x8032;
const uint KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362;
const uint KTX_UNSIGNED_SHORT_5_6_5 = 0x8363;
const uint KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364;
const uint KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033;
const uint KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365;
const uint KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034;
const uint KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366;
const uint KTX_UNSIGNED_INT_8_8_8_8 = 0x8035;
const uint KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367;
const uint KTX_UNSIGNED_INT_10_10_10_2 = 0x8036;
const uint KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368;
// GL formats
const uint KTX_FORMAT_RED = 0x1903;
const uint KTX_FORMAT_RG = 0x8227;
const uint KTX_FORMAT_RGB = 0x1907;
const uint KTX_FORMAT_BGR = 0x80E0;
const uint KTX_FORMAT_RGBA = 0x1908;
const uint KTX_FORMAT_BGRA = 0x80E1;
const uint KTX_FORMAT_RED_INTEGER = 0x8D94;
const uint KTX_FORMAT_RG_INTEGER = 0x8228;
const uint KTX_FORMAT_RGB_INTEGER = 0x8D98;
const uint KTX_FORMAT_BGR_INTEGER = 0x8D9A;
const uint KTX_FORMAT_RGBA_INTEGER = 0x8D99;
const uint KTX_FORMAT_BGRA_INTEGER = 0x8D9B;
const uint KTX_FORMAT_STENCIL_INDEX = 0x1901;
const uint KTX_FORMAT_DEPTH_COMPONENT = 0x1902;
const uint KTX_FORMAT_DEPTH_STENCIL = 0x84F9;
// GL internal formats
// BC1
const uint KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1 = 0x83F0;
const uint KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 = 0x8C4C;
// BC1a
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1 = 0x83F1;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 = 0x8C4D;
// BC2
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3 = 0x83F2;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 = 0x8C4E;
// BC3
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5 = 0x83F3;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 = 0x8C4F;
// BC4
const uint KTX_INTERNAL_COMPRESSED_RED_RGTC1 = 0x8DBB;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 = 0x8DBC;
// BC5
const uint KTX_INTERNAL_COMPRESSED_RG_RGTC2 = 0x8DBD;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 = 0x8DBE;
// BC6
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F;
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E;
// BC7
const uint KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D;
// ETC
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC1 = 0x8D64;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC1 = 0x8D64; // @@ GL has no sRGB ETC1 internal format; this aliases the ETC1 enum.
// ETC2
const uint KTX_INTERNAL_COMPRESSED_RED_EAC = 0x9270;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_EAC = 0x9271;
const uint KTX_INTERNAL_COMPRESSED_RG_EAC = 0x9272;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_EAC = 0x9273;
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC2 = 0x9274;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC2 = 0x9275;
const uint KTX_INTERNAL_COMPRESSED_RGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9276;
const uint KTX_INTERNAL_COMPRESSED_SRGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9277;
const uint KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC = 0x9278;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC = 0x9279;
// GL base internal formats
const uint KTX_BASE_INTERNAL_DEPTH_COMPONENT = 0x1902;
const uint KTX_BASE_INTERNAL_DEPTH_STENCIL = 0x84F9;
const uint KTX_BASE_INTERNAL_RED = 0x1903;
const uint KTX_BASE_INTERNAL_RG = 0x8227;
const uint KTX_BASE_INTERNAL_RGB = 0x1907;
const uint KTX_BASE_INTERNAL_RGBA = 0x1908;
const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901;
struct KtxHeader {
@ -52,10 +130,10 @@ namespace nv
};
NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header);
NVIMAGE_API Stream & operator<< (Stream & s, KtxHeader & header);
struct KtxFile {
/* struct KtxFile {
KtxFile();
~KtxFile();
@ -66,10 +144,9 @@ namespace nv
Array<String> keyArray;
Array<String> valueArray;
};
NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);
NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);*/
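// The constant tables above make it possible to fill a KtxHeader directly for whichever
// compressed format is being written; the constructor now defaults to sRGB DXT1. A small
// sketch for a BC3 (DXT5) sRGB texture; the dimensions and mip count are placeholder values.
inline void writeExampleKtxHeader(Stream & s)
{
    KtxHeader header;                                   // identifier/endianness set by the constructor
    header.glInternalFormat     = KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5;
    header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
    header.pixelWidth  = 512;
    header.pixelHeight = 512;
    header.pixelDepth  = 0;                             // 0 for plain 2D textures
    header.numberOfFaces = 1;
    header.numberOfMipmapLevels = 10;                   // log2(512) + 1
    s << header;                                        // uses the operator<< declared above
}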
/*

@ -1,208 +1,208 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include "NormalMap.h"
#include "Filter.h"
#include "FloatImage.h"
#include "Image.h"
#include "nvmath/Color.inl"
#include "nvmath/Vector.h"
#include "nvcore/Ptr.h"
#include <string.h> // memcpy
using namespace nv;
// Create normal map using the given kernels.
static FloatImage * createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, const Kernel2 * kdu, const Kernel2 * kdv)
{
nvDebugCheck(kdu != NULL);
nvDebugCheck(kdv != NULL);
nvDebugCheck(img != NULL);
const uint w = img->width();
const uint h = img->height();
AutoPtr<FloatImage> fimage(new FloatImage());
fimage->allocate(4, w, h);
// Compute height and store in alpha channel:
float * alphaChannel = fimage->channel(3);
for(uint i = 0; i < w * h; i++)
{
Vector4 color = toVector4(img->pixel(i));
alphaChannel[i] = dot(color, heightWeights);
}
float heightScale = 1.0f / 16.0f; // @@ Use a user defined factor.
for(uint y = 0; y < h; y++)
{
for(uint x = 0; x < w; x++)
{
const float du = fimage->applyKernelXY(kdu, x, y, 0, 3, wm);
const float dv = fimage->applyKernelXY(kdv, x, y, 0, 3, wm);
Vector3 n = normalize(Vector3(du, dv, heightScale));
fimage->pixel(0, x, y, 0) = 0.5f * n.x + 0.5f;
fimage->pixel(1, x, y, 0) = 0.5f * n.y + 0.5f;
fimage->pixel(2, x, y, 0) = 0.5f * n.z + 0.5f;
}
}
return fimage.release();
}
// Create normal map using the given kernels.
static FloatImage * createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, const Kernel2 * kdu, const Kernel2 * kdv)
{
nvDebugCheck(kdu != NULL);
nvDebugCheck(kdv != NULL);
nvDebugCheck(img != NULL);
#pragma NV_MESSAGE("FIXME: Height scale parameter should go away. It should be a sensible value that produces good results when the heightmap is in the [0, 1] range.")
const float heightScale = 1.0f / 16.0f;
const uint w = img->width();
const uint h = img->height();
AutoPtr<FloatImage> img_out(new FloatImage());
img_out->allocate(4, w, h);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
const float du = img->applyKernelXY(kdu, x, y, 0, 3, wm);
const float dv = img->applyKernelXY(kdv, x, y, 0, 3, wm);
Vector3 n = normalize(Vector3(du, dv, heightScale));
img_out->pixel(0, x, y, 0) = n.x;
img_out->pixel(1, x, y, 0) = n.y;
img_out->pixel(2, x, y, 0) = n.z;
}
}
// Copy alpha channel.
/*for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
img_out->pixel(3, x, y, 0) = img->pixel(3, x, y, 0);
}
}*/
memcpy(img_out->channel(3), img->channel(3), w * h * sizeof(float));
return img_out.release();
}
/// Create normal map using the given filter.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, NormalMapFilter filter /*= Sobel3x3*/)
{
nvDebugCheck(img != NULL);
// Init the kernels.
Kernel2 * kdu = NULL;
Kernel2 * kdv = NULL;
switch(filter)
{
case NormalMapFilter_Sobel3x3:
kdu = new Kernel2(3);
break;
case NormalMapFilter_Sobel5x5:
kdu = new Kernel2(5);
break;
case NormalMapFilter_Sobel7x7:
kdu = new Kernel2(7);
break;
case NormalMapFilter_Sobel9x9:
kdu = new Kernel2(9);
break;
default:
nvDebugCheck(false);
};
kdu->initSobel();
kdu->normalize();
kdv = new Kernel2(*kdu);
kdv->transpose();
return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
}
/// Create normal map combining multiple sobel filters.
FloatImage * nv::createNormalMap(const Image * img, FloatImage::WrapMode wm, Vector4::Arg heightWeights, Vector4::Arg filterWeights)
{
nvDebugCheck(img != NULL);
Kernel2 * kdu = NULL;
Kernel2 * kdv = NULL;
kdu = new Kernel2(9);
kdu->initBlendedSobel(filterWeights);
kdu->normalize();
kdv = new Kernel2(*kdu);
kdv->transpose();
return ::createNormalMap(img, wm, heightWeights, kdu, kdv);
}
FloatImage * nv::createNormalMap(const FloatImage * img, FloatImage::WrapMode wm, Vector4::Arg filterWeights)
{
nvDebugCheck(img != NULL);
Kernel2 * kdu = NULL;
Kernel2 * kdv = NULL;
kdu = new Kernel2(9);
kdu->initBlendedSobel(filterWeights);
kdu->normalize();
kdv = new Kernel2(*kdu);
kdv->transpose();
return ::createNormalMap(img, wm, kdu, kdv);
}
/// Normalize the given image in place.
void nv::normalizeNormalMap(FloatImage * img)
{
nvDebugCheck(img != NULL);
img->normalize(0);
}
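// For reference, a short usage sketch of the blended-Sobel entry point above; the height and
// filter weights are illustrative values, not defaults taken from this code.
static FloatImage * normalMapFromHeight(const Image * heightMap, FloatImage::WrapMode wm)
{
    Vector4 heightWeights(0.0f, 1.0f, 0.0f, 0.0f);    // height = green channel only
    Vector4 filterWeights(0.5f, 0.25f, 0.15f, 0.10f); // favor the small Sobel kernels
    return nv::createNormalMap(heightMap, wm, heightWeights, filterWeights);
}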

@ -101,6 +101,48 @@ inline Stream & operator<< (Stream & s, TgaFile & tga)
return s;
}
// @@ Move to BMP file?
const int BITMAPFILEHEADER_SIZE = 14;
const int BITMAPINFOHEADER_SIZE = 40;
const int BM_TYPE = ((unsigned int)'M') << 8 | ((unsigned int)'B');
// BMP Header.
struct BmpFileHeader {
uint16 type;
uint32 size;
uint16 reserved1;
uint16 reserved2;
uint32 offBits;
};
struct BmpInfoHeader {
uint32 size;
uint32 width;
uint32 height;
uint16 planes;
uint16 bitCount;
uint32 compression;
uint32 sizeImage;
uint32 xPelsPerMeter;
uint32 yPelsPerMeter;
uint32 clrUsed;
uint32 clrImportant;
};
inline Stream & operator<< (Stream & s, BmpFileHeader & bmp) {
return s << bmp.type << bmp.size << bmp.reserved1 << bmp.reserved2 << bmp.offBits;
}
inline Stream & operator<< (Stream & s, BmpInfoHeader & bmp) {
s << bmp.size << bmp.width << bmp.height << bmp.planes << bmp.bitCount << bmp.compression << bmp.sizeImage;
s << bmp.xPelsPerMeter << bmp.yPelsPerMeter << bmp.clrUsed << bmp.clrImportant;
return s;
}
} // nv namespace
#endif // NV_IMAGE_TGAFILE_H

@ -7,7 +7,7 @@ SET(MATH_SRCS
Fitting.h Fitting.cpp
Gamma.h Gamma.cpp
Half.h Half.cpp
Matrix.h
Matrix.h Matrix.inl Matrix.cpp
Plane.h Plane.inl Plane.cpp
SphericalHarmonic.h SphericalHarmonic.cpp
SimdVector.h SimdVector_SSE.h SimdVector_VE.h

@ -157,6 +157,12 @@ namespace nv
return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale);
}
inline Vector3 toVector3(Color32 c)
{
const float scale = 1.0f / 255.0f;
return Vector3(c.r * scale, c.g * scale, c.b * scale);
}
inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1)
{

@ -1,441 +1,487 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Matrix.inl"
#include "Vector.inl"
#include "nvcore/Array.inl"
#include <float.h>
#if !NV_CC_MSVC && !NV_OS_ORBIS
#include <alloca.h>
#endif
using namespace nv;
// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise
// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above;
// indx[1..n] is an output vector that records the row permutation effected by the partial
// pivoting; d is output as ±1 depending on whether the number of row interchanges was even
// or odd, respectively. This routine is used in combination with lubksb to solve linear equations
// or invert a matrix.
static bool ludcmp(float **a, int n, int *indx, float *d)
{
const float TINY = 1.0e-20f;
float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row.
*d = 1.0; // No row interchanges yet.
for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
float big = 0.0;
for (int j = 0; j < n; j++) {
big = max(big, fabsf(a[i][j]));
}
if (big == 0) {
return false; // Singular matrix
}
// No nonzero largest element.
vv[i] = 1.0f / big; // Save the scaling.
}
for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method.
for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j.
float sum = a[i][j];
for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j];
a[i][j] = sum;
}
int imax = -1;
float big = 0.0; // Initialize for the search for largest pivot element.
for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N
float sum = a[i][j]; // of equation (2.3.13).
for (int k = 0; k < j; k++) {
sum -= a[i][k]*a[k][j];
}
a[i][j]=sum;
float dum = vv[i]*fabs(sum);
if (dum >= big) {
// Is the figure of merit for the pivot better than the best so far?
big = dum;
imax = i;
}
}
nvDebugCheck(imax != -1);
if (j != imax) { // Do we need to interchange rows?
for (int k = 0; k < n; k++) { // Yes, do so...
swap(a[imax][k], a[j][k]);
}
*d = -(*d); // ...and change the parity of d.
vv[imax]=vv[j]; // Also interchange the scale factor.
}
indx[j]=imax;
if (a[j][j] == 0.0) a[j][j] = TINY;
// If the pivot element is zero the matrix is singular (at least to the precision of the
// algorithm). For some applications on singular matrices, it is desirable to substitute
// TINY for zero.
if (j != n-1) { // Now, finally, divide by the pivot element.
float dum = 1.0f / a[j][j];
for (int i = j+1; i < n; i++) a[i][j] *= dum;
}
} // Go back for the next column in the reduction.
return true;
}
// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix
// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input
// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector
// B, and returns with the solution vector X. a, n, and indx are not modified by this routine
// and can be left in place for successive calls with different right-hand sides b. This routine takes
// into account the possibility that b will begin with many zero elements, so it is efficient for use
// in matrix inversion.
static void lubksb(float **a, int n, int *indx, float b[])
{
int ii = 0;
for (int i=0; i<n; i++) { // When ii is set to a positive value, it will become
int ip = indx[i]; // the index of the first nonvanishing element of b. We now
float sum = b[ip]; // do the forward substitution, equation (2.3.6). The
b[ip] = b[i]; // only new wrinkle is to unscramble the permutation as we go.
if (ii != 0) {
for (int j = ii-1; j < i; j++) sum -= a[i][j]*b[j];
}
else if (sum != 0.0f) {
ii = i+1; // A nonzero element was encountered, so from now on we
}
b[i] = sum; // will have to do the sums in the loop above.
}
for (int i=n-1; i>=0; i--) { // Now we do the backsubstitution, equation (2.3.7).
float sum = b[i];
for (int j = i+1; j < n; j++) {
sum -= a[i][j]*b[j];
}
b[i] = sum/a[i][i]; // Store a component of the solution vector X.
} // All done!
}
bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
{
nvDebugCheck(x != NULL);
float m[4][4];
float *a[4] = {m[0], m[1], m[2], m[3]};
int idx[4];
float d;
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
a[x][y] = A(x, y);
}
}
// Create LU decomposition.
if (!ludcmp(a, 4, idx, &d)) {
// Singular matrix.
return false;
}
// Init solution.
*x = b;
// Do back substitution.
lubksb(a, 4, idx, x->component);
return true;
}
// @@ Not tested.
Matrix nv::inverseLU(const Matrix & A)
{
Vector4 Ai[4];
solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
}
bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
{
nvDebugCheck(x != NULL);
float m[3][3];
float *a[3] = {m[0], m[1], m[2]};
int idx[3];
float d;
for (int y = 0; y < 3; y++) {
for (int x = 0; x < 3; x++) {
a[x][y] = A(x, y);
}
}
// Create LU decomposition.
if (!ludcmp(a, 3, idx, &d)) {
// Singular matrix.
return false;
}
// Init solution.
*x = b;
// Do back substitution.
lubksb(a, 3, idx, x->component);
return true;
}
bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
{
nvDebugCheck(x != NULL);
*x = transform(inverseCramer(A), b);
return true; // @@ Return false if determinant(A) == 0 !
}
bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
{
nvDebugCheck(x != NULL);
const float det = A.determinant();
if (equal(det, 0.0f)) { // @@ Use input epsilon.
return false;
}
Matrix3 Ai = inverseCramer(A);
*x = transform(Ai, b);
return true;
}
// Inverse using gaussian elimination. From Jon's code.
Matrix nv::inverse(const Matrix & m) {
Matrix A = m;
Matrix B(identity);
int i, j, k;
float max, t, det, pivot;
det = 1.0;
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<4; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<4; k++)
swap(A(i, k), A(j, k));
for (k=0; k<4; k++)
swap(B(i, k), B(j, k));
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<4; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<4; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<4; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return B;
}
Matrix3 nv::inverse(const Matrix3 & m) {
Matrix3 A = m;
Matrix3 B(identity);
int i, j, k;
float max, t, det, pivot;
det = 1.0;
for (i=0; i<3; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<3; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<3; k++)
swap(A(i, k), A(j, k));
for (k=0; k<3; k++)
swap(B(i, k), B(j, k));
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<3; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<3; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<3; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<3; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<3; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<3; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return B;
}
#if 0
// Copyright (C) 1999-2004 Michael Garland.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, provided that the above
// copyright notice(s) and this permission notice appear in all copies of
// the Software and that both the above copyright notice(s) and this
// permission notice appear in supporting documentation.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
// INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
// FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
// WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Except as contained in this notice, the name of a copyright holder
// shall not be used in advertising or otherwise to promote the sale, use
// or other dealings in this Software without prior written authorization
// of the copyright holder.
// Matrix inversion code for 4x4 matrices using Gaussian elimination
// with partial pivoting. This is a specialized version of a
// procedure originally due to Paul Heckbert <ph@cs.cmu.edu>.
//
// Returns determinant of A, and B=inverse(A)
// If matrix A is singular, returns 0 and leaves trash in B.
//
#define SWAP(a, b, t) {t = a; a = b; b = t;}
double invert(Mat4& B, const Mat4& m)
{
Mat4 A = m;
int i, j, k;
double max, t, det, pivot;
/*---------- forward elimination ----------*/
for (i=0; i<4; i++) /* put identity matrix in B */
for (j=0; j<4; j++)
B(i, j) = (double)(i==j);
det = 1.0;
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<4; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return 0.; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<4; k++)
SWAP(A(i, k), A(j, k), t);
for (k=0; k<4; k++)
SWAP(B(i, k), B(j, k), t);
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<4; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<4; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<4; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return det;
}
#endif // 0
// This code is in the public domain -- castanyo@yahoo.es
#include "Matrix.inl"
#include "Vector.inl"
#include "nvcore/Array.inl"
#include <float.h>
#if !NV_CC_MSVC && !NV_OS_ORBIS
#include <alloca.h>
#endif
using namespace nv;
// Given a matrix a[1..n][1..n], this routine replaces it by the LU decomposition of a rowwise
// permutation of itself. a and n are input. a is output, arranged as in equation (2.3.14) above;
// indx[1..n] is an output vector that records the row permutation effected by the partial
// pivoting; d is output as ±1 depending on whether the number of row interchanges was even
// or odd, respectively. This routine is used in combination with lubksb to solve linear equations
// or invert a matrix.
static bool ludcmp(float **a, int n, int *indx, float *d)
{
const float TINY = 1.0e-20f;
float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row.
*d = 1.0; // No row interchanges yet.
for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
float big = 0.0;
for (int j = 0; j < n; j++) {
big = max(big, fabsf(a[i][j]));
}
if (big == 0) {
return false; // Singular matrix
}
// No nonzero largest element.
vv[i] = 1.0f / big; // Save the scaling.
}
for (int j = 0; j < n; j++) { // This is the loop over columns of Crout's method.
for (int i = 0; i < j; i++) { // This is equation (2.3.12) except for i = j.
float sum = a[i][j];
for (int k = 0; k < i; k++) sum -= a[i][k]*a[k][j];
a[i][j] = sum;
}
int imax = -1;
float big = 0.0; // Initialize for the search for largest pivot element.
for (int i = j; i < n; i++) { // This is i = j of equation (2.3.12) and i = j+ 1 : : : N
float sum = a[i][j]; // of equation (2.3.13).
for (int k = 0; k < j; k++) {
sum -= a[i][k]*a[k][j];
}
a[i][j]=sum;
float dum = vv[i]*fabs(sum);
if (dum >= big) {
// Is the figure of merit for the pivot better than the best so far?
big = dum;
imax = i;
}
}
nvDebugCheck(imax != -1);
if (j != imax) { // Do we need to interchange rows?
for (int k = 0; k < n; k++) { // Yes, do so...
swap(a[imax][k], a[j][k]);
}
*d = -(*d); // ...and change the parity of d.
vv[imax]=vv[j]; // Also interchange the scale factor.
}
indx[j]=imax;
if (a[j][j] == 0.0) a[j][j] = TINY;
// If the pivot element is zero the matrix is singular (at least to the precision of the
// algorithm). For some applications on singular matrices, it is desirable to substitute
// TINY for zero.
if (j != n-1) { // Now, finally, divide by the pivot element.
float dum = 1.0f / a[j][j];
for (int i = j+1; i < n; i++) a[i][j] *= dum;
}
} // Go back for the next column in the reduction.
return true;
}
// Solves the set of n linear equations Ax = b. Here a[1..n][1..n] is input, not as the matrix
// A but rather as its LU decomposition, determined by the routine ludcmp. indx[1..n] is input
// as the permutation vector returned by ludcmp. b[1..n] is input as the right-hand side vector
// B, and returns with the solution vector X. a, n, and indx are not modified by this routine
// and can be left in place for successive calls with different right-hand sides b. This routine takes
// into account the possibility that b will begin with many zero elements, so it is efficient for use
// in matrix inversion.
static void lubksb(float **a, int n, int *indx, float b[])
{
int ii = 0;
for (int i=0; i<n; i++) { // When ii is set to a positive value, it will become
int ip = indx[i]; // the index of the first nonvanishing element of b. We now
float sum = b[ip]; // do the forward substitution, equation (2.3.6). The
b[ip] = b[i]; // only new wrinkle is to unscramble the permutation as we go.
if (ii != 0) {
for (int j = ii-1; j < i; j++) sum -= a[i][j]*b[j];
}
else if (sum != 0.0f) {
ii = i+1; // A nonzero element was encountered, so from now on we
}
b[i] = sum; // will have to do the sums in the loop above.
}
for (int i=n-1; i>=0; i--) { // Now we do the backsubstitution, equation (2.3.7).
float sum = b[i];
for (int j = i+1; j < n; j++) {
sum -= a[i][j]*b[j];
}
b[i] = sum/a[i][i]; // Store a component of the solution vector X.
} // All done!
}
bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
{
nvDebugCheck(x != NULL);
float m[4][4];
float *a[4] = {m[0], m[1], m[2], m[3]};
int idx[4];
float d;
for (int y = 0; y < 4; y++) {
for (int x = 0; x < 4; x++) {
a[x][y] = A(x, y);
}
}
// Create LU decomposition.
if (!ludcmp(a, 4, idx, &d)) {
// Singular matrix.
return false;
}
// Init solution.
*x = b;
// Do back substitution.
lubksb(a, 4, idx, x->component);
return true;
}
// @@ Not tested.
Matrix nv::inverseLU(const Matrix & A)
{
Vector4 Ai[4];
solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
}
bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
{
nvDebugCheck(x != NULL);
float m[3][3];
float *a[3] = {m[0], m[1], m[2]};
int idx[3];
float d;
for (int y = 0; y < 3; y++) {
for (int x = 0; x < 3; x++) {
a[x][y] = A(x, y);
}
}
// Create LU decomposition.
if (!ludcmp(a, 3, idx, &d)) {
// Singular matrix.
return false;
}
// Init solution.
*x = b;
// Do back substitution.
lubksb(a, 3, idx, x->component);
return true;
}
bool nv::solveLU(const Matrix2 & A, const Vector2 & b, Vector2 * x)
{
nvDebugCheck(x != NULL);
float m[2][2];
float *a[2] = {m[0], m[1]};
int idx[2];
float d;
for (int y = 0; y < 2; y++) {
for (int x = 0; x < 2; x++) {
a[x][y] = A(x, y);
}
}
// Create LU decomposition.
if (!ludcmp(a, 2, idx, &d)) {
// Singular matrix.
return false;
}
// Init solution.
*x = b;
// Do back substitution.
lubksb(a, 2, idx, x->component);
return true;
}
bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
{
nvDebugCheck(x != NULL);
*x = transform(inverseCramer(A), b);
return true; // @@ Return false if determinant(A) == 0 !
}
bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
{
nvDebugCheck(x != NULL);
const float det = A.determinant();
if (equal(det, 0.0f)) { // @@ Use input epsilon.
return false;
}
Matrix3 Ai = inverseCramer(A);
*x = transform(Ai, b);
return true;
}
bool nv::solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x)
{
nvDebugCheck(x != NULL);
const float det = A.determinant();
if (equal(det, 0.0f)) { // @@ Use input epsilon.
return false;
}
Matrix2 Ai = inverseCramer(A);
*x = transform(Ai, b);
return true;
}
// Inverse using gaussian elimination. From Jon's code.
Matrix nv::inverse(const Matrix & m) {
Matrix A = m;
Matrix B(identity);
int i, j, k;
float max, t, det, pivot;
det = 1.0;
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<4; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<4; k++)
swap(A(i, k), A(j, k));
for (k=0; k<4; k++)
swap(B(i, k), B(j, k));
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<4; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<4; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<4; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return B;
}
Matrix3 nv::inverse(const Matrix3 & m) {
Matrix3 A = m;
Matrix3 B(identity);
int i, j, k;
float max, t, det, pivot;
det = 1.0;
for (i=0; i<3; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<3; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<3; k++)
swap(A(i, k), A(j, k));
for (k=0; k<3; k++)
swap(B(i, k), B(j, k));
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<3; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<3; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<3; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<3; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<3; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=3-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<3; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return B;
}
#if 0
// Copyright (C) 1999-2004 Michael Garland.
//
// Permission is hereby granted, free of charge, to any person obtaining a
// copy of this software and associated documentation files (the
// "Software"), to deal in the Software without restriction, including
// without limitation the rights to use, copy, modify, merge, publish,
// distribute, and/or sell copies of the Software, and to permit persons
// to whom the Software is furnished to do so, provided that the above
// copyright notice(s) and this permission notice appear in all copies of
// the Software and that both the above copyright notice(s) and this
// permission notice appear in supporting documentation.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT
// OF THIRD PARTY RIGHTS. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
// HOLDERS INCLUDED IN THIS NOTICE BE LIABLE FOR ANY CLAIM, OR ANY SPECIAL
// INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING
// FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT,
// NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION
// WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
//
// Except as contained in this notice, the name of a copyright holder
// shall not be used in advertising or otherwise to promote the sale, use
// or other dealings in this Software without prior written authorization
// of the copyright holder.
// Matrix inversion code for 4x4 matrices using Gaussian elimination
// with partial pivoting. This is a specialized version of a
// procedure originally due to Paul Heckbert <ph@cs.cmu.edu>.
//
// Returns determinant of A, and B=inverse(A)
// If matrix A is singular, returns 0 and leaves trash in B.
//
#define SWAP(a, b, t) {t = a; a = b; b = t;}
double invert(Mat4& B, const Mat4& m)
{
Mat4 A = m;
int i, j, k;
double max, t, det, pivot;
/*---------- forward elimination ----------*/
for (i=0; i<4; i++) /* put identity matrix in B */
for (j=0; j<4; j++)
B(i, j) = (double)(i==j);
det = 1.0;
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
max = -1.;
for (k=i; k<4; k++) /* find pivot for column i */
if (fabs(A(k, i)) > max) {
max = fabs(A(k, i));
j = k;
}
if (max<=0.) return 0.; /* if no nonzero pivot, PUNT */
if (j!=i) { /* swap rows i and j */
for (k=i; k<4; k++)
SWAP(A(i, k), A(j, k), t);
for (k=0; k<4; k++)
SWAP(B(i, k), B(j, k), t);
det = -det;
}
pivot = A(i, i);
det *= pivot;
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
A(i, k) /= pivot;
for (k=0; k<4; k++)
B(i, k) /= pivot;
/* we know that A(i, i) will be set to 1, so don't bother to do it */
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
t = A(j, i); /* we're gonna zero this guy */
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
for (k=0; k<4; k++)
B(j, k) -= B(i, k)*t;
}
}
/*---------- backward elimination ----------*/
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
for (j=0; j<i; j++) { /* eliminate in rows above i */
t = A(j, i); /* we're gonna zero this guy */
for (k=0; k<4; k++) /* subtract scaled row i from row j */
B(j, k) -= B(i, k)*t;
}
}
return det;
}
#endif // 0
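// solveLU and solveCramer now cover the 2x2 case as well. A tiny sketch on a hand-checkable
// system; the two-float Vector2 constructor is assumed.
// Solve [2 1; 1 3] * x = (3, 5); the exact solution is x = (0.8, 1.4).
static bool solve2x2Example(Vector2 * result)
{
    Matrix2 A(2.0f, 1.0f,    // first column
              1.0f, 3.0f);   // second column
    Vector2 b(3.0f, 5.0f);
    return solveLU(A, b, result);
}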

@ -14,6 +14,46 @@ namespace nv
{
enum identity_t { identity };
// 2x2 matrix.
class NVMATH_CLASS Matrix2
{
public:
Matrix2();
explicit Matrix2(float f);
explicit Matrix2(identity_t);
Matrix2(const Matrix2 & m);
Matrix2(Vector2::Arg v0, Vector2::Arg v1);
Matrix2(float a, float b, float c, float d);
float data(uint idx) const;
float & data(uint idx);
float get(uint row, uint col) const;
float operator()(uint row, uint col) const;
float & operator()(uint row, uint col);
Vector2 row(uint i) const;
Vector2 column(uint i) const;
void operator*=(float s);
void operator/=(float s);
void operator+=(const Matrix2 & m);
void operator-=(const Matrix2 & m);
void scale(float s);
void scale(Vector2::Arg s);
float determinant() const;
private:
float m_data[4];
};
// Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x);
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x);
// 3x3 matrix.
class NVMATH_CLASS Matrix3
{
@ -52,6 +92,8 @@ namespace nv
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
extern Matrix3 inverse(const Matrix3 & m);
// 4x4 matrix.
class NVMATH_CLASS Matrix
@ -106,7 +148,6 @@ namespace nv
// Compute inverse using Gaussian elimination and partial pivoting.
extern Matrix inverse(const Matrix & m);
extern Matrix3 inverse(const Matrix3 & m);
} // nv namespace

@ -8,6 +8,199 @@
namespace nv
{
inline Matrix2::Matrix2() {}
inline Matrix2::Matrix2(float f)
{
for(int i = 0; i < 4; i++) {
m_data[i] = f;
}
}
inline Matrix2::Matrix2(identity_t)
{
for(int i = 0; i < 2; i++) {
for(int j = 0; j < 2; j++) {
m_data[2*j+i] = (i == j) ? 1.0f : 0.0f;
}
}
}
inline Matrix2::Matrix2(const Matrix2 & m)
{
for(int i = 0; i < 4; i++) {
m_data[i] = m.m_data[i];
}
}
inline Matrix2::Matrix2(Vector2::Arg v0, Vector2::Arg v1)
{
m_data[0] = v0.x; m_data[1] = v0.y;
m_data[2] = v1.x; m_data[3] = v1.y;
}
inline Matrix2::Matrix2(float a, float b, float c, float d)
{
m_data[0] = a; m_data[1] = b;
m_data[2] = c; m_data[3] = d;
}
inline float Matrix2::data(uint idx) const
{
nvDebugCheck(idx < 4);
return m_data[idx];
}
inline float & Matrix2::data(uint idx)
{
nvDebugCheck(idx < 4);
return m_data[idx];
}
inline float Matrix2::get(uint row, uint col) const
{
nvDebugCheck(row < 2 && col < 2);
return m_data[col * 2 + row];
}
inline float Matrix2::operator()(uint row, uint col) const
{
nvDebugCheck(row < 2 && col < 2);
return m_data[col * 2 + row];
}
inline float & Matrix2::operator()(uint row, uint col)
{
nvDebugCheck(row < 2 && col < 2);
return m_data[col * 2 + row];
}
inline Vector2 Matrix2::row(uint i) const
{
nvDebugCheck(i < 2);
return Vector2(get(i, 0), get(i, 1));
}
inline Vector2 Matrix2::column(uint i) const
{
nvDebugCheck(i < 2);
return Vector2(get(0, i), get(1, i));
}
inline void Matrix2::operator*=(float s)
{
for(int i = 0; i < 4; i++) {
m_data[i] *= s;
}
}
inline void Matrix2::operator/=(float s)
{
float is = 1.0f /s;
for(int i = 0; i < 4; i++) {
m_data[i] *= is;
}
}
inline void Matrix2::operator+=(const Matrix2 & m)
{
for(int i = 0; i < 4; i++) {
m_data[i] += m.m_data[i];
}
}
inline void Matrix2::operator-=(const Matrix2 & m)
{
for(int i = 0; i < 4; i++) {
m_data[i] -= m.m_data[i];
}
}
inline Matrix2 operator+(const Matrix2 & a, const Matrix2 & b)
{
Matrix2 m = a;
m += b;
return m;
}
inline Matrix2 operator-(const Matrix2 & a, const Matrix2 & b)
{
Matrix2 m = a;
m -= b;
return m;
}
inline Matrix2 operator*(const Matrix2 & a, float s)
{
Matrix2 m = a;
m *= s;
return m;
}
inline Matrix2 operator*(float s, const Matrix2 & a)
{
Matrix2 m = a;
m *= s;
return m;
}
inline Matrix2 operator/(const Matrix2 & a, float s)
{
Matrix2 m = a;
m /= s;
return m;
}
inline Matrix2 mul(const Matrix2 & a, const Matrix2 & b)
{
Matrix2 m;
for(int i = 0; i < 2; i++) {
const float ai0 = a(i,0), ai1 = a(i,1);
m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0);
m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1);
}
return m;
}
inline Matrix2 operator*(const Matrix2 & a, const Matrix2 & b)
{
return mul(a, b);
}
// Transform the given 2d vector with the given matrix.
inline Vector2 transform(const Matrix2 & m, const Vector2 & p)
{
return Vector2(p.x * m(0,0) + p.y * m(0,1),
p.x * m(1,0) + p.y * m(1,1));
}
inline void Matrix2::scale(float s)
{
for (int i = 0; i < 4; i++) {
m_data[i] *= s;
}
}
inline void Matrix2::scale(Vector2::Arg s)
{
m_data[0] *= s.x; m_data[1] *= s.x;
m_data[2] *= s.y; m_data[3] *= s.y;
}
inline float Matrix2::determinant() const
{
return get(0,0) * get(1,1) - get(0,1) * get(1,0);
}
// Inverse using Cramer's rule.
inline Matrix2 inverseCramer(const Matrix2 & m)
{
const float det = m.determinant();
if (equal(det, 0.0f, 0.0f)) {
return Matrix2(0);
}
// The inverse is the adjugate divided by the determinant.
Matrix2 adj(m.get(1,1), -m.get(1,0), -m.get(0,1), m.get(0,0));
return adj * (1.0f / det);
}
inline Matrix3::Matrix3() {}
inline Matrix3::Matrix3(float f)
@ -16,7 +209,7 @@ namespace nv
m_data[i] = f;
}
}
inline Matrix3::Matrix3(identity_t)
{
for(int i = 0; i < 3; i++) {
@ -794,7 +987,7 @@ v1 = FXVector3.Cross(v3, v2);
v1.Normalize();
Matrix R = Matrix::Identity;
R[0, 0] = v3.X; // Not sure this is in the correct order...
R[0, 0] = v3.X; // Not sure this is in the correct order...
R[1, 0] = v3.Y;
R[2, 0] = v3.Z;
R[0, 1] = v1.X;

@ -7,10 +7,6 @@
#include "nvmath.h"
#include "Vector.h"
#if NV_USE_ALTIVEC
#undef vector
#endif
namespace nv
{
class Matrix;
@ -29,6 +25,7 @@ namespace nv
Vector3 vector() const;
float offset() const;
Vector3 normal() const;
void operator*=(float s);

@ -24,6 +24,7 @@ namespace nv
inline Vector3 Plane::vector() const { return v.xyz(); }
inline float Plane::offset() const { return v.w; }
inline Vector3 Plane::normal() const { return normalize(vector(), 0.0f); }
// Normalize plane.
inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)

File diff suppressed because it is too large.

@ -194,15 +194,20 @@ namespace nv
#endif
}
inline uint log2(uint i)
inline uint log2(uint32 i)
{
uint value = 0;
while( i >>= 1 ) {
value++;
}
uint32 value = 0;
while( i >>= 1 ) value++;
return value;
}
inline uint log2(uint64 i)
{
uint64 value = 0;
while (i >>= 1) value++;
return U32(value);
}
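// Both overloads return floor(log2(i)); in a texture pipeline the usual consumer is the mip
// chain length. A one-line sketch, assuming max() from nvcore:
inline uint mipmapCount(uint w, uint h)
{
    // e.g. 512x512 -> 10 levels.
    return log2(max(w, h)) + 1;
}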
inline float lerp(float f0, float f1, float t)
{
const float s = 1.0f - t;

@ -106,6 +106,11 @@ namespace nv {
#error "Atomics not implemented."
#endif
}
inline void storeRelease(volatile float * ptr, float value)
{
storeRelease((uint32 *)ptr, *(uint32 *)&value);
}
template <typename T>

@ -17,7 +17,7 @@ struct Event::Private {
};
Event::Event() : m(new Private) {
m->handle = CreateEvent(NULL, FALSE, FALSE, NULL);
m->handle = CreateEvent(/*lpEventAttributes=*/NULL, /*bManualReset=*/FALSE, /*bInitialState=*/FALSE, /*lpName=*/NULL);
}
Event::~Event() {

@ -13,7 +13,9 @@
#endif // NV_OS
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h>
extern HTELEMETRY tmContext;
#endif
@ -45,14 +47,19 @@ Mutex::~Mutex ()
void Mutex::lock()
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
#endif
EnterCriticalSection(&m->mutex);
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS);
tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired");
#endif
@ -60,7 +67,18 @@ void Mutex::lock()
bool Mutex::tryLock()
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
if (TryEnterCriticalSection(&m->mutex) != 0) {
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
return true;
}
else {
tmEndWaitForLock(0);
return false;
}
#elif NV_USE_TELEMETRY
TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
if (TryEnterCriticalSection(&m->mutex) != 0) {
@ -79,7 +97,9 @@ bool Mutex::tryLock()
void Mutex::unlock()
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmReleasedLock(0, this);
#elif NV_USE_TELEMETRY
tmSetLockState(tmContext, this, TMLS_RELEASED, "released");
#endif
@ -90,13 +110,17 @@ void Mutex::unlock()
struct Mutex::Private {
pthread_mutex_t mutex;
pthread_mutexattr_t attr;
const char * name;
};
Mutex::Mutex (const char * name) : m(new Private)
{
int result = pthread_mutex_init(&m->mutex, NULL);
pthread_mutexattr_init(&m->attr);
pthread_mutexattr_settype(&m->attr, PTHREAD_MUTEX_RECURSIVE);
int result = pthread_mutex_init(&m->mutex, &m->attr);
//m->mutex = PTHREAD_MUTEX_INITIALIZER;
m->name = name;
nvDebugCheck(result == 0);
}
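// Initializing the pthread mutex as PTHREAD_MUTEX_RECURSIVE matches the Win32
// CRITICAL_SECTION used on the other path: the owning thread may lock it again. A sketch of
// the pattern this permits; the cache functions are hypothetical.
//
// #include "nvthread/Mutex.h"   // path assumed
//
// static nv::Mutex s_cacheMutex("cache");
//
// static void flushCache()
// {
//     s_cacheMutex.lock();       // may be entered with the lock already held
//     // ... write out cached blocks ...
//     s_cacheMutex.unlock();
// }
//
// static void addToCache(bool cacheFull)
// {
//     s_cacheMutex.lock();
//     if (cacheFull) {
//         flushCache();          // re-enters the same mutex on this thread
//     }
//     s_cacheMutex.unlock();
// }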
@ -105,6 +129,8 @@ Mutex::~Mutex ()
{
int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0);
result = pthread_mutexattr_destroy(&m->attr);
nvDebugCheck(result == 0);
}
void Mutex::lock()

@ -9,7 +9,11 @@
#include <unistd.h> // usleep
#endif
#if NV_USE_TELEMETRY
#include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h>
extern HTELEMETRY tmContext;
#endif
@ -118,16 +122,12 @@ void Thread::start(ThreadFunc * func, void * arg)
nvDebugCheck(p->thread != NULL);
if (p->name != NULL) {
setThreadName(threadId, p->name);
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmThreadName(0, threadId, p->name);
#elif NV_USE_TELEMETRY
tmThreadName(tmContext, threadId, p->name);
#endif
}
#elif NV_OS_ORBIS
int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? p->name : "nv::Thread");
nvDebugCheck(ret == 0);
// use any non-system core
scePthreadSetaffinity(p->thread, 0x3F);
scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2);
#elif NV_OS_USE_PTHREAD
int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr());
nvDebugCheck(result == 0);

@ -8,7 +8,9 @@
#include "nvcore/Utils.h"
#include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h>
extern HTELEMETRY tmContext;
#endif
@ -84,7 +86,9 @@ AutoPtr<ThreadPool> s_pool;
}
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmZone(0, TMZF_NONE, "worker");
#elif NV_USE_TELEMETRY
tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker");
#endif
func(s_pool->arg, s_pool->useCallingThread + i);
@ -116,11 +120,11 @@ ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffi
lockThreadToProcessor(0); // Calling thread always locked to processor 0.
}
StringBuilder name;
for (uint i = 0; i < threadCount; i++) {
StringBuilder name;
name.format("worker %d", i);
workers[i].setName(name.release()); // @Leak
workers[i].start(workerFunc, (void *)i);
workers[i].start(workerFunc, (void *)(uintptr_t)i);
}
allIdle = true;
@ -141,9 +145,6 @@ ThreadPool::~ThreadPool()
void ThreadPool::run(ThreadTask * func, void * arg)
{
// Wait until threads are idle.
wait();
start(func, arg);
if (useCallingThread) {
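One detail in the hunk above worth spelling out: the worker index is now passed through the void * thread argument as (void *)(uintptr_t)i, so the conversion is well defined on 64-bit targets. A hedged sketch of the round trip on the receiving side (workerFunc's real body is not shown in this diff):
// Sketch: recovering a small integer that was smuggled through a void * argument.
void workerFunc_sketch(void * arg)
{
    const uint i = (uint)(uintptr_t)arg;   // matches (void *)(uintptr_t)i at the call site
    // ... worker i picks up its share of the dispatched tasks ...
}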

@ -85,7 +85,9 @@ uint nv::processorCount() {
return count;
#elif NV_OS_ORBIS
return 6;
return 6;
#elif NV_OS_DURANGO
return 6;
#elif NV_OS_XBOX
return 3; // or 6?
#elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX

@ -25,6 +25,7 @@
#include "BlockCompressor.h"
#include "OutputOptions.h"
#include "TaskDispatcher.h"
#include "CompressionOptions.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
@ -33,6 +34,7 @@
#include "nvmath/Vector.inl"
#include "nvcore/Memory.h"
#include "nvcore/Array.inl"
#include <new> // placement new
@ -40,85 +42,13 @@
using namespace nv;
using namespace nvtt;
/*
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false;
#else
bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{
nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs);
}
}
}
}
#if defined(HAVE_OPENMP)
else
{
const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < int(bw*bh); i++)
{
const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
}
#endif
}
*/
struct CompressorContext
{
nvtt::AlphaMode alphaMode;
AlphaMode alphaMode;
uint w, h, d;
const float * data;
const nvtt::CompressionOptions::Private * compressionOptions;
const CompressionOptions::Private * compressionOptions;
uint bw, bh, bs;
uint8 * mem;
@ -144,7 +74,7 @@ void ColorBlockCompressorTask(void * data, int i)
}
}
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
@ -182,66 +112,6 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
delete [] context.mem;
}
#if 0
// Each task compresses one block.
void ColorSetCompressorTask(void * data, int i)
{
CompressorContext * d = (CompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
//for (uint x = 0; x < d->bw; x++)
{
ColorSet set;
set.setColors(d->data, d->w, d->h, x * 4, y * 4);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
}
}
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
CompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
context.compressor = this;
SequentialTaskDispatcher sequential;
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
#if _DEBUG
dispatcher = &sequential;
#endif
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
}
#endif // 0
// Each task compresses one block.
void FloatColorCompressorTask(void * data, int i)
{
@ -262,8 +132,8 @@ void FloatColorCompressorTask(void * data, int i)
Vector4 colors[16];
float weights[16];
const uint block_w = min(d->w - block_x * 4U, 4U);
const uint block_h = min(d->h - block_y * 4U, 4U);
const uint block_w = min(d->w - block_x * 4, 4U);
const uint block_h = min(d->h - block_y * 4, 4U);
uint x, y;
for (y = 0; y < block_h; y++) {
@ -274,7 +144,7 @@ void FloatColorCompressorTask(void * data, int i)
colors[dst_idx].y = g[src_idx];
colors[dst_idx].z = b[src_idx];
colors[dst_idx].w = a[src_idx];
weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f;
weights[dst_idx] = (d->alphaMode == AlphaMode_Transparency) ? saturate(a[src_idx]) : 1.0f;
}
for (; x < 4; x++) {
uint dst_idx = 4 * y + x;
@ -289,14 +159,14 @@ void FloatColorCompressorTask(void * data, int i)
weights[dst_idx] = 0.0f;
}
}
// Compress block.
uint8 * output = d->mem + (block_y * d->bw + block_x) * d->bs;
((FloatColorCompressor *)d->compressor)->compressBlock(colors, weights, *d->compressionOptions, output);
}
void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures.
@ -308,7 +178,7 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
context.bs = blockSize(compressionOptions);
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
@ -333,3 +203,466 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
delete [] context.mem;
}
// BC1
#include "CompressorDXT1.h"
void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), (BlockDXT1 *)output);
}
void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output);
}
// @@ BC1a
// @@ BC2
// @@ BC3
// BC3_RGBM
#include "CompressorDXT5_RGBM.h"
void CompressorBC3_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt5_rgbm(colors, weights, compressionOptions.rgbmThreshold, (BlockDXT5 *)output);
}
// ETC
#include "CompressorETC.h"
void CompressorETC1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc1(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
void CompressorETC2_R::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac(colors, weights, /*input_channel=*/1, /*search_radius=*/1, /*use_11bit_mode=*/true, output);
}
void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
//compress_eac_rg(colors, weights, 1, 2, output);
}
void CompressorETC2_RGB::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
compress_etc2(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
void CompressorETC2_RGBA::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
// @@ Change radius based on quality.
compress_etc2_eac(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
/*void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac_rg(colors, weights, compressionOptions.colorWeight.xyz(), output);
}*/
void CompressorETC2_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc2_rgbm(colors, weights, compressionOptions.rgbmThreshold, output);
}
// External compressors.
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
void AtiCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// @@ Floating point input is not swizzled.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
ATI_TC_CompressOptions options;
options.dwSize = sizeof(options);
options.bUseChannelWeighting = false;
options.bUseAdaptiveWeighting = false;
options.bDXT1UseAlpha = false;
options.nCompressionSpeed = ATI_TC_Speed_Normal;
options.bDisableMultiThreading = false;
//options.bDisableMultiThreading = true;
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
void AtiCompressorDXT5::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT5;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
void SquishCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
void D3DXCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
D3DPRESENT_PARAMETERS presentParams;
ZeroMemory(&presentParams, sizeof(presentParams));
presentParams.Windowed = TRUE;
presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
presentParams.BackBufferWidth = 8;
presentParams.BackBufferHeight = 8;
presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
HRESULT err;
IDirect3DDevice9 * device = NULL;
err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
IDirect3DTexture9 * texture = NULL;
err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
IDirect3DSurface9 * surface = NULL;
err = texture->GetSurfaceLevel(0, &surface);
RECT rect;
rect.left = 0;
rect.top = 0;
rect.bottom = h;
rect.right = w;
if (inputFormat == InputFormat_BGRA_8UB)
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
}
else
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
}
if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
{
D3DLOCKED_RECT rect;
ZeroMemory(&rect, sizeof(rect));
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size);
}
err = surface->UnlockRect();
}
surface->Release();
device->Release();
d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
rgba.swizzle(2, 1, 0, 3); // Swap R and B
stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
}
#endif // defined(HAVE_STB)
#if defined(HAVE_ETCLIB)
#include "Etc.h"
void EtcLibCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
//nvCheck(d == 1); // Encode one layer at a time?
Etc::Image::Format format;
if (compressionOptions.format == Format_ETC1) {
format = Etc::Image::Format::ETC1;
}
else if (compressionOptions.format == Format_ETC2_R) {
format = Etc::Image::Format::R11;
}
else if (compressionOptions.format == Format_ETC2_RG) {
format = Etc::Image::Format::RG11;
}
else if (compressionOptions.format == Format_ETC2_RGB) {
format = Etc::Image::Format::RGB8;
//format = Etc::Image::Format::SRGB8;
}
else if (compressionOptions.format == Format_ETC2_RGBA) {
format = Etc::Image::Format::RGBA8;
//format = Etc::Image::Format::SRGBA8;
}
else if (compressionOptions.format == Format_ETC2_RGB_A1) {
format = Etc::Image::Format::RGB8A1;
//format = Etc::Image::Format::SRGB8A1;
}
else {
nvCheck(false);
return;
}
Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBA;
// @@ Use normal compression metric for normals?
//if (compressionOptions.)
// @@ Adjust based on quality.
int effort = ETCCOMP_DEFAULT_EFFORT_LEVEL;
// @@ What are the defaults?
uint jobs = 4;
uint max_jobs = 4;
uint8 * out_data = NULL;
uint out_size = 0;
uint out_width = 0;
uint out_height = 0;
int out_time = 0;
// Swizzle color data.
nv::Array<float> tmp;
uint count = w * h;
tmp.resize(4 * count);
for (uint i = 0; i < count; i++) {
tmp[4*i+0] = data[count*0 + i];
tmp[4*i+1] = data[count*1 + i];
tmp[4*i+2] = data[count*2 + i];
tmp[4*i+3] = data[count*3 + i];
}
Etc::Encode(tmp.buffer(), w, h, format, error_metric, effort, jobs, max_jobs, &out_data, &out_size, &out_width, &out_height, &out_time);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(out_data, I32(out_size));
}
}
#endif
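The EtcLib path above has to repack the input before calling Etc::Encode: the internal surfaces are planar (all R samples, then all G, then B, then A), while the external encoder expects interleaved RGBA floats. A generalized sketch of that swizzle loop; the helper name is hypothetical:
// Sketch: planar RRR..GGG..BBB..AAA  ->  interleaved RGBARGBA...
static void planar_to_interleaved(const float * src, float * dst, uint w, uint h)
{
    const uint count = w * h;
    for (uint i = 0; i < count; i++) {
        dst[4*i + 0] = src[0*count + i];   // R plane
        dst[4*i + 1] = src[1*count + i];   // G plane
        dst[4*i + 2] = src[2*count + i];   // B plane
        dst[4*i + 3] = src[3*count + i];   // A plane
    }
}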
#if defined(HAVE_RGETC)
#include "rg_etc1.h"
NV_AT_STARTUP(rg_etc1::pack_etc1_block_init());
void RgEtcCompressor::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
rg_etc1::etc1_pack_params pack_params;
pack_params.m_quality = rg_etc1::cMediumQuality;
if (compressionOptions.quality == Quality_Fastest) pack_params.m_quality = rg_etc1::cLowQuality;
else if (compressionOptions.quality == Quality_Production) pack_params.m_quality = rg_etc1::cHighQuality;
else if (compressionOptions.quality == Quality_Highest) pack_params.m_quality = rg_etc1::cHighQuality;
else if (compressionOptions.quality == Quality_Normal) pack_params.m_quality = rg_etc1::cMediumQuality;
rgba.swizzle(2, 1, 0, 3);
rg_etc1::pack_etc1_block(output, (uint *)rgba.colors(), pack_params);
//Vector4 result[16];
//nv::decompress_etc(output, result);
}
#endif
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTextureUtilities.h> // for CPVRTexture, CPVRTextureHeader, PixelType, Transcode
#include "nvmath/Color.inl"
void CompressorPVR::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
EPVRTColourSpace color_space = ePVRTCSpacelRGB;
//pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('b','g','r','a',8,8,8,8);
pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('r','g','b',0,8,8,8,0);
pvrtexture::CPVRTextureHeader header(src_pixel_type.PixelTypeID, w, h, d, 1/*num mips*/, 1/*num array*/, 1/*num faces*/, color_space, ePVRTVarTypeUnsignedByteNorm);
/*
uint count = w * h * d;
Array<Color32> tmp;
tmp.resize(count);
for (uint i = 0; i < count; i++) {
tmp[i] = toColor32(Vector4(data[0*count + i], data[1*count + i], data[2*count + i], data[3*count + i]));
}
*/
uint count = w * h * d;
Array<uint8> tmp;
tmp.resize(3 * count);
for (uint i = 0; i < count; i++) {
tmp[3*i+0] = data[0*count + i] * 255.0f;
tmp[3*i+1] = data[1*count + i] * 255.0f;
tmp[3*i+2] = data[2*count + i] * 255.0f;
}
pvrtexture::CPVRTexture texture(header, tmp.buffer());
pvrtexture::PixelType dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
if (compressionOptions.format == Format_PVR_2BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
else if (compressionOptions.format == Format_PVR_4BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGB);
else if (compressionOptions.format == Format_PVR_2BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGBA);
else if (compressionOptions.format == Format_PVR_4BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGBA);
bool success = pvrtexture::Transcode(texture, dst_pixel_type, ePVRTVarTypeUnsignedByteNorm, color_space, pvrtexture::ePVRTCNormal, false);
if (success) {
uint size = 0;
if (compressionOptions.format == Format_PVR_2BPP_RGB || compressionOptions.format == Format_PVR_2BPP_RGBA) {
// 2 bpp
const uint bpp = 2u;
const uint block_size = 8u * 4u;
const uint size_factor=(block_size*bpp)>>3u;
const uint block_width=nv::max((w>>3u), 2u);
const uint block_height=nv::max((h>>2u), 2u);
size = d * block_width * block_height * size_factor;
}
else {
// 4 bpp
const uint bpp = 4u;
const uint block_size = 4u * 4u;
const uint size_factor = (block_size*bpp) >> 3u;
const uint block_width = max((w>>2u), 2u);
const uint block_height = max((h>>2u), 2u);
size = d * block_width * block_height * size_factor;
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(texture.getDataPtr(), I32(size));
}
}
}
#endif
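A quick worked example of the block-grid arithmetic that recurs throughout this file (bw = (w + 3) / 4, bh = (h + 3) / 4, bs bytes per block); the numbers below assume BC1's 8 bytes per 4x4 block, as returned by the FastCompressorDXT1/CompressorDXT1 blockSize() overrides:
// Worked example: a 70x30 texture compressed to BC1.
const uint w  = 70, h = 30;
const uint bw = (w + 3) / 4;                 // 18 block columns (partial blocks round up)
const uint bh = (h + 3) / 4;                 //  8 block rows
const uint bs = 8;                           // bytes per BC1 block
const uint compressedSize = bs * bw * bh;    // 1152 bytes handed to writeData()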

@ -27,7 +27,6 @@
#include "Compressor.h"
namespace nv
{
struct ColorBlock;
@ -45,9 +44,149 @@ namespace nv
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const = 0;
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize(const nvtt::CompressionOptions::Private & compressionOptions) const = 0;
};
// BC1
struct FastCompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
struct CompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// BC3
struct CompressorBC3_RGBM : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
// ETC
struct CompressorETC1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
struct CompressorETC2_R : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
struct CompressorETC2_RG : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
struct CompressorETC2_RGB : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
struct CompressorETC2_RGBA : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
struct CompressorETC2_RGBM : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
// External compressors.
#if defined(HAVE_ATITC)
struct AtiCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct AtiCompressorDXT5 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
struct SquishCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
struct D3DXCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
struct StbCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
#if NV_USE_CRUNCH
struct CrunchCompressorETC1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if NV_USE_INTEL_ISPC_TC
struct IspcCompressorBC1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct IspcCompressorBC3 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct IspcCompressorBC7 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct IspcCompressorETC1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_ETCLIB)
struct EtcLibCompressor : public CompressorInterface
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_RGETC)
struct RgEtcCompressor : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
#if defined(HAVE_PVRTEXTOOL)
struct CompressorPVR : public CompressorInterface
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
} // nv namespace

@ -13,6 +13,7 @@ SET(NVTT_SRCS
CompressorDX11.h CompressorDX11.cpp
CompressorDXT1.h CompressorDXT1.cpp
CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp
CompressorETC.h CompressorETC.cpp
CompressorRGB.h CompressorRGB.cpp
Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp
@ -38,6 +39,7 @@ IF (CUDA_FOUND)
ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104)
ADD_DEFINITIONS(-DNVTT_EXPORTS)
@ -47,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath rg_etc1)
INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin

@ -38,79 +38,6 @@ ClusterFit::ClusterFit()
{
}
#if 0 // @@ Deprecate. Do not use color set directly.
void ClusterFit::setColorSet(const ColorSet * set)
{
// initialise the best error
#if NVTT_USE_SIMD
m_besterror = SimdVector( FLT_MAX );
Vector3 metric = m_metric.toVector3();
#else
m_besterror = FLT_MAX;
Vector3 metric = m_metric;
#endif
// cache some values
m_count = set->colorCount;
Vector3 values[16];
for (uint i = 0; i < m_count; i++)
{
values[i] = set->colors[i].xyz();
}
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric);
// build the list of values
int order[16];
float dps[16];
for (uint i = 0; i < m_count; ++i)
{
dps[i] = dot(values[i], principal);
order[i] = i;
}
// stable sort
for (uint i = 0; i < m_count; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
// weight all the points
#if NVTT_USE_SIMD
m_xxsum = SimdVector( 0.0f );
m_xsum = SimdVector( 0.0f );
#else
m_xxsum = Vector3(0.0f);
m_xsum = Vector3(0.0f);
m_wsum = 0.0f;
#endif
for (uint i = 0; i < m_count; ++i)
{
int p = order[i];
#if NVTT_USE_SIMD
NV_ALIGN_16 Vector4 tmp(values[p], 1);
m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]);
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
#else
m_weighted[i] = values[p] * set->weights[p];
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
m_weights[i] = set->weights[p];
m_wsum += m_weights[i];
#endif
}
}
#endif // 0
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{
// initialise the best error
@ -412,13 +339,13 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
#else
inline Vector3 round565(const Vector3 & v) {
uint r = ftoi_trunc(v.x * 31.0f);
uint r = ftoi_trunc(v.x * 31.0f);
float r0 = float(((r+0) << 3) | ((r+0) >> 2));
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U);
r = (r << 3) | (r >> 2);
r = (r << 3) | (r >> 2);
uint g = ftoi_trunc(v.y * 63.0f);
uint g = ftoi_trunc(v.y * 63.0f);
float g0 = float(((g+0) << 2) | ((g+0) >> 4));
float g1 = float(((g+1) << 2) | ((g+1) >> 4));
if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U);
@ -428,8 +355,8 @@ inline Vector3 round565(const Vector3 & v) {
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U);
b = (b << 3) | (b >> 2);
b = (b << 3) | (b >> 2);
return Vector3(float(r)/255, float(g)/255, float(b)/255);
}
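round565() above snaps each channel to the 5-6-5 endpoint grid, expands the quantized value back to 8 bits by bit replication, and checks whether the next grid step lands closer. A small worked example of the 5-bit case (pure arithmetic, not code from the diff):
// Worked example: quantize x = 0.6 to 5 bits, as round565() does per channel.
uint  r  = (uint)(0.6f * 31.0f);            // ftoi_trunc equivalent: r = 18
float r0 = float((18 << 3) | (18 >> 2));    // 148 -> 148/255 ~ 0.580
float r1 = float((19 << 3) | (19 >> 2));    // 156 -> 156/255 ~ 0.612
// 0.612 is closer to 0.6 than 0.580 is, so the rounding step bumps r to 19.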

@ -50,7 +50,8 @@ void CompressionOptions::reset()
m.format = Format_DXT1;
m.quality = Quality_Normal;
m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f);
m.rgbmThreshold = 0.15f;
m.bitcount = 32;
m.bmask = 0x000000FF;
m.gmask = 0x0000FF00;
@ -102,6 +103,11 @@ void CompressionOptions::setColorWeights(float red, float green, float blue, flo
m.colorWeight.set(red, green, blue, alpha);
}
void CompressionOptions::setRGBMThreshold(float min_m)
{
m.rgbmThreshold = min_m;
}
/// Set color mask to describe the RGB/RGBA format.
void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask)
@ -162,7 +168,7 @@ void CompressionOptions::setPixelType(PixelType pixelType)
/// Set pitch alignment in bytes.
void CompressionOptions::setPitchAlignment(int pitchAlignment)
{
nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(pitchAlignment));
nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(U32(pitchAlignment)));
m.pitchAlignment = pitchAlignment;
}
@ -194,6 +200,10 @@ void CompressionOptions::setTargetDecoder(Decoder decoder)
}
Format CompressionOptions::format() const
{
return m.format;
}
// Translate to and from D3D formats.
unsigned int CompressionOptions::d3d9Format() const
@ -246,10 +256,20 @@ unsigned int CompressionOptions::d3d9Format() const
FOURCC_ATI2, // Format_BC5
FOURCC_DXT1, // Format_DXT1n
0, // Format_CTX1
MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6
MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7
//FOURCC_ATI2, // Format_BC5_Luma
FOURCC_DXT5, // Format_BC3_RGBM
FOURCC_BC6H, // Format_BC6
FOURCC_BC7L, // Format_BC7
FOURCC_DXT5, // Format_BC3_RGBM
NV_MAKEFOURCC('E', 'T', 'C', '1'), // Format_ETC1
0, // Format_ETC2_R
0, // Format_ETC2_RG
NV_MAKEFOURCC('E', 'T', 'C', '2'), // Format_ETC2_RGB
0, // Format_ETC2_RGBA
0, // Format_ETC2_RGB_A1
0, // Format_ETC2_RGBM
FOURCC_PVR0,
FOURCC_PVR1,
FOURCC_PVR2,
FOURCC_PVR3,
};
NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count);
@ -258,12 +278,80 @@ unsigned int CompressionOptions::d3d9Format() const
}
}
/*
bool CompressionOptions::setDirect3D9Format(unsigned int format)
unsigned int CompressionOptions::dxgiFormat() const // @@ Add srgb flag.
{
if (m.format == Format_RGB) {
if (m.pixelType == PixelType_UnsignedNorm) {
uint bitcount = m.bitcount;
uint rmask = m.rmask;
uint gmask = m.gmask;
uint bmask = m.bmask;
uint amask = m.amask;
if (bitcount == 0) {
bitcount = m.rsize + m.gsize + m.bsize + m.asize;
rmask = ((1 << m.rsize) - 1) << (m.asize + m.bsize + m.gsize);
gmask = ((1 << m.gsize) - 1) << (m.asize + m.bsize);
bmask = ((1 << m.bsize) - 1) << m.asize;
amask = ((1 << m.asize) - 1) << 0;
}
if (bitcount <= 32) {
return nv::findDXGIFormat(bitcount, rmask, gmask, bmask, amask);
}
else {
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_UNORM;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_UNORM;
}
}
else if (m.pixelType == PixelType_Float) {
if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16_FLOAT;
if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32_FLOAT;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_FLOAT;
if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32G32_FLOAT;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_FLOAT;
if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return DXGI_FORMAT_R32G32B32A32_FLOAT;
}
return 0;
}
else {
uint dxgi_formats[] = {
0, // Format_RGB,
DXGI_FORMAT_BC1_UNORM, // Format_DXT1
DXGI_FORMAT_BC1_UNORM, // Format_DXT1a
DXGI_FORMAT_BC2_UNORM, // Format_DXT3
DXGI_FORMAT_BC3_UNORM, // Format_DXT5
DXGI_FORMAT_BC3_UNORM, // Format_DXT5n
DXGI_FORMAT_BC4_UNORM, // Format_BC4
DXGI_FORMAT_BC5_UNORM, // Format_BC5
DXGI_FORMAT_BC1_UNORM, // Format_DXT1n
0, // Format_CTX1
DXGI_FORMAT_BC6H_UF16, // Format_BC6
DXGI_FORMAT_BC7_UNORM, // Format_BC7
DXGI_FORMAT_BC5_UNORM, // Format_BC3_RGBM
0, // Format_ETC1
0, // Format_ETC2_R
0, // Format_ETC2_RG
0, // Format_ETC2_RGB
0, // Format_ETC2_RGBA
0, // Format_ETC2_RGB_A1
0, // Format_ETC2_RGBM
0, // Format_PVR_2BPP_RGB
0, // Format_PVR_4BPP_RGB
0, // Format_PVR_2BPP_RGBA
0, // Format_PVR_4BPP_RGBA
};
NV_COMPILER_CHECK(NV_ARRAY_SIZE(dxgi_formats) == Format_Count);
return dxgi_formats[m.format];
}
}
unsigned int CompressionOptions::dxgiFormat() const
/*
bool CompressionOptions::setDirect3D9Format(unsigned int format)
{
}
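The new rgbmThreshold field (default 0.15) and setRGBMThreshold() feed the BC3_RGBM / ETC2_RGBM paths. In RGBM encodings the color is stored as RGB divided by a shared multiplier M kept in alpha, and the threshold acts as a floor on M so that very dark colors do not blow up quantization error. A hedged sketch of one common encode; the exact mapping inside compress_dxt5_rgbm / compress_etc2_rgbm may differ in detail, and encodeRGBM_sketch is not a function from this change:
// Sketch of a typical RGBM encode with min_m as the floor on the multiplier.
Vector4 encodeRGBM_sketch(Vector3 color, float range, float min_m)
{
    Vector3 c = color / range;                        // bring the color into [0,1]
    float   M = max(max(c.x, c.y), max(c.z, min_m));  // shared multiplier, floored at min_m
    return Vector4(c / M, M);                         // decoder computes rgb * M * range
}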

@ -39,7 +39,8 @@ namespace nvtt
Quality quality;
nv::Vector4 colorWeight;
float rgbmThreshold;
// Pixel format description.
uint bitcount;
uint rmask;

@ -30,6 +30,7 @@
namespace nv
{
struct CompressorInterface
{
virtual ~CompressorInterface() {}

@ -39,7 +39,7 @@ using namespace nv;
using namespace nvtt;
void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
void CompressorBC6::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
@ -77,7 +77,7 @@ void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[
ZOH::compress(zohTile, (char *)output);
}
void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
void CompressorBC7::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)

@ -30,14 +30,14 @@ namespace nv
{
struct CompressorBC6 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
struct CompressorBC7 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
} // nv namespace

@ -28,7 +28,7 @@
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include "ClusterFit.h"
#include "CompressorDXT1.h"
//#include "CompressorDXT1.h"
#include "CompressorDXT5_RGBM.h"
// squish
@ -48,45 +48,11 @@
#include <new> // placement new
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
#endif
// d3dx
#if defined(HAVE_D3DX)
#include <d3dx9.h>
#endif
// stb
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
#endif
using namespace nv;
using namespace nvtt;
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockDXT1 * block = new(output) BlockDXT1;
QuickCompress::compressDXT1(rgba, block);
}
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
@ -115,39 +81,13 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
}
#if 1
void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output);
}
#else
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
if (rgba.isSingleColor())
{
BlockDXT1 * block = new(output) BlockDXT1;
OptimalCompress::compressDXT1(rgba.color(0), block);
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, nvsquish::kDxt1);
fit.Compress(output);
}
}
#endif
void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
uint alphaMask = 0;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color.
if (rgba.color(i).a == 0) alphaMask |= (3U << (i * 2U)); // Set two bits for each color.
}
const bool isSingleColor = rgba.isSingleColor();
@ -284,216 +224,6 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode
}
void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
float min_m = 0.25f; // @@ Get from compression options.
compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output);
}
#if defined(HAVE_ATITC)
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// @@ Floating point input is not swizzled.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
ATI_TC_CompressOptions options;
options.dwSize = sizeof(options);
options.bUseChannelWeighting = false;
options.bUseAdaptiveWeighting = false;
options.bDXT1UseAlpha = false;
options.nCompressionSpeed = ATI_TC_Speed_Normal;
options.bDisableMultiThreading = false;
//options.bDisableMultiThreading = true;
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT5;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
D3DPRESENT_PARAMETERS presentParams;
ZeroMemory(&presentParams, sizeof(presentParams));
presentParams.Windowed = TRUE;
presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
presentParams.BackBufferWidth = 8;
presentParams.BackBufferHeight = 8;
presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
HRESULT err;
IDirect3DDevice9 * device = NULL;
err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
IDirect3DTexture9 * texture = NULL;
err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
IDirect3DSurface9 * surface = NULL;
err = texture->GetSurfaceLevel(0, &surface);
RECT rect;
rect.left = 0;
rect.top = 0;
rect.bottom = h;
rect.right = w;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
}
else
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
}
if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
{
D3DLOCKED_RECT rect;
ZeroMemory(&rect, sizeof(rect));
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size);
}
err = surface->UnlockRect();
}
surface->Release();
device->Release();
d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
rgba.swizzle(2, 1, 0, 3); // Swap R and B
stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
}
#endif // defined(HAVE_STB)

@ -32,12 +32,6 @@ namespace nv
struct ColorBlock;
// Fast CPU compressors.
struct FastCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct FastCompressorDXT1a : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
@ -64,19 +58,6 @@ namespace nv
// Normal CPU compressors.
#if 1
struct CompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#else
struct CompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
struct CompressorDXT1a : public ColorBlockCompressor
{
@ -108,47 +89,9 @@ namespace nv
virtual uint blockSize() const { return 16; }
};
struct CompressorBC3_RGBM : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// External compressors.
#if defined(HAVE_ATITC)
struct AtiCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct AtiCompressorDXT5 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
struct SquishCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
struct D3DXCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
struct StbCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
} // nv namespace

@ -218,13 +218,13 @@ static int evaluate_mse(const Color32 & p, const Color32 & c) {
return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b));
}
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) {
/*static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) {
float e0 = evaluate_mse(palette[0], c, w);
float e1 = evaluate_mse(palette[1], c, w);
float e2 = evaluate_mse(palette[2], c, w);
float e3 = evaluate_mse(palette[3], c, w);
return min(min(e0, e1), min(e2, e3));
}
}*/
static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
int e0 = evaluate_mse(palette[0], c);
@ -245,12 +245,12 @@ static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) {
// Returns weighted MSE error in [0-255] range.
static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) {
float total = 0.0f;
for (int i = 0; i < count; i++) {
float total = 0.0f;
for (int i = 0; i < count; i++) {
total += weights[i] * evaluate_mse(palette, colors[i]);
}
}
return total;
return total;
}
#if 0
@ -337,7 +337,7 @@ static void evaluate_palette(Color16 c0, Color16 c1, Vector3 palette[4]) {
}
}
static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
/*static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
nvDebugCheck(c0.u > c1.u);
Color32 palette32[4];
@ -346,7 +346,7 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
for (int i = 0; i < 4; i++) {
palette[i] = color_to_vector3(palette32[i]);
}
}
}*/
@ -355,38 +355,38 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
static uint compute_indices4(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) {
uint indices = 0;
for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
return indices;
}
static uint compute_indices(const Vector4 input_colors[16], const Vector3 & color_weights, const Vector3 palette[4]) {
uint indices = 0;
for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
for (int i = 0; i < 16; i++) {
float d0 = evaluate_mse(palette[0], input_colors[i].xyz(), color_weights);
float d1 = evaluate_mse(palette[1], input_colors[i].xyz(), color_weights);
float d2 = evaluate_mse(palette[2], input_colors[i].xyz(), color_weights);
float d3 = evaluate_mse(palette[3], input_colors[i].xyz(), color_weights);
uint index;
if (d0 < d1 && d0 < d2 && d0 < d3) index = 0;
else if (d1 < d2 && d1 < d3) index = 1;
@ -491,7 +491,8 @@ float nv::compress_dxt1_single_color(const Vector3 * colors, const float * weigh
// Decompress block color.
Color32 palette[4];
output->evaluatePalette(palette, /*d3d9=*/false);
evaluate_palette(output->col0, output->col1, palette);
//output->evaluatePalette(palette, /*d3d9=*/false);
Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]);
@ -668,7 +669,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// This is too expensive, even with a low threshold.
// If high quality:
if (0) {
if (/* DISABLES CODE */ (0)) {
BlockDXT1 exhaustive_output;
float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output);
@ -720,7 +721,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// Least squares fitting of color end points for the given indices. @@ Take weights into account.
static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b)
static bool optimize_end_points4(uint indices, const Vector4 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
@ -739,8 +740,8 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i];
betax_sum += beta * colors[i];
alphax_sum += alpha * colors[i].xyz();
betax_sum += beta * colors[i].xyz();
}
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
@ -756,7 +757,7 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
// Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account.
static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b)
static bool optimize_end_points3(uint indices, const Vector3 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
@ -794,6 +795,90 @@ static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vec
// find minimum and maximum colors based on bounding box in color space
inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
*c0 = Vector3(0);
*c1 = Vector3(255);
for (int i = 0; i < count; i++) {
*c0 = max(*c0, colors[i]);
*c1 = min(*c1, colors[i]);
}
}
inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 center = (*c0 + *c1) * 0.5f;
Vector2 covariance = Vector2(0);
for (int i = 0; i < count; i++) {
Vector3 t = colors[i] - center;
covariance += t.xy() * t.z;
}
float x0 = c0->x;
float y0 = c0->y;
float x1 = c1->x;
float y1 = c1->y;
if (covariance.x < 0) {
swap(x0, x1);
}
if (covariance.y < 0) {
swap(y0, y1);
}
c0->set(x0, y0, c0->z);
c1->set(x1, y1, c1->z);
}
inline static void inset_bbox(Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 inset = (*c0 - *c1) / 16.0f - (8.0f / 255.0f) / 16.0f;
*c0 = clamp(*c0 - inset, 0.0f, 255.0f);
*c1 = clamp(*c1 + inset, 0.0f, 255.0f);
}
float nv::compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output)
{
Vector3 colors[16];
float weights[16];
int count = reduce_colors(input_colors, input_weights, colors, weights);
if (count == 0) {
// Output trivial block.
output->col0.u = 0;
output->col1.u = 0;
output->indices = 0;
return 0;
}
float error = FLT_MAX;
error = compress_dxt1_single_color(colors, weights, count, color_weights, output);
if (error == 0.0f || count == 1) {
// Early out.
return error;
}
// Quick end point selection.
Vector3 c0, c1;
fit_colors_bbox(colors, count, &c0, &c1);
select_diagonal(colors, count, &c0, &c1);
inset_bbox(&c0, &c1);
output_block4(input_colors, color_weights, c0, c1, output);
// Refine color for the selected indices.
if (optimize_end_points4(output->indices, input_colors, 16, &c0, &c1)) {
output_block4(input_colors, color_weights, c0, c1, output);
}
return evaluate_mse(input_colors, input_weights, color_weights, output);
}
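A minimal usage sketch for the fast path above; the block contents, weights, and metric are illustrative, not taken from the library:

// Sketch: compress one 4x4 block with the quick bounding-box path.
// input_block is assumed to already hold 16 texels in the layout the other
// compress_dxt1 entry points use; weights are uniform here.
Vector4 input_block[16];                    // fill with the block's texels
float uniform_weights[16];
for (int i = 0; i < 16; i++) uniform_weights[i] = 1.0f;

BlockDXT1 block;
float mse = nv::compress_dxt1_fast(input_block, uniform_weights, Vector3(1), &block);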

@ -13,11 +13,14 @@ namespace nv {
float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);
float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
//float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output);
void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Cluster fit end point selection.
float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Quick end point selection followed by least squares refinement.
float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output);
}

@ -3,6 +3,7 @@
#include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h"
#include "CompressorETC.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
@ -17,38 +18,45 @@
using namespace nv;
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
static void convert_to_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, Vector4 rgbm_colors[16], float rgb_weights[16]) {
float weight_sum = 0;
for (uint i = 0; i < 16; i++) {
const Vector4 & c = input_colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = (M - min_m) / (1 - min_m);
input_colors_rgbm[i] = Vector4(r, g, b, a);
rgbm_colors[i] = Vector4(r, g, b, a);
rgb_weights[i] = input_weights[i] * M;
weight_sum += input_weights[i];
}
if (weight_sum == 0) {
for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
}
}
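The conversion above is the usual RGBM split: M = max(R, G, B, min_m), the stored color is RGB / M, and M is remapped from [min_m, 1] to [0, 1] before it lands in the alpha channel. A hedged round-trip sketch for a single texel (helper names are illustrative; the decode side mirrors the reconstruction used later in this file, it is not library code):

// Sketch: RGBM encode/decode for one texel. Assumes fmaxf from <math.h>.
void rgbm_encode(float R, float G, float B, float min_m, float rgbm[4])
{
    float M = fmaxf(fmaxf(R, G), fmaxf(B, min_m));
    rgbm[0] = R / M;
    rgbm[1] = G / M;
    rgbm[2] = B / M;
    rgbm[3] = (M - min_m) / (1.0f - min_m);     // M remapped to [0, 1]
}
void rgbm_decode(const float rgbm[4], float min_m, float * R, float * G, float * B)
{
    float M = rgbm[3] * (1.0f - min_m) + min_m; // undo the remap
    *R = rgbm[0] * M;
    *G = rgbm[1] * M;
    *B = rgbm[2] * M;
}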
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights);
// Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);
@ -138,291 +146,61 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
}
#if 0
BlockDXT5 * block = new(output)BlockDXT5;
// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.
// Compress the resulting M values optimally.
// Repeat this several times until compression error does not improve?
//Vector3 rgb_block[16];
//float m_block[16];
// Init RGB/M block.
#if 0
nvsquish::WeightedClusterFit fit;
ColorBlock rgba;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgba.color(i) = toColor32(Vector4(r, g, b, a));
}
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}
#endif
#if 1
ColorSet rgb;
rgb.allocate(4, 4);
for (uint i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
float nv::compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output) {
// Convert to RGBM.
Vector4 rgbm_colors[16];
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, rgbm_colors, rgb_weights);
void * etc_output = (uint8 *)output + 8;
void * eac_output = output;
// Compress RGB.
compress_etc2(rgbm_colors, rgb_weights, Vector3(1), etc_output);
// Decompress RGB/M block.
nv::ColorBlock RGB;
block->color.decodeBlock(&RGB);
decompress_etc(etc_output, rgbm_colors);
#if 1
AlphaBlock4x4 M;
// Compute M values to compensate for RGB's error.
for (int i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
const Vector4 & c = input_colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
float rm = rgbm_colors[i].x;
float gm = rgbm_colors[i].y;
float bm = rgbm_colors[i].z;
// compute m such that m * (r/M, g/M, b/M) == RGB
// Three equations, one unknown:
// m * r/M == R
// m * g/M == G
// m * b/M == B
// Solve in the least squares sense!
// m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
// m == dot(rgb, RGB) / dot(rgb, rgb)
float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));
if (!isFinite(m)) {
m = 1;
}
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#else
OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif
#if 0
// Decompress M.
block->alpha.decodeBlock(&M);
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
//float m = max(max(R, G), max(B, min_m));
float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
float r = R / m;
float g = G / m;
float b = B / m;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
// Store M in alpha channel.
rgbm_colors[i].w = saturate(m); // @@ What if we don't saturate?
}
#endif
#if 0
block->color.decodeBlock(&RGB);
//AlphaBlock4x4 M;
//M.initWeights(src);
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#endif
#if 0
src.fromRGBM(M, min_m);
compress_eac(rgbm_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*11bit_mode*/false, eac_output);
src.createMinimalSet(/*ignoreTransparent=*/true);
if (src.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(src.color(0), &block->color);
}
else {
// @@ Use our improved compressor.
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&src);
Vector3 start, end;
fit.compress4(&start, &end);
if (fit.compress3(&start, &end)) {
QuickCompress::outputBlock3(src, start, end, block->color);
}
else {
QuickCompress::outputBlock4(src, start, end, block->color);
}
}
#endif // 0
// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?
// RGBM encoding.
// Maximize precision.
// - Number of possible grey levels:
// - Naive: 2^3 = 8
// - Better: 2^3 + 2^2 = 12
// - How to choose min_m?
// - Ideal = Adaptive per block, don't know where to store.
// - Adaptive per lightmap. How to compute optimal?
// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.
// - Optimal compressor: Interpolation artifacts.
// - Color transform.
// - Measure error in post-tone-mapping color space.
// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
// - Guess based on average lightmap color? Use fixed exposure, in scaled lightmap space.
// - Enhanced DXT compressor.
// - Typical RGBM encoding as follows:
// rgb -> M = max(rgb), RGB=rgb/M -> RGBM
// - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
// - We could ensure that M' >= M during compression.
// - We could clamp RGB anyway.
// - We could add a fixed scale value to take into account compression errors and avoid clamping.
// Compress color.
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}*/
return 0; // @@ Compute error.
}
#endif // 0
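The per-texel M refinement in compress_etc2_rgbm above ("Solve in the least squares sense!") is a one-unknown least-squares fit. A compact sketch of the derivation and the resulting expression, matching the comment in that loop (not a new library function):

// Sketch: given the decoded, normalized color rgb = (rm, gm, bm) and the
// original color RGB = (R, G, B), pick m minimizing
//   E(m) = (m*rm - R)^2 + (m*gm - G)^2 + (m*bm - B)^2.
// dE/dm = 0  =>  m * dot(rgb, rgb) = dot(rgb, RGB), hence:
inline float solve_m(const Vector3 & rgb, const Vector3 & RGB)
{
    float d = dot(rgb, rgb);
    if (d == 0.0f) return 1.0f;    // degenerate texel; same fallback as the isFinite check above
    return dot(rgb, RGB) / d;
}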

@ -5,5 +5,5 @@ namespace nv {
class Vector4;
float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output);
float compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output);
}

File diff suppressed because it is too large.

@ -0,0 +1,20 @@
#include "nvcore/nvcore.h"
namespace nv {
class Vector3;
class Vector4;
void decompress_etc(const void * input_block, Vector4 output_colors[16]);
void decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel);
void decompress_etc_eac(const void * input_block, Vector4 output_colors[16]);
float compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output);
float compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
}

@ -250,6 +250,8 @@ namespace
// Compute shared exponent.
int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B;
nvDebugCheck(exp_shared_p <= Emax);
nvDebugCheck(exp_shared_p >= 0);
int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N)));
@ -279,7 +281,7 @@ namespace
{
float v = max3(r, g, b);
uint rgbe;
uint rgbe = 0;
if (v < 1e-32) {
rgbe = 0;
@ -534,6 +536,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
}
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
// @@
ir = ig = ib = ia = 0;
}
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
ir = iround(clamp(r, 0.0f, 65535.0f));
@ -543,6 +546,11 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
}
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
// @@
ir = ig = ib = ia = 0;
}
else {
// @@
ir = ig = ib = ia = 0;
}
uint p = 0;

@ -39,6 +39,7 @@
#include "cuda/CudaCompressorDXT.h"
#include "nvimage/DirectDrawSurface.h"
#include "nvimage/KtxFile.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvimage/Image.h"
@ -51,6 +52,7 @@
#include "nvcore/Memory.h"
#include "nvcore/Ptr.h"
#include "nvcore/Array.inl"
using namespace nv;
using namespace nvtt;
@ -222,11 +224,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
return false;
}
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
const int faceCount = inputOptions.faceCount;
int width = inputOptions.width;
int height = inputOptions.height;
@ -244,97 +241,230 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel);
}
if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions)) {
if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, inputOptions.isNormalMap, compressionOptions, outputOptions)) {
return false;
}
// Output images.
for (int f = 0; f < faceCount; f++)
if (outputOptions.container != Container_KTX)
{
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
// Output each face from the largest mipmap to the smallest.
for (int f = 0; f < faceCount; f++)
{
int w = width;
int h = height;
int d = depth;
bool canUseSourceImagesForThisFace = canUseSourceImages;
img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
// To normal map.
if (inputOptions.convertToNormalMap) {
img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
}
// To linear space.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
// Resize input.
img.resize(w, h, d, ResizeFilter_Box);
nvtt::Surface tmp = img;
if (!img.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
for (int m = 1; m < mipmapCount; m++) {
w = max(1, w/2);
h = max(1, h/2);
d = max(1, d/2);
int idx = m * faceCount + f;
bool useSourceImages = false;
if (canUseSourceImagesForThisFace) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
}
if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
// For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.expandNormals();
img.normalizeNormalMap();
img.packNormals();
}
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions);
}
}
}
else
{
// KTX files expect face mipmaps to be interleaved.
Array<nvtt::Surface> images(faceCount);
Array<bool> mipChainBroken(faceCount);
int w = width;
int h = height;
int d = depth;
bool canUseSourceImagesForThisFace = canUseSourceImages;
img.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
// https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
uint imageSize = estimateSize(w, h, 1, 1, compressionOptions) * faceCount;
outputOptions.writeData(&imageSize, sizeof(uint32));
// To normal map.
if (inputOptions.convertToNormalMap) {
img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
img.packNormals();
}
for (int f = 0; f < faceCount; f++)
{
nvtt::Surface s;
s.setWrapMode(inputOptions.wrapMode);
s.setAlphaMode(inputOptions.alphaMode);
s.setNormalMap(inputOptions.isNormalMap);
// To linear space.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
s.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
// Resize input.
img.resize(w, h, d, ResizeFilter_Box);
// To normal map.
if (inputOptions.convertToNormalMap) {
s.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
s.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
}
nvtt::Surface tmp = img;
if (!img.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
// To linear space.
if (!s.isNormalMap()) {
s.toLinear(inputOptions.inputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
// Resize input.
s.resize(w, h, d, ResizeFilter_Box);
nvtt::Surface tmp = s;
if (!s.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
for (int m = 1; m < mipmapCount; m++) {
images.push_back(s);
mipChainBroken.push_back(false);
}
static const unsigned char padding[3] = {0, 0, 0};
for (int m = 1; m < mipmapCount; m++)
{
w = max(1, w/2);
h = max(1, h/2);
d = max(1, d/2);
int idx = m * faceCount + f;
// https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
imageSize = estimateSize(w, h, d, 1, compressionOptions) * faceCount;
bool useSourceImages = false;
if (canUseSourceImagesForThisFace) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
canUseSourceImagesForThisFace = false; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
outputOptions.writeData(&imageSize, sizeof(uint32));
nvtt::Surface tmp;
for (int f = 0; f < faceCount; f++)
{
nvtt::Surface& img = images[f];
int idx = m * faceCount + f;
bool useSourceImages = false;
if (!mipChainBroken[f]) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
mipChainBroken[f] = true; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
}
}
if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
// For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
// For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserAlpha, inputOptions.kaiserStretch };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserStretch, inputOptions.kaiserAlpha };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.normalizeNormalMap();
}
tmp = img;
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.expandNormals();
img.normalizeNormalMap();
img.packNormals();
quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions);
//cube padding
if (faceCount == 6 && arraySize == 1)
{
//TODO calc offset for uncompressed images
}
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions);
int mipPadding = 3 - ((imageSize + 3) % 4);
if (mipPadding != 0) {
outputOptions.writeData(&padding, mipPadding);
}
}
}
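For reference, the padding computed above rounds each mip level up to a 4-byte boundary, which is what the KTX container expects between levels. A quick arithmetic check of the formula:

// Sketch: for n = imageSize, 3 - ((n + 3) % 4) is the number of zero bytes
// needed to reach the next multiple of 4, equivalent to (4 - n % 4) % 4.
// n % 4 == 0 -> 0,  1 -> 3,  2 -> 2,  3 -> 1.
for (uint n = 0; n < 16; n++) {
    nvDebugCheck(3 - ((n + 3) % 4) == (4 - n % 4) % 4);
}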
@ -673,6 +803,131 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
return writeSucceed;
}
else if (outputOptions.container == Container_KTX)
{
KtxHeader header;
// TODO cube arrays
if (textureType == TextureType_2D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = 0;
}
else if (textureType == TextureType_Cube) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 6;
header.pixelDepth = 0;
}
else if (textureType == TextureType_3D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = d;
}
else if (textureType == TextureType_Array) {
header.numberOfArrayElements = arraySize;
header.numberOfFaces = 1;
header.pixelDepth = 0; // Is it?
}
header.pixelWidth = w;
header.pixelHeight = h;
header.numberOfMipmapLevels = mipmapCount;
bool supported = true;
// TODO non-compressed formats
if (compressionOptions.format == Format_RGBA)
{
//header.glType = ?;
//header.glTypeSize = ?;
//header.glFormat = ?;
}
else
{
header.glType = 0;
header.glTypeSize = 1;
header.glFormat = 0;
if (compressionOptions.format == Format_DXT1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_DXT1a) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT3) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_BC4) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_RGTC1; // KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_BC5) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_RGTC2; // KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_BC6) {
if (compressionOptions.pixelType == PixelType_Float) header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
else /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; // By default we assume unsigned.
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_BC7) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM : KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_ETC1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC1 : KTX_INTERNAL_COMPRESSED_RGB_ETC1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_R) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_ETC2_RG) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_ETC2_RGB) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC2 : KTX_INTERNAL_COMPRESSED_RGB_ETC2;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_RGBA) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC : KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else {
supported = false;
}
//TODO compressionOptions.format == Format_DXT1n, Format_DXT5n ? There seems to be no way to indicate a normal map using ktx. Maybe via key value data?
}
if (!supported)
{
// This container does not support the requested format.
outputOptions.error(Error_UnsupportedOutputFormat);
return false;
}
const uint headerSize = 64;
nvStaticCheck(sizeof(KtxHeader) == 64);
bool writeSucceed = outputOptions.writeData(&header, headerSize);
if (!writeSucceed)
{
outputOptions.error(Error_FileWrite);
}
return writeSucceed;
}
return true;
}
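For context, the 64-byte size asserted above corresponds to the KTX 1.0 header layout from the Khronos spec. A sketch of an equivalent struct (field names follow the public spec; the real declaration lives in nvimage/KtxFile.h and is not reproduced here):

// Sketch of the KTX 1.0 file header, 12 identifier bytes plus 13 uint32 fields.
struct KtxHeaderSketch
{
    uint8  identifier[12];              // 0xAB "KTX 11" 0xBB "\r\n" 0x1A "\n"
    uint32 endianness;                  // 0x04030201 written in native byte order
    uint32 glType, glTypeSize, glFormat;
    uint32 glInternalFormat, glBaseInternalFormat;
    uint32 pixelWidth, pixelHeight, pixelDepth;
    uint32 numberOfArrayElements, numberOfFaces, numberOfMipmapLevels;
    uint32 bytesOfKeyValueData;
};                                      // 12 + 13 * 4 = 64 bytes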
@ -788,15 +1043,34 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
{
return new CompressorBC7;
}
/*else if (compressionOptions.format == Format_BC5_Luma)
{
return new ProductionCompressorBC5_Luma;
}*/
else if (compressionOptions.format == Format_BC3_RGBM)
{
return new CompressorBC3_RGBM;
}
else if (compressionOptions.format >= Format_ETC1 && compressionOptions.format <= Format_ETC2_RGB_A1)
{
#if defined(HAVE_RGETC)
if (compressionOptions.format == Format_ETC1 && compressionOptions.externalCompressor == "rg_etc") return new RgEtcCompressor;
#endif
#if defined(HAVE_ETCLIB)
if (compressionOptions.externalCompressor == "etclib") return new EtcLibCompressor;
#endif
if (compressionOptions.format == Format_ETC1) return new CompressorETC1;
else if (compressionOptions.format == Format_ETC2_R) return new CompressorETC2_R;
//else if (compressionOptions.format == Format_ETC2_RG) return new CompressorETC2_RG;
else if (compressionOptions.format == Format_ETC2_RGB) return new CompressorETC2_RGB;
else if (compressionOptions.format == Format_ETC2_RGBA) return new CompressorETC2_RGBA;
}
else if (compressionOptions.format == Format_ETC2_RGBM)
{
return new CompressorETC2_RGBM;
}
else if (compressionOptions.format >= Format_PVR_2BPP_RGB && compressionOptions.format <= Format_PVR_4BPP_RGBA)
{
#if defined(HAVE_PVRTEXTOOL)
return new CompressorPVR;
#endif
}
return NULL;
}
@ -860,3 +1134,24 @@ CompressorInterface * Compressor::Private::chooseGpuCompressor(const Compression
return NULL;
}
int Compressor::Private::estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const
{
const Format format = compressionOptions.format;
const uint bitCount = compressionOptions.bitcount;
const uint pitchAlignment = compressionOptions.pitchAlignment;
int size = 0;
for (int m = 0; m < mipmapCount; m++)
{
size += computeImageSize(w, h, d, bitCount, pitchAlignment, format);
// Compute extents of next mipmap:
w = max(1, w / 2);
h = max(1, h / 2);
d = max(1, d / 2);
}
return size;
}
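For block-compressed formats the accumulation above reduces to summing whole 4x4 blocks per level. A hedged sketch with a worked example (ignores pitchAlignment, which only affects uncompressed formats):

// Sketch: estimated size of one face/layer of a 4x4 block-compressed mip chain.
int estimate_block_compressed_size(int w, int h, int d, int mipmapCount, int bytesPerBlock)
{
    int size = 0;
    for (int m = 0; m < mipmapCount; m++)
    {
        size += ((w + 3) / 4) * ((h + 3) / 4) * d * bytesPerBlock;
        w = max(1, w / 2);
        h = max(1, h / 2);
        d = max(1, d / 2);
    }
    return size;   // e.g. 16x16x1 DXT1 (8 B/block), 5 levels: 128 + 32 + 8 + 8 + 8 = 184 bytes
}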

@ -56,6 +56,7 @@ namespace nvtt
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const;
bool cudaSupported;
bool cudaEnabled;

@ -34,61 +34,61 @@
namespace nvtt
{
struct DefaultOutputHandler : public nvtt::OutputHandler
{
DefaultOutputHandler(const char * fileName) : stream(fileName) {}
struct DefaultOutputHandler : public nvtt::OutputHandler
{
DefaultOutputHandler(const char * fileName) : stream(fileName) {}
DefaultOutputHandler(FILE * fp) : stream(fp, false) {}
virtual ~DefaultOutputHandler() {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
// ignore.
}
// Output data.
virtual bool writeData(const void * data, int size)
{
stream.serialize(const_cast<void *>(data), size);
//return !stream.isError();
return true;
}
virtual void endImage()
{
// ignore.
}
nv::StdOutputStream stream;
};
struct OutputOptions::Private
{
nv::Path fileName;
virtual ~DefaultOutputHandler() {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
// ignore.
}
// Output data.
virtual bool writeData(const void * data, int size)
{
stream.serialize(const_cast<void *>(data), size);
//return !stream.isError();
return true;
}
virtual void endImage()
{
// ignore.
}
nv::StdOutputStream stream;
};
struct OutputOptions::Private
{
nv::Path fileName;
FILE * fileHandle;
OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader;
Container container;
OutputHandler * outputHandler;
ErrorHandler * errorHandler;
bool outputHeader;
Container container;
int version;
bool srgb;
bool deleteOutputHandler;
void * wrapperProxy; // For the C/C# wrapper.
bool hasValidOutputHandler() const;
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
bool writeData(const void * data, int size) const;
bool hasValidOutputHandler() const;
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
bool writeData(const void * data, int size) const;
void endImage() const;
void error(Error e) const;
};
void error(Error e) const;
};
} // nvtt namespace

@ -39,21 +39,21 @@ namespace nv
struct AlphaBlockDXT5;
class Vector3;
namespace QuickCompress
{
void compressDXT1(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT3(const ColorBlock & src, BlockDXT3 * dst);
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8);
namespace QuickCompress
{
void compressDXT1(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst);
void compressDXT3(const ColorBlock & src, BlockDXT3 * dst);
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8);
void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8);
void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
}
}
} // nv namespace
#endif // NV_TT_QUICKCOMPRESSDXT_H

@ -23,12 +23,14 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include "Surface.h"
#include "CompressorETC.h" // for ETC decoder.
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Color.h"
#include "nvmath/Half.h"
#include "nvmath/ftoi.h"
#include "nvmath/PackedFloat.h"
#include "nvimage/Filter.h"
#include "nvimage/ImageIO.h"
@ -39,8 +41,13 @@
#include "nvimage/ErrorMetric.h"
#include "nvimage/DirectDrawSurface.h"
#include "nvthread/ParallelFor.h"
#include "nvcore/Array.inl"
#include <float.h>
#include <string.h> // memset, memcpy
//#include <stdio.h> // printf?
#if NV_CC_GNUC
#include <math.h> // exp2f and log2f
@ -123,6 +130,18 @@ namespace
else if (format == Format_BC7) {
return 16;
}
else if (format == Format_ETC1 || format == Format_ETC2_R || format == Format_ETC2_RGB) {
return 8;
}
else if (format == Format_ETC2_RG || format == Format_ETC2_RGBA || format == Format_ETC2_RGBM) {
return 16;
}
else if (format == Format_PVR_2BPP_RGB || format == Format_PVR_2BPP_RGBA) {
return 4;
}
else if (format == Format_PVR_4BPP_RGB || format == Format_PVR_4BPP_RGBA) {
return 8;
}
return 0;
}
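The values added above are bytes per 4x4 block, so the effective rate is blockSize * 8 / 16 bits per texel. A quick worked check (the PVR sizes follow this file's 4x4 accounting, not the hardware's 8x4 blocks for the 2bpp modes):

// ETC1 / ETC2_R / ETC2_RGB:         8 B  ->  64 bits / 16 texels = 4 bpp
// ETC2_RG / ETC2_RGBA / ETC2_RGBM:  16 B -> 128 bits / 16 texels = 8 bpp
// PVR 2bpp modes:                    4 B ->  32 bits / 16 texels = 2 bpp
// PVR 4bpp modes:                    8 B ->  64 bits / 16 texels = 4 bpp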
@ -197,7 +216,7 @@ uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlign
}
}
void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType) {
void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType, nvtt::ShapeRestriction shapeRestriction /*= nvtt::ShapeRestriction_None*/) {
nvDebugCheck(width != NULL && *width > 0);
nvDebugCheck(height != NULL && *height > 0);
nvDebugCheck(depth != NULL && *depth > 0);
@ -234,21 +253,21 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
// Round to power of two.
if (roundMode == RoundMode_ToNextPowerOfTwo)
{
w = nextPowerOfTwo(w);
h = nextPowerOfTwo(h);
d = nextPowerOfTwo(d);
w = nextPowerOfTwo(U32(w));
h = nextPowerOfTwo(U32(h));
d = nextPowerOfTwo(U32(d));
}
else if (roundMode == RoundMode_ToNearestPowerOfTwo)
{
w = nearestPowerOfTwo(w);
h = nearestPowerOfTwo(h);
d = nearestPowerOfTwo(d);
w = nearestPowerOfTwo(U32(w));
h = nearestPowerOfTwo(U32(h));
d = nearestPowerOfTwo(U32(d));
}
else if (roundMode == RoundMode_ToPreviousPowerOfTwo)
{
w = previousPowerOfTwo(w);
h = previousPowerOfTwo(h);
d = previousPowerOfTwo(d);
w = previousPowerOfTwo(U32(w));
h = previousPowerOfTwo(U32(h));
d = previousPowerOfTwo(U32(d));
}
else if (roundMode == RoundMode_ToNextMultipleOfFour)
{
@ -269,6 +288,38 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
d = previousMultipleOfFour(d);
}
if(shapeRestriction == ShapeRestriction_Square)
{
if (textureType == TextureType_2D)
{
int md = nv::min(w,h);
w = md;
h = md;
d = 1;
}
else if (textureType == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
else if (textureType == TextureType_Cube)
{
int md = nv::min(w, h);
w = md;
h = md;
d = 1;
}
}
else
{
if (textureType == TextureType_2D || textureType == TextureType_Cube)
{
d = 1;
}
}
*width = w;
*height = h;
*depth = d;
@ -509,8 +560,8 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c
}
}
*rangeMin = range.x;
*rangeMax = range.y;
if (rangeMin) *rangeMin = range.x;
if (rangeMax) *rangeMax = range.y;
}
bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
@ -583,7 +634,7 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
}
// @@ Have loadFloat allocate the image with the desired number of channels.
img->resizeChannelCount(4);
//img->resizeChannelCount(4);
delete m->image;
m->image = img.release();
@ -601,7 +652,8 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c
return ImageIO::saveFloat(fileName, m->image, 0, 4);
}
else {
AutoPtr<Image> image(m->image->createImage(0, 4));
uint c = min<uint>(m->image->componentCount(), 4);
AutoPtr<Image> image(m->image->createImage(0, c));
nvCheck(image != NULL);
if (hasAlpha) {
@ -829,16 +881,35 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
return true;
}
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTDecompress.h>
#endif
// @@ Add support for compressed 3D textures.
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
{
if (format != nvtt::Format_BC1 &&
format != nvtt::Format_BC2 &&
format != nvtt::Format_BC3 &&
format != nvtt::Format_BC3n &&
format != nvtt::Format_BC3_RGBM &&
format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7)
format != nvtt::Format_BC7 &&
format != nvtt::Format_ETC1 &&
format != nvtt::Format_ETC2_R &&
format != nvtt::Format_ETC2_RG &&
format != nvtt::Format_ETC2_RGB &&
format != nvtt::Format_ETC2_RGBA &&
format != nvtt::Format_ETC2_RGBM
#if defined(HAVE_PVRTEXTOOL)
&& format != nvtt::Format_PVR_2BPP_RGB
&& format != nvtt::Format_PVR_4BPP_RGB
&& format != nvtt::Format_PVR_2BPP_RGBA
&& format != nvtt::Format_PVR_4BPP_RGBA
#endif
)
{
return false;
}
@ -851,7 +922,7 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
m->image->allocate(4, w, h, 1);
m->type = TextureType_2D;
const int bw = (w + 3) / 4;
const int bw = (w + 3) / 4; // @@ Not if PVR 2bpp!
const int bh = (h + 3) / 4;
const uint bs = blockSize(format);
@ -859,130 +930,166 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
const uint8 * ptr = (const uint8 *)data;
TRY {
if (format == nvtt::Format_BC6)
{
// BC6 format - decode directly to float
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
{
Vector3 colors[16];
const BlockBC6 * block = (const BlockBC6 *)ptr;
block->decodeBlock(colors);
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Vector3 rgb = colors[yy*4 + xx];
if (x * 4 + xx < w && y * 4 + yy < h)
{
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f;
}
}
}
ptr += bs;
}
}
}
else
{
// Non-BC6 - decode to 8-bit, then convert to float
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
{
ColorBlock colors;
if (format == nvtt::Format_BC1)
{
const BlockDXT1 * block = (const BlockDXT1 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC2)
{
const BlockDXT3 * block = (const BlockDXT3 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC3)
{
const BlockDXT5 * block = (const BlockDXT5 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC4)
{
const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC7)
{
const BlockBC7 * block = (const BlockBC7 *)ptr;
block->decodeBlock(&colors);
}
else
{
nvDebugCheck(false);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < w && y * 4 + yy < h)
{
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
}
}
}
ptr += bs;
}
}
}
#if defined(HAVE_PVRTEXTOOL)
if (format >= nvtt::Format_PVR_2BPP_RGB && format <= nvtt::Format_PVR_4BPP_RGBA)
{
bool two_bit_mode = (format == nvtt::Format_PVR_2BPP_RGB || format == nvtt::Format_PVR_2BPP_RGBA);
uint8 * output = new uint8[4 * w * h];
PVRTDecompressPVRTC(ptr, two_bit_mode, w, h, output);
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
m->image->pixel(0, x, y, 0) = output[4*(y*w + x) + 0] / 255.0f;
m->image->pixel(1, x, y, 0) = output[4*(y*w + x) + 1] / 255.0f;
m->image->pixel(2, x, y, 0) = output[4*(y*w + x) + 2] / 255.0f;
m->image->pixel(3, x, y, 0) = output[4*(y*w + x) + 3] / 255.0f;
}
}
delete [] output;
}
else
#endif
if (format == nvtt::Format_BC6 || (format >= nvtt::Format_ETC1 && format <= nvtt::Format_ETC2_RGBM))
{
// Some formats we decode directly to float:
for (int y = 0; y < bh; y++) {
for (int x = 0; x < bw; x++) {
Vector4 colors[16];
if (format == nvtt::Format_BC6) {
const BlockBC6 * block = (const BlockBC6 *)ptr;
block->decodeBlock(colors);
}
else if (format == nvtt::Format_ETC1 || format == nvtt::Format_ETC2_RGB) {
nv::decompress_etc(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RGBA || format == nvtt::Format_ETC2_RGBM) {
nv::decompress_etc_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_R) {
// @@ Not implemented.
//nv::decompress_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RG) {
// @@ Not implemented.
//nv::decompress_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RGB_A1) {
// @@ Not implemented?
//nv::decompress_etc(ptr, colors);
}
for (int yy = 0; yy < 4; yy++) {
for (int xx = 0; xx < 4; xx++) {
Vector4 c = colors[yy*4 + xx];
if (x * 4 + xx < w && y * 4 + yy < h) {
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = c.x;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = c.y;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = c.z;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = c.w;
}
}
}
ptr += bs;
}
}
}
else
{
// Others, we decode to 8-bit, then convert to float
for (int y = 0; y < bh; y++) {
for (int x = 0; x < bw; x++) {
ColorBlock colors;
if (format == nvtt::Format_BC1)
{
const BlockDXT1 * block = (const BlockDXT1 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC2)
{
const BlockDXT3 * block = (const BlockDXT3 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC3 || format == nvtt::Format_BC3n || format == nvtt::Format_BC3_RGBM)
{
const BlockDXT5 * block = (const BlockDXT5 *)ptr;
if (decoder == Decoder_D3D10) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_D3D9) {
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC4)
{
const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC7)
{
const BlockBC7 * block = (const BlockBC7 *)ptr;
block->decodeBlock(&colors);
}
else
{
nvDebugCheck(false);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Color32 c = colors.color(xx, yy);
if (x * 4 + xx < w && y * 4 + yy < h)
{
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f;
}
}
}
ptr += bs;
}
}
}
}
CATCH {
return false;
@ -1092,7 +1199,7 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth
m->image = img;
}
void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter)
void Surface::resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter)
{
if (isNull()) return;
@ -1104,27 +1211,17 @@ void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilte
int h = m->image->height();
int d = m->image->depth();
getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type);
getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type, nvtt::ShapeRestriction_Square);
if (m->type == TextureType_2D)
{
nvDebugCheck(d==1);
int md = nv::min(w,h);
w = md;
h = md;
}
else if (m->type == TextureType_Cube)
{
nvDebugCheck(d==1);
nvDebugCheck(w==h);
}
else if (m->type == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
resize(w, h, d, filter, filterWidth, params);
}
@ -1151,6 +1248,63 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl
resize(w, h, d, filter, filterWidth, params);
}
float rmsBilinearError(nvtt::Surface original, nvtt::Surface resized) {
return nv::rmsBilinearColorError(original.m->image, resized.m->image, (FloatImage::WrapMode)original.wrapMode(), original.alphaMode() == AlphaMode_Transparency);
}
void Surface::autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter)
{
Surface original = *this;
Surface resized = original;
int w = width();
int h = height();
int d = depth();
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
while (w >= 4 && h >= 4 && d >= 1) {
// Resize always from original? This is more expensive, but should produce higher quality.
//resized = original;
resized.resize(w, h, d, filter);
#if 0
// Scale back up to original size. @@ Upscaling not implemented!
Surface restored = resized;
restored.resize(original.width(), original.height(), original.depth(), ResizeFilter_Triangle);
float error;
if (isNormalMap()) {
error = nvtt::angularError(original, restored);
}
else {
error = nvtt::rmsError(original, restored);
}
#else
float error = rmsBilinearError(original, resized);
#endif
if (error < errorTolerance) {
*this = resized;
nvDebug("image resized %dx%d -> %dx%d (error=%f)\n", original.width(), original.height(), w, h, error);
}
else {
nvDebug("image can't be resized further (error=%f)\n", error);
break;
}
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
}
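A minimal usage sketch of the new auto-resize path; the file name and tolerance are made-up values:

// Sketch: shrink an image as long as the bilinear reconstruction error stays
// below the tolerance.
nvtt::Surface img;
if (img.load("lightmap.tga")) {
    img.autoResize(/*errorTolerance=*/0.01f, nvtt::RoundMode_None, nvtt::ResizeFilter_Box);
}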
bool Surface::canMakeNextMipmap(int min_size /*= 1*/)
{
if (isNull()) return false;
@ -1196,7 +1350,7 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa
{
nvDebugCheck(filter == MipmapFilter_Kaiser);
KaiserFilter filter(filterWidth);
if (params != NULL) filter.setParameters(params[0], params[1]);
if (params != NULL) filter.setParameters(/*alpha=*/params[0], /*stretch=*/params[1]);
img = img->downSample(filter, wrapMode, 3);
}
}
@ -1357,8 +1511,9 @@ void Surface::toSrgb()
for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c);
for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::toSrgb(channel[i]);
}
}//);
}
}
@ -1382,8 +1537,9 @@ void Surface::toLinearFromSrgb()
for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c);
for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::fromSrgb(channel[i]);
}
}//);
}
}
@ -2827,6 +2983,78 @@ Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1)
return s;
}
Surface Surface::warp(int w, int h, WarpFunction * warp_function) const
{
Surface s;
FloatImage * img = s.m->image = new FloatImage;
const int C = m->image->componentCount();
img->allocate(C, w, h, 1);
#define USE_PARALLEL_FOR 0
#if USE_PARALLEL_FOR
nv::parallel_for(h, 1, [=](int y) {
#else
for (int y = 0; y < h; y++) {
#endif
for (int x = 0; x < w; x++) {
float fx = (float(x) + 0.0f) / w;
float fy = (float(y) + 0.0f) / h;
float fz = 0;
warp_function(fx, fy, fz);
for (int c = 0; c < C; c++) {
img->pixel(c, x, y, 0) = m->image->sampleLinearClamp(c, fx, fy);
}
}
}
#if USE_PARALLEL_FOR
);
#endif
return s;
}
Surface Surface::warp(int w, int h, int d, WarpFunction * warp_function) const
{
Surface s;
FloatImage * img = s.m->image = new FloatImage;
const int C = m->image->componentCount();
img->allocate(C, w, h, d);
for (int z = 0; z < d; z++) {
#define USE_PARALLEL_FOR 0
#if USE_PARALLEL_FOR
nv::parallel_for(h, 1, [=](int y) {
#else
for (int y = 0; y < h; y++) {
#endif
for (int x = 0; x < w; x++) {
float fx = (float(x) + 0.0f) / w;
float fy = (float(y) + 0.0f) / h;
float fz = (float(z) + 0.0f) / d;
warp_function(fx, fy, fz);
for (int c = 0; c < C; c++) {
img->pixel(c, x, y, z) = m->image->sampleLinearClamp(c, fx, fy, fz); // @@ 2D only.
}
}
}
#if USE_PARALLEL_FOR
);
#endif
}
return s;
}
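A minimal usage sketch for the new warp API; the warp callback is a made-up example. Coordinates passed to the callback are normalized to [0, 1] and the result is sampled with bilinear clamp, as the implementation above shows:

// Sketch: horizontally mirror a surface through the 2D warp entry point.
static void mirror_x(float & x, float & y, float & z)
{
    x = 1.0f - x;
}

// img is an already loaded nvtt::Surface.
nvtt::Surface mirrored = img.warp(img.width(), img.height(), mirror_x);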
bool Surface::copyChannel(const Surface & srcImage, int srcChannel)
{
return copyChannel(srcImage, srcChannel, srcChannel);
@ -2953,7 +3181,7 @@ void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a)
}
// Vertical lines:
for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width)
for (uint i = 0, x = 0; i < uint(aw); i++, x += tile_width)
{
for (uint y = 0; y < h; y++)
{
@ -3083,9 +3311,9 @@ Surface nvtt::histogram(const Surface & img, int width, int height)
return histogram(img, /*minRange*/0, maxRange, width, height);
}
#include "nvcore/Array.inl"
#include "nvmath/PackedFloat.h"
#include <stdio.h>
//#include "nvcore/Array.inl"
//#include "nvmath/PackedFloat.h"
//#include <stdio.h>
nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height)
{
@ -3234,7 +3462,7 @@ nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRang
maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z));
}
printf("maxh = %f\n", maxh);
//printf("maxh = %f\n", maxh);
//maxh = 80;
maxh = 256;

@ -83,7 +83,7 @@ namespace nv {
uint countMipmaps(uint w, uint h, uint d);
uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size);
uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format);
void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType);
void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType, nvtt::ShapeRestriction shapeRestriction = nvtt::ShapeRestriction_None);
}

@ -10,8 +10,8 @@
// Gran Central Dispatch (GCD/libdispatch)
// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
#define HAVE_GCD 1
#include <dispatch/dispatch.h>
//#define HAVE_GCD 1
//#include <dispatch/dispatch.h>
#endif
// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime:
@ -64,7 +64,7 @@ namespace nvtt {
#endif
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
#if HAVE_GCD
// Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher

@ -47,9 +47,9 @@ const char * nvtt::errorString(Error e)
return "Error writing through output handler";
case Error_UnsupportedOutputFormat:
return "The container file does not support the selected output format";
default:
return "Invalid error";
}
return "Invalid error";
}
// Return NVTT version.

@ -105,7 +105,21 @@ namespace nvtt
Format_BC6,
Format_BC7,
Format_BC3_RGBM, //
Format_BC3_RGBM,
Format_ETC1,
Format_ETC2_R,
Format_ETC2_RG,
Format_ETC2_RGB,
Format_ETC2_RGBA,
Format_ETC2_RGB_A1,
Format_ETC2_RGBM,
Format_PVR_2BPP_RGB, // Using PVR textools.
Format_PVR_4BPP_RGB,
Format_PVR_2BPP_RGBA,
Format_PVR_4BPP_RGBA,
Format_Count
};
@ -155,6 +169,7 @@ namespace nvtt
NVTT_API void setFormat(Format format);
NVTT_API void setQuality(Quality quality);
NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f);
NVTT_API void setRGBMThreshold(float min_m);
NVTT_API void setExternalCompressor(const char * name);
@ -173,9 +188,10 @@ namespace nvtt
NVTT_API void setTargetDecoder(Decoder decoder);
// Translate to and from D3D formats.
NVTT_API Format format() const;
NVTT_API unsigned int d3d9Format() const;
NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setD3D9Format(unsigned int format);
//NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setDxgiFormat(unsigned int format);
};
@ -253,6 +269,14 @@ namespace nvtt
AlphaMode_Premultiplied,
};
// Extents shape restrictions
enum ShapeRestriction
{
ShapeRestriction_None,
ShapeRestriction_Square,
};
// Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1)
struct InputOptions
{
@ -344,7 +368,7 @@ namespace nvtt
{
Container_DDS,
Container_DDS10,
// Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
// Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format
};
@ -439,6 +463,9 @@ namespace nvtt
ToneMapper_Lightmap,
};
// Transform the given x, y, d coordinates in place.
typedef void WarpFunction(float & x, float & y, float & d);

// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
// @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
@ -486,7 +513,8 @@ namespace nvtt
NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter);
NVTT_API void resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter);
NVTT_API void autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1);
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1);
@ -554,6 +582,10 @@ namespace nvtt
NVTT_API void flipZ();
NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const;
NVTT_API Surface warp(int w, int h, WarpFunction * f) const;
NVTT_API Surface warp(int w, int h, int d, WarpFunction * f) const;
// Copy image data.
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel);
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel);

@ -146,9 +146,16 @@ static const char * s_witnessImageSet[] = {
};
static const char * s_witnessLmapImageSet[] = {
"specruin.dds",
"cottage.dds",
"hallway.dds",
"windmill.dds",
"tunnel.dds",
"theater.dds",
"tower.dds",
"hub.dds",
"mine.dds",
"archway.dds",
"hut.dds",
"shaft.dds",
};
static const char * s_normalMapImageSet[] = {
@ -187,8 +194,14 @@ enum Mode {
Mode_BC5_Normal_Paraboloid,
Mode_BC5_Normal_Quartic,
//Mode_BC5_Normal_DualParaboloid,
Mode_BC6,
Mode_BC7,
Mode_BC6,
Mode_BC7,
Mode_ETC1_IC,
Mode_ETC1_EtcLib,
Mode_ETC2_EtcLib,
Mode_ETC1_RgEtc,
Mode_ETC2_RGBM,
Mode_PVR,
Mode_Count
};
static const char * s_modeNames[] = {
@ -207,8 +220,14 @@ static const char * s_modeNames[] = {
"BC5-Normal-Paraboloid", // Mode_BC5_Normal_Paraboloid,
"BC5-Normal-Quartic", // Mode_BC5_Normal_Quartic,
//"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid,
"BC6", // Mode_BC6,
"BC7", // Mode_BC7,
"BC6", // Mode_BC6,
"BC7", // Mode_BC7,
"ETC1-IC",
"ETC1-EtcLib",
"ETC2-EtcLib",
"ETC1-RgEtc",
"ETC2-RGBM",
"PVR",
};
nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count);
@ -218,14 +237,16 @@ struct Test {
Mode modes[6];
};
static Test s_imageTests[] = {
{"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
{"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
//{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}},
{"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
{"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
{"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}},
{"BC6", 1, {Mode_BC6}},
{"BC7", 1, {Mode_BC7}},
/*0*/ {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
/*1*/ {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
/*2*/ {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
/*3*/ {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
/*4*/ {"HDR", 3, {Mode_ETC2_RGBM, Mode_BC3_RGBM, Mode_BC6}},
/*5*/ {"BC6", 1, {Mode_BC6}},
/*6*/ {"BC7", 1, {Mode_BC7}},
/*7*/ {"ETC", 3, {Mode_ETC1_IC, Mode_ETC1_RgEtc, Mode_ETC2_EtcLib}},
/*8*/ {"Color Mobile", 4, {Mode_PVR, Mode_ETC1_IC, Mode_ETC2_EtcLib, Mode_BC1}},
/*9*/ //{"ETC-Lightmap", 2, {Mode_BC3_RGBM, Mode_ETC_RGBM}},
};
const int s_imageTestCount = ARRAY_SIZE(s_imageTests);
@ -404,10 +425,10 @@ int main(int argc, char *argv[])
i++;
}
}
else
{
printf("Warning: unrecognized option \"%s\"\n", argv[i]);
}
else
{
printf("Warning: unrecognized option \"%s\"\n", argv[i]);
}
}
// Validate inputs.
@ -462,7 +483,8 @@ int main(int argc, char *argv[])
}
else
{
compressionOptions.setQuality(nvtt::Quality_Production);
compressionOptions.setQuality(nvtt::Quality_Normal);
//compressionOptions.setQuality(nvtt::Quality_Production);
}
//compressionOptions.setExternalCompressor("ati");
//compressionOptions.setExternalCompressor("squish");
@ -515,13 +537,13 @@ int main(int argc, char *argv[])
// Labels on the left side.
if (errorMode == ErrorMode_RMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01";
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.03,0.01";
}
else if (errorMode == ErrorMode_CieLab) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1";
}
else if (errorMode == ErrorMode_AngularRMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01";
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.2,0.02"; // 0.05,0.01
}
// Labels at the bottom.
@ -581,7 +603,6 @@ int main(int argc, char *argv[])
else if (errorMode == ErrorMode_AngularRMSE) {
graphWriter << "&chtt=" << set.name << "%20-%20" << test.name << "%20-%20Angular RMSE";
}
Timer timer;
@ -590,7 +611,7 @@ int main(int argc, char *argv[])
nvtt::Surface img;
printf("Running Test: %s with Set: %s\n", test.name, set.name);
printf("Running test '%s' with set '%s'\n", test.name, set.name);
graphWriter << "&chd=t:";
@ -602,10 +623,11 @@ int main(int argc, char *argv[])
Mode mode = test.modes[t];
nvtt::Format format;
const char * compressor_name = NULL;
if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) {
format = nvtt::Format_BC1;
}
else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW) {
else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_LUVW) {
format = nvtt::Format_BC3;
}
else if (mode == Mode_BC3_Normal) {
@ -614,20 +636,51 @@ int main(int argc, char *argv[])
else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) {
format = nvtt::Format_BC5;
}
else if (mode == Mode_BC6)
{
format = nvtt::Format_BC6;
}
else if (mode == Mode_BC7)
{
format = nvtt::Format_BC7;
}
else
{
nvDebugCheck(false);
}
else if (mode == Mode_BC3_RGBM) {
format = nvtt::Format_BC3_RGBM;
}
else if (mode == Mode_BC6)
{
format = nvtt::Format_BC6;
}
else if (mode == Mode_BC7)
{
format = nvtt::Format_BC7;
}
else if (mode == Mode_ETC1_IC)
{
format = nvtt::Format_ETC1;
}
else if (mode == Mode_ETC1_EtcLib)
{
format = nvtt::Format_ETC1;
compressor_name = "etclib";
}
else if (mode == Mode_ETC2_EtcLib)
{
format = nvtt::Format_ETC2_RGB;
compressor_name = "etclib";
}
else if (mode == Mode_ETC1_RgEtc)
{
format = nvtt::Format_ETC1;
compressor_name = "rg_etc";
}
else if (mode == Mode_ETC2_RGBM)
{
format = nvtt::Format_ETC2_RGBM;
}
else if (mode == Mode_PVR)
{
format = nvtt::Format_PVR_4BPP_RGB;
}
else
{
nvUnreachable();
}
compressionOptions.setFormat(format);
if (compressor_name) compressionOptions.setExternalCompressor(compressor_name);
if (set.type == ImageType_RGBA) {
img.setAlphaMode(nvtt::AlphaMode_Transparency);
@ -653,6 +706,7 @@ int main(int argc, char *argv[])
printf("Input image '%s' not found.\n", set.fileNames[i]);
return EXIT_FAILURE;
}
float color_range = 0.0f;
if (img.isNormalMap()) {
img.normalizeNormalMap();
@ -693,16 +747,34 @@ int main(int argc, char *argv[])
tmp.clamp(2);
tmp.clamp(3);
}
else if (mode == Mode_BC3_RGBM) {
tmp.setAlphaMode(nvtt::AlphaMode_None);
if (set.type == ImageType_HDR) {
// Transform to gamma-2.0 space before applying RGBM - helps a lot with banding in the darks.
tmp.toGamma(2.0f);
tmp.toRGBM(3.0f); // range of 3.0 in gamma-2.0 space == range of 9.0 in linear space
else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
float r, g, b;
tmp.range(0, NULL, &r);
tmp.range(1, NULL, &g);
tmp.range(2, NULL, &b);
color_range = max3(r, g, b);
printf("color range = %f\n", color_range);
tmp.setAlphaMode(nvtt::AlphaMode_Transparency);
const float max_color_range = 16.0f;
if (color_range > max_color_range) {
color_range = max_color_range;
}
else {
tmp.toRGBM();
for (int i = 0; i < 3; i++) {
tmp.scaleBias(i, 1.0f / color_range, 0.0f);
}
tmp.toneMap(nvtt::ToneMapper_Linear, /*parameters=*/NULL); // Clamp without changing the hue.
// Clamp alpha.
tmp.clamp(3);
// To gamma.
tmp.toGamma(2);
compressionOptions.setRGBMThreshold(0.2f);
}
else if (mode == Mode_BC3_LUVW) {
tmp.setAlphaMode(nvtt::AlphaMode_None);
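The RGBM round trip exercised here (colors scaled into [0,1] by color_range, encoded before compression, decoded again further down) boils down to storing a shared multiplier M = max(R,G,B) in the alpha channel and dividing the color channels by it. A stand-alone sketch of that idea follows; it shows the general technique, not nvtt's exact toRGBM/fromRGBM implementation, and the 0.2 threshold simply mirrors the setRGBMThreshold value used above.

#include <algorithm>

struct Rgbm { float r, g, b, m; };

// Encode linear RGB already scaled into [0, 1]. The multiplier is kept above
// the threshold so near-black colors do not explode when divided, and clamped
// to 1 so it fits an 8-bit alpha channel.
Rgbm encodeRGBM(float r, float g, float b, float threshold = 0.2f)
{
    float m = std::max(std::max(r, g), std::max(b, threshold));
    m = std::min(m, 1.0f);
    return { r / m, g / m, b / m, m };
}

// Decoding is a single multiply per channel.
void decodeRGBM(const Rgbm & p, float & r, float & g, float & b)
{
    r = p.r * p.m;
    g = p.g * p.m;
    b = p.b * p.m;
}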
@ -781,14 +853,25 @@ int main(int argc, char *argv[])
}*/
}
}
else if (mode == Mode_BC3_RGBM) {
if (set.type == ImageType_HDR) {
img_out.fromRGBM(3.0f);
img_out.toLinear(2.0f);
else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
/*if (set.type == ImageType_HDR) {
//img_out.fromRGBM(3.0f);
img_out.fromRGBM(range);
img_out.toLinear(2.0f);
}
else {
img_out.fromRGBM();
}*/
img_out.fromRGBM(1.0f, 0.2f);
img_out.toLinear(2);
for (int i = 0; i < 3; i++) {
img_out.scaleBias(i, color_range, 0.0f);
}
img_out.copyChannel(img, 3); // Copy alpha channel from source.
img_out.setAlphaMode(nvtt::AlphaMode_Transparency);
}
else if (mode == Mode_BC3_LUVW) {
if (set.type == ImageType_HDR) {

@ -61,6 +61,9 @@ struct MyAssertHandler : public nv::AssertHandler {
virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) {
fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
nv::debug::dumpInfo();
if (nv::debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
exit(1);
}
};

@ -154,11 +154,13 @@ int main(int argc, char *argv[])
bool loadAsFloat = false;
bool rgbm = false;
bool rangescale = false;
bool srgb = false;
const char * externalCompressor = NULL;
bool silent = false;
bool dds10 = false;
bool ktx = false;
nv::Path input;
nv::Path output;
@ -285,6 +287,31 @@ int main(int argc, char *argv[])
format = nvtt::Format_BC3_RGBM;
rgbm = true;
}
else if (strcmp("-etc1", argv[i]) == 0)
{
format = nvtt::Format_ETC1;
}
else if (strcmp("-etc2", argv[i]) == 0 || strcmp("-etc2_rgb", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGB;
}
else if (strcmp("-etc2_eac", argv[i]) == 0 || strcmp("-etc2_rgba", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBA;
}
else if (strcmp("-eac", argv[i]) == 0 || strcmp("-etc2_r", argv[i]) == 0)
{
format = nvtt::Format_ETC2_R;
}
else if (strcmp("-etc2_rg", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RG;
}
else if (strcmp("-etc2_rgbm", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBM;
rgbm = true;
}
// Undocumented option. Mainly used for testing.
else if (strcmp("-ext", argv[i]) == 0)
@ -309,7 +336,15 @@ int main(int argc, char *argv[])
{
dds10 = true;
}
else if (strcmp("-ktx", argv[i]) == 0)
{
ktx = true;
}
else if (strcmp("-srgb", argv[i]) == 0)
{
srgb = true;
}
else if (argv[i][0] != '-')
{
input = argv[i];
@ -321,15 +356,23 @@ int main(int argc, char *argv[])
{
output.copy(input.str());
output.stripExtension();
output.append(".dds");
if (ktx)
{
output.append(".ktx");
}
else
{
output.append(".dds");
}
}
break;
}
else
{
printf("Warning: unrecognized option \"%s\"\n", argv[i]);
}
else
{
printf("Warning: unrecognized option \"%s\"\n", argv[i]);
}
}
const uint version = nvtt::version();
@ -380,7 +423,9 @@ int main(int argc, char *argv[])
printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n");
printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7)\n\n");
printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7, unless ktx is being used)\n");
printf(" -ktx \tUse KTX container format\n");
printf(" -srgb \tIf the requested format allows it, output will be in sRGB color space\n\n");
return EXIT_FAILURE;
}
@ -398,7 +443,7 @@ int main(int argc, char *argv[])
bool useSurface = false; // @@ use Surface API in all cases!
nvtt::Surface image;
if (format == nvtt::Format_BC3_RGBM || rgbm) {
if (format == nvtt::Format_BC3_RGBM || format == nvtt::Format_ETC2_RGBM || rgbm) {
useSurface = true;
if (!image.load(input.str())) {
@ -440,7 +485,7 @@ int main(int argc, char *argv[])
// To gamma.
image.toGamma(2);
if (format != nvtt::Format_BC3_RGBM) {
if (format != nvtt::Format_BC3_RGBM && format != nvtt::Format_ETC2_RGBM) {
image.setAlphaMode(nvtt::AlphaMode_None);
image.toRGBM(1, 0.15f);
}
@ -494,7 +539,7 @@ int main(int argc, char *argv[])
nvDebugCheck(dds.isTextureArray());
inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize());
faceCount = dds.arraySize();
dds10 = true;
dds10 = ktx ? false : true;
}
uint mipmapCount = dds.mipmapCount();
@ -569,11 +614,12 @@ int main(int argc, char *argv[])
inputOptions.setAlphaMode(nvtt::AlphaMode_None);
}
// IC: Do not enforce D3D9 restrictions anymore.
// Block compressed textures with mipmaps must be powers of two.
if (!noMipmaps && format != nvtt::Format_RGB)
/*if (!noMipmaps && format != nvtt::Format_RGB)
{
inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo);
}
}*/
if (normal)
{
@ -720,15 +766,27 @@ int main(int argc, char *argv[])
outputOptions.setOutputHandler(&outputHandler);
outputOptions.setErrorHandler(&errorHandler);
// Automatically use dds10 if compressing to BC6 or BC7
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7)
{
dds10 = true;
}
if (dds10)
if (ktx)
{
outputOptions.setContainer(nvtt::Container_DDS10);
outputOptions.setContainer(nvtt::Container_KTX);
}
else
{
// Automatically use dds10 if compressing to BC6 or BC7
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) {
dds10 = true;
}
if (dds10) {
outputOptions.setContainer(nvtt::Container_DDS10);
}
else {
outputOptions.setContainer(nvtt::Container_DDS);
}
}
if (srgb) {
outputOptions.setSrgbFlag(true);
}
// printf("Press ENTER.\n");
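For completeness, a minimal sketch of driving the same new output options through the library API instead of the command line. It assumes the classic InputOptions/Compressor::process path; the 512x512 layout, the out.ktx file name and the missing pixel upload are placeholders, so treat it as a configuration example rather than a working tool.

#include <nvtt/nvtt.h>

int main()
{
    nvtt::InputOptions inputOptions;
    inputOptions.setTextureLayout(nvtt::TextureType_2D, 512, 512);
    // Real code would call inputOptions.setMipmapData(...) with the source pixels here.

    nvtt::CompressionOptions compressionOptions;
    compressionOptions.setFormat(nvtt::Format_ETC2_RGBA);    // one of the new ETC2 formats
    compressionOptions.setQuality(nvtt::Quality_Normal);

    nvtt::OutputOptions outputOptions;
    outputOptions.setFileName("out.ktx");                    // placeholder name
    outputOptions.setContainer(nvtt::Container_KTX);         // new KTX container
    outputOptions.setSrgbFlag(true);                         // matches the new -srgb switch

    nvtt::Compressor compressor;
    return compressor.process(inputOptions, compressionOptions, outputOptions) ? 0 : 1;
}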

@ -99,8 +99,8 @@ int main(int argc, char *argv[])
return 1;
}
break;
}
break;
}
}
if (input.isNull() || output.isNull())
@ -136,21 +136,21 @@ int main(int argc, char *argv[])
nv::FloatImage fimage(&image);
fimage.toLinear(0, 3, gamma);
uint thumbW, thumbH;
if (image.width() > image.height())
{
thumbW = size;
thumbH = uint ((float (image.height()) / float (image.width())) * size);
}
else
{
thumbW = uint ((float (image.width()) / float (image.height())) * size);
thumbH = size;
}
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
uint thumbW, thumbH;
if (image.width() > image.height())
{
thumbW = size;
thumbH = uint ((float (image.height()) / float (image.width())) * size);
}
else
{
thumbW = uint ((float (image.width()) / float (image.height())) * size);
thumbH = size;
}
nv::AutoPtr<nv::FloatImage> fresult(fimage.resize(nv::BoxFilter(), thumbW, thumbH, nv::FloatImage::WrapMode_Clamp));
nv::AutoPtr<nv::Image> result(fresult->createImageGammaCorrect(gamma));
result->setFormat(nv::Image::Format_ARGB);
nv::StdOutputStream stream(output.str());
nv::ImageIO::save(output.str(), stream, result.ptr(), metaData.buffer());
@ -160,7 +160,7 @@ int main(int argc, char *argv[])
nv::StdOutputStream stream(output.str());
nv::ImageIO::save(output.str(), stream, &image, metaData.buffer());
}
return 0;
}
