Merge changes from The Witness.

commit 9489aed825 (parent 2075d740c9, pull/276/head)
Ignacio, 6 years ago

@ -1,6 +1,6 @@
NVIDIA Texture Tools is licensed under the MIT license.
Copyright (c) 2009-2016 Ignacio Castano
Copyright (c) 2009-2017 Ignacio Castaño
Copyright (c) 2007-2009 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person

@ -6,7 +6,7 @@ manipulation tools, designed to be integrated in game tools and asset
processing pipelines.
The primary features of the library are mipmap and normal map generation, format
conversion and DXT compression.
conversion, and DXT compression.
### How to build (Windows)
@ -42,5 +42,5 @@ src/nvtt/tools/compress.cpp
Detailed documentation of the API can be found at:
http://code.google.com/p/nvidia-texture-tools/wiki/ApiDocumentation
https://github.com/castano/nvidia-texture-tools/wiki/ApiDocumentation

@ -36,4 +36,6 @@ do
#./nvcompress -silent -alpha -nomips -bc6 $file.$EXT $file.bc6.dds
#./nvimgdiff -alpha $file.$EXT $file.bc6.dds
# ETC2-EAC
./nvcompress -silent -alpha -nomips -etc_rgbm
done

@ -349,9 +349,18 @@ LLVM:
# define POSH_OS_STRING "UNICOS"
#endif
#if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
//ACS if we're in xcode, look at the target conditionals to figure out if this is ios or osx
#if defined __APPLE__
# include "TargetConditionals.h"
#endif
#if TARGET_OS_IPHONE
# define POSH_OS_IOS 1
# define POSH_OS_STRING "iOS"
#else
# if ( defined __MWERKS__ && defined __powerc && !defined macintosh ) || defined __APPLE_CC__ || defined macosx
# define POSH_OS_OSX 1
# define POSH_OS_STRING "MacOS X"
# endif
#endif
#if defined __sun__ || defined sun || defined __sun || defined __solaris__

@ -1808,7 +1808,7 @@ typedef unsigned long uint64;
{
if (block_inten[0] > m_pSorted_luma[n - 1])
{
const uint min_error = labs(int(block_inten[0] - m_pSorted_luma[n - 1]));
const uint min_error = abs(int(block_inten[0] - m_pSorted_luma[n - 1]));
if (min_error >= trial_solution.m_error)
continue;
}
@ -1822,7 +1822,7 @@ typedef unsigned long uint64;
{
if (m_pSorted_luma[0] > block_inten[3])
{
const uint min_error = labs(int(m_pSorted_luma[0] - block_inten[3]));
const uint min_error = abs(int(m_pSorted_luma[0] - block_inten[3]));
if (min_error >= trial_solution.m_error)
continue;
}
@ -1914,7 +1914,7 @@ done:
for (uint packed_c = 0; packed_c < limit; packed_c++)
{
int v = etc1_decode_value(diff, inten, selector, packed_c);
uint err = labs(v - static_cast<int>(color));
uint err = abs(v - static_cast<int>(color));
if (err < best_error)
{
best_error = err;

@ -14,6 +14,7 @@ SET(BC6H_SRCS
zohtwo.cpp)
ADD_LIBRARY(bc6h STATIC ${BC6H_SRCS})
TARGET_LINK_LIBRARIES(bc6h nvcore nvmath)
IF(NOT WIN32)
IF(CMAKE_COMPILER_IS_GNUCXX)

@ -37,7 +37,7 @@ int Utils::lerp(int a, int b, int i, int denom)
case 3: denom *= 5; i *= 5; // fall through to case 15
case 15: weights = denom15_weights_64; break;
case 7: weights = denom7_weights_64; break;
default: nvDebugCheck(0);
default: nvUnreachable();
}
return (a*weights[denom-i] +b*weights[i] + round) >> shift;

@ -584,7 +584,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
//float err = 0;
for (int region=0; region<NREGIONS_ONE; ++region)
{

@ -672,7 +672,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
{
Vector3 pixels[Tile::TILE_TOTAL];
float importance[Tile::TILE_TOTAL];
float err = 0;
//float err = 0;
for (int region=0; region<NREGIONS_TWO; ++region)
{

@ -22,6 +22,7 @@ SET(BC7_SRCS
avpcl_utils.h)
ADD_LIBRARY(bc7 STATIC ${BC7_SRCS})
TARGET_LINK_LIBRARIES(bc7 nvcore nvmath)
TARGET_LINK_LIBRARIES(bc7 nvmath)

@ -243,7 +243,7 @@ static void write_header(const IntEndptsRGB_2 endpts[NREGIONS], int shapeindex,
static void read_header(Bits &in, IntEndptsRGB_2 endpts[NREGIONS], int &shapeindex, Pattern &p, int &pat_index)
{
int mode = AVPCL::getmode(in);
//int mode = AVPCL::getmode(in);
pat_index = 0;
nvAssert (pat_index >= 0 && pat_index < NPATTERNS);
@ -580,7 +580,7 @@ static float exhaustive(const Vector4 colors[], const float importance[], int np
int bhigh = min((1<<bprec)-1, opt_endpts.B[ch] + bdelta);
// now there's no need to swap the ordering of A and B
bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
//bool a_le_b = opt_endpts.A[ch] <= opt_endpts.B[ch];
int amin, bmin;

@ -148,7 +148,7 @@ namespace nv
NV_FORCEINLINE bool isDone(const PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); return i == this->m_size; }
NV_FORCEINLINE void advance(PseudoIndex & i) const { nvDebugCheck(i <= this->m_size); i++; }
#if NV_CC_MSVC
#if NV_NEED_PSEUDOINDEX_WRAPPER
NV_FORCEINLINE T & operator[]( const PseudoIndexWrapper & i ) {
return m_buffer[i(this)];
}

@ -27,7 +27,7 @@
#define NV_FASTCALL __attribute__((fastcall))
#define NV_FORCEINLINE __attribute__((always_inline)) inline
#define NV_DEPRECATED __attribute__((deprecated))
#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX
#define NV_THREAD_LOCAL __thread
#if __GNUC__ > 2
#define NV_PURE __attribute__((pure))

@ -31,11 +31,6 @@ bool FileSystem::exists(const char * path)
// PathFileExists requires linking to shlwapi.lib
//return PathFileExists(path) != 0;
return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
#elif NV_OS_ORBIS
const int BUFFER_SIZE = 2048;
char file_fullpath[BUFFER_SIZE];
snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path);
return sceFiosExistsSync(NULL, file_fullpath);
#else
if (FILE * fp = fopen(path, "r"))
{
@ -78,3 +73,31 @@ bool FileSystem::removeFile(const char * path)
// @@ Use unlink or remove?
return remove(path) == 0;
}
#include "StdStream.h" // for fileOpen
bool FileSystem::copyFile(const char * src, const char * dst) {
FILE * fsrc = fileOpen(src, "rb");
if (fsrc == NULL) return false;
NV_ON_RETURN(fclose(fsrc));
FILE * fdst = fileOpen(dst, "wb");
if (fdst == NULL) return false;
NV_ON_RETURN(fclose(fdst));
char buffer[1024];
size_t n;
while ((n = fread(buffer, sizeof(char), sizeof(buffer), fsrc)) > 0) {
if (fwrite(buffer, sizeof(char), n, fdst) != n) {
return false;
}
}
return true;
}
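NV_ON_RETURN, used twice above, is the lambda-based scope guard added to nvcore/nvcore.h later in this commit; the fclose calls it wraps run on every return path. A minimal sketch of the same idiom, with a hypothetical helper that uses plain stdio:

    #include <stdio.h>
    // Read the first byte of a file; the guard closes the file on every return path.
    bool readFirstByte(const char * path, char * out) {
        FILE * f = fopen(path, "rb");
        if (f == NULL) return false;
        NV_ON_RETURN(fclose(f));
        return fread(out, 1, 1, f) == 1;
    }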

@ -15,7 +15,7 @@ namespace nv
NVCORE_API bool createDirectory(const char * path);
NVCORE_API bool changeDirectory(const char * path);
NVCORE_API bool removeFile(const char * path);
NVCORE_API bool copyFile(const char * src, const char * dst);
} // FileSystem namespace
} // nv namespace

@ -33,6 +33,8 @@ https://gcc.gnu.org/bugzilla/show_bug.cgi?id=6709
#else // If typeof not available:
#define NV_NEED_PSEUDOINDEX_WRAPPER 1
#include <new> // placement new
struct PseudoIndexWrapper {

@ -2,6 +2,7 @@
#include "Memory.h"
#include "Debug.h"
#include "Utils.h"
#include <stdlib.h>
@ -56,6 +57,7 @@ void * realloc(void * ptr, size_t size)
#endif
}
/* No need to override this unless we want line info.
void * operator new (size_t size) throw()
{
@ -116,4 +118,32 @@ void operator delete(void* p, const std::nothrow_t&) throw()
#endif // NV_OVERRIDE_ALLOC
void * nv::aligned_malloc(size_t size, size_t alignment)
{
// alignment must be a power of two, multiple of sizeof(void*)
nvDebugCheck(isPowerOfTwo(alignment));
nvDebugCheck((alignment & (sizeof(void*) - 1)) == 0);
#if NV_OS_WIN32 || NV_OS_DURANGO
return _aligned_malloc(size, alignment);
#elif NV_OS_DARWIN && !NV_OS_IOS
void * ptr = NULL;
posix_memalign(&ptr, alignment, size);
return ptr;
#elif NV_OS_LINUX
return memalign(alignment, size);
#else // NV_OS_ORBIS || NV_OS_IOS
// @@ IC: iOS appears to be 16 byte aligned, should we check alignment and assert if we request a higher alignment factor?
return ::malloc(size);
#endif
}
void nv::aligned_free(void * ptr)
{
#if NV_OS_WIN32 || NV_OS_DURANGO
_aligned_free(ptr);
#else
::free(ptr);
#endif
}
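A short usage sketch of the new allocation pair (buffer size and alignment are illustrative; the alignment has to satisfy the two nvDebugCheck preconditions above):

    #include "nvcore/Memory.h"
    void alignedScratchExample() {
        // 64 is a power of two and a multiple of sizeof(void*), so the debug checks pass.
        void * scratch = nv::aligned_malloc(4096, 64);
        // ... use the buffer ...
        nv::aligned_free(scratch);   // must pair with aligned_malloc
    }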

@ -7,10 +7,16 @@
#include "nvcore.h"
#include <stdlib.h> // malloc(), realloc() and free()
#include <string.h> // memset
//#include <stddef.h> // size_t
//#include <new> // new and delete
#define TRACK_MEMORY_LEAKS 0
#if TRACK_MEMORY_LEAKS
#include <vld.h>
#endif
#if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
@ -41,6 +47,8 @@ extern "C" {
#endif
namespace nv {
NVCORE_API void * aligned_malloc(size_t size, size_t alignment);
NVCORE_API void aligned_free(void * );
// C++ helpers.
template <typename T> NV_FORCEINLINE T * malloc(size_t count) {

@ -213,9 +213,12 @@ namespace nv
#elif NV_OS_LINUX
return (uint)fread_unlocked(data, 1, len, m_fp);
#elif NV_OS_DARWIN
// @@ Only EOF is detected here; not sure if this is faster than the locked version.
for (uint i = 0; i < len; i++) {
((char *)data)[i] = getc_unlocked(m_fp);
if (feof_unlocked(m_fp) != 0) {
return i;
}
}
return len;
#else

@ -347,14 +347,19 @@ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
}
/** Append a string. */
// Append a character.
StringBuilder & StringBuilder::append( char c )
{
return append(&c, 1);
}
// Append a string.
StringBuilder & StringBuilder::append( const char * s )
{
return append(s, U32(strlen( s )));
}
/** Append a string. */
// Append a string.
StringBuilder & StringBuilder::append(const char * s, uint len)
{
nvDebugCheck(s != NULL);
@ -367,6 +372,11 @@ StringBuilder & StringBuilder::append(const char * s, uint len)
return *this;
}
StringBuilder & StringBuilder::append(const StringBuilder & str)
{
return append(str.m_str, str.length());
}
/** Append a formatted string. */
StringBuilder & StringBuilder::appendFormat( const char * fmt, ... )
@ -516,6 +526,19 @@ StringBuilder & StringBuilder::copy( const StringBuilder & s )
return *this;
}
void StringBuilder::removeChar(char c)
{
char * src = strchr(m_str, c);
if (src) {
char * dst = src;
src++;
while (*src) {
*dst++ = *src++;
}
*dst = '\0';
}
}
bool StringBuilder::endsWith(const char * str) const
{
uint l = uint(strlen(str));
@ -530,7 +553,7 @@ bool StringBuilder::beginsWith(const char * str) const
return strncmp(m_str, str, l) == 0;
}
// Find given char starting from the end.
// Find given char starting from the end. Why not use strrchr!?
char * StringBuilder::reverseFind(char c)
{
int length = (int)strlen(m_str) - 1;
@ -563,6 +586,19 @@ char * StringBuilder::release()
return str;
}
// Take ownership of string.
void StringBuilder::acquire(char * str)
{
if (str) {
m_size = strLen(str) + 1;
m_str = str;
}
else {
m_size = 0;
m_str = NULL;
}
}
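For reference, release() and the new acquire() pair up to transfer buffer ownership between builders; a hedged sketch with illustrative contents:

    void transferOwnershipExample() {
        nv::StringBuilder a;
        a.append("hello");
        char * raw = a.release();   // 'a' gives up its buffer; the caller owns it now
        nv::StringBuilder b;
        b.acquire(raw);             // 'b' takes ownership and will free it
    }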
// Swap strings.
void nv::swap(StringBuilder & a, StringBuilder & b) {
swap(a.m_size, b.m_size);
@ -585,19 +621,20 @@ const char * Path::extension() const
/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
nvCheck(path != NULL);
if (path != NULL) {
for (int i = 0;; i++) {
if (path[i] == '\0') break;
if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator;
}
}
}
/// Toggles path separators (ie. \\ into /).
void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
{
nvCheck(!isNull());
if (!isNull()) {
translatePath(m_str, pathSeparator);
}
}
void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)

@ -105,8 +105,10 @@ namespace nv
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
StringBuilder & formatList( const char * format, va_list arg );
StringBuilder & append(char c);
StringBuilder & append(const char * str);
StringBuilder & append(const char * str, uint len);
StringBuilder & append(const StringBuilder & str);
StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
StringBuilder & appendFormatList(const char * format, va_list arg);
@ -123,21 +125,24 @@ namespace nv
StringBuilder & toLower();
StringBuilder & toUpper();
void removeChar(char c);
bool endsWith(const char * str) const;
bool beginsWith(const char * str) const;
char * reverseFind(char c);
void reset();
bool isNull() const { return m_size == 0; }
NV_FORCEINLINE bool isNull() const { return m_size == 0; }
// const char * accessors
//operator const char * () const { return m_str; }
//operator char * () { return m_str; }
const char * str() const { return m_str; }
char * str() { return m_str; }
NV_FORCEINLINE const char * str() const { return m_str; }
NV_FORCEINLINE char * str() { return m_str; }
char * release();
char * release(); // Release ownership of string.
void acquire(char *); // Take ownership of string.
/// Implement value semantics.
StringBuilder & operator=( const StringBuilder & s ) {
@ -280,25 +285,25 @@ namespace nv
/// Equal operator.
bool operator==( const String & str ) const
{
return strMatch(str.data, data);
return strEqual(str.data, data);
}
/// Equal operator.
bool operator==( const char * str ) const
{
return strMatch(str, data);
return strEqual(str, data);
}
/// Not equal operator.
bool operator!=( const String & str ) const
{
return !strMatch(str.data, data);
return !strEqual(str.data, data);
}
/// Not equal operator.
bool operator!=( const char * str ) const
{
return !strMatch(str, data);
return !strEqual(str, data);
}
/// Returns true if this string is the null string.

@ -82,7 +82,7 @@ namespace nv
nvStaticCheck(sizeof(bool) == 4);
uint8 b = c ? 1 : 0;
s.serialize( &b, 1 );
c = (b == 1);
c = (b != 0);
#else
nvStaticCheck(sizeof(bool) == 1);
s.serialize( &c, 1 );

@ -39,6 +39,28 @@ namespace nv
// These intentionally look like casts.
// uint64 casts:
template <typename T> inline uint64 U64(T x) { return x; }
//template <> inline uint64 U64<uint64>(uint64 x) { return x; }
template <> inline uint64 U64<int64>(int64 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U32<uint32>(uint32 x) { return x; }
template <> inline uint64 U64<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint16>(uint16 x) { return x; }
template <> inline uint64 U64<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint64)x; }
//template <> inline uint64 U64<uint8>(uint8 x) { return x; }
template <> inline uint64 U64<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint64)x; }
// int64 casts:
template <typename T> inline int64 I64(T x) { return x; }
template <> inline int64 I64<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT64_MAX); return (int64)x; }
//template <> inline uint64 U64<int64>(int64 x) { return x; }
//template <> inline uint64 U32<uint32>(uint32 x) { return x; }
//template <> inline uint64 U64<int32>(int32 x) { return x; }
//template <> inline uint64 U64<uint16>(uint16 x) { return x; }
//template <> inline uint64 U64<int16>(int16 x) { return x; }
//template <> inline uint64 U64<uint8>(uint8 x) { return x; }
//template <> inline uint64 U64<int8>(int8 x) { return x; }
// uint32 casts:
template <typename T> inline uint32 U32(T x) { return x; }
template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
@ -50,6 +72,11 @@ namespace nv
//template <> inline uint32 U32<uint8>(uint8 x) { return x; }
template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
#if NV_OS_DARWIN
template <> inline uint32 U32<unsigned long>(unsigned long x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
template <> inline uint32 U32<long>(long x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
#endif
// int32 casts:
template <typename T> inline int32 I32(T x) { return x; }
template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
@ -182,7 +209,7 @@ namespace nv
* @note isPowerOfTwo(x) == true -> nextPowerOfTwo(x) == x
* @note nextPowerOfTwo(x) = 2 << log2(x-1)
*/
inline uint nextPowerOfTwo( uint x )
inline uint32 nextPowerOfTwo(uint32 x)
{
nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
@ -202,8 +229,19 @@ namespace nv
#endif
}
/// Return true if @a n is a power of two.
inline bool isPowerOfTwo( uint n )
inline uint64 nextPowerOfTwo(uint64 x)
{
nvDebugCheck(x != 0);
uint p = 1;
while (x > p) {
p += p;
}
return p;
}
// @@ Should I just use a macro instead?
template <typename T>
inline bool isPowerOfTwo(T n)
{
return (n & (n-1)) == 0;
}
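A few worked values for the helpers above, written as debug checks (a sketch; note that the templated isPowerOfTwo returns true for 0, a quirk of the bit trick):

    void powerOfTwoExamples() {
        nvDebugCheck(nv::isPowerOfTwo(64u));
        nvDebugCheck(!nv::isPowerOfTwo(48u));
        nvDebugCheck(nv::isPowerOfTwo(0u));                  // 0 & (0 - 1) == 0
        nvDebugCheck(nv::nextPowerOfTwo(uint32(48)) == 64);
        nvDebugCheck(nv::nextPowerOfTwo(uint64(3) << 40) == (uint64(4) << 40));  // 64-bit overload
    }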

@ -56,6 +56,7 @@
# define NV_OS_MINGW 1
# define NV_OS_WIN32 1
#elif defined POSH_OS_OSX
# define NV_OS_OSX 1 // IC: Adding this, because iOS defines NV_OS_DARWIN too.
# define NV_OS_DARWIN 1
# define NV_OS_UNIX 1
#elif defined POSH_OS_IOS
@ -78,9 +79,9 @@
// Threading:
// some platforms don't implement __thread or similar for thread-local-storage
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios?
#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS
# define NV_OS_USE_PTHREAD 1
# if NV_OS_DARWIN || NV_OS_IOS
# if 0 //Apple finally added TLS support to iOS!// NV_OS_IOS
# define NV_OS_HAS_TLS_QUALIFIER 0
# else
# define NV_OS_HAS_TLS_QUALIFIER 1
@ -96,7 +97,7 @@
// NV_CPU_X86_64
// NV_CPU_PPC
// NV_CPU_ARM
// NV_CPU_AARCH64
// NV_CPU_ARM_64
#define NV_CPU_STRING POSH_CPU_STRING
@ -110,7 +111,7 @@
#elif defined POSH_CPU_STRONGARM
# define NV_CPU_ARM 1
#elif defined POSH_CPU_AARCH64
# define NV_CPU_AARCH64 1
# define NV_CPU_ARM_64 1
#else
# error "Unsupported CPU"
#endif
@ -148,10 +149,16 @@
#endif
// Endiannes:
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
#define NV_BIG_ENDIAN POSH_BIG_ENDIAN
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING
// @@ POSH endian detection is broken for arm64 on iOS. They are bi-endian and iOS sets all their processors to little endian by default.
#if NV_OS_IOS
# define NV_LITTLE_ENDIAN 1
# define NV_BIG_ENDIAN 0
# define NV_ENDIAN_STRING "little"
#else
# define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
# define NV_BIG_ENDIAN POSH_BIG_ENDIAN
# define NV_ENDIAN_STRING POSH_ENDIAN_STRING
#endif
// Define the right printf prefix for size_t arguments:
#if POSH_64BIT_POINTER
@ -164,6 +171,28 @@
// cmake config
#include "nvconfig.h"
#if NV_OS_DARWIN
#include <stdint.h>
//#include <inttypes.h>
// Type definitions:
typedef uint8_t uint8;
typedef int8_t int8;
typedef uint16_t uint16;
typedef int16_t int16;
typedef uint32_t uint32;
typedef int32_t int32;
typedef uint64_t uint64;
typedef int64_t int64;
// POSH gets this wrong due to __LP64__
#undef POSH_I64_PRINTF_PREFIX
#define POSH_I64_PRINTF_PREFIX "ll"
#else
// Type definitions:
typedef posh_u8_t uint8;
@ -175,8 +204,23 @@ typedef posh_i16_t int16;
typedef posh_u32_t uint32;
typedef posh_i32_t int32;
//#if NV_OS_DARWIN
// OSX-64 is supposed to be LP64 (longs and pointers are 64 bits), thus uint64 is defined as
// unsigned long. However, some OSX headers define it as unsigned long long, producing errors,
// even though both types are 64 bit. Ideally posh should handle that, but it has not been
// updated in ages, so here I'm just falling back to the standard C99 types defined in inttypes.h
//#include <inttypes.h>
//typedef posh_u64_t uint64_t;
//typedef posh_i64_t int64_t;
//#else
typedef posh_u64_t uint64;
typedef posh_i64_t int64;
//#endif
#if NV_OS_DARWIN
// To avoid duplicate definitions.
#define _UINT64
#endif
#endif
// Aliases
typedef uint32 uint;
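The printf prefix override matters for logging 64-bit values portably; a minimal hedged sketch (the helper is hypothetical):

    #include <stdio.h>
    void printInt64(int64 value) {
        // With the Darwin override above this formats as "%lld"; elsewhere POSH supplies its own prefix.
        printf("value = %" POSH_I64_PRINTF_PREFIX "d\n", value);
    }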
@ -246,8 +290,10 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#include <stddef.h> // for size_t
template <typename T, size_t N> char (&ArraySizeHelper(T (&array)[N]))[N];
#define NV_ARRAY_SIZE(x) sizeof(ArraySizeHelper(x))
//#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
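The ArraySizeHelper template is the usual trick for making the element-count macro reject pointers at compile time; a small illustrative sketch:

    void arraySizeExample() {
        int table[16];
        int * p = table; (void)p;
        size_t n = NV_ARRAY_SIZE(table);   // 16, deduced from the array type
        nvDebugCheck(n == 16);
        // NV_ARRAY_SIZE(p) no longer compiles; the old sizeof-based macro would have
        // silently evaluated to sizeof(int*)/sizeof(int) instead of the element count.
    }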
#if 0 // Disabled in The Witness.
#if NV_CC_MSVC
@ -269,8 +315,38 @@ NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_STRING_JOIN3(AtStartup_, __LINE__, Instance); \
}
namespace nv {
template <typename F>
struct ScopeExit {
ScopeExit(F f) : f(f) {}
~ScopeExit() { f(); }
F f;
};
template <typename F>
ScopeExit<F> MakeScopeExit(F f) {
return ScopeExit<F>(f);
};
}
#define NV_ON_RETURN(code) \
auto NV_STRING_JOIN2(scope_exit_, __LINE__) = nv::MakeScopeExit([=](){code;})
// Tell the compiler that the parameter is not used, to suppress compiler warnings.
#if NV_CC_MSVC
#define NV_UNUSED(a) ((a)=(a))
#else
#define NV_UNUSED(a) _Pragma(NV_STRING(unused(a)))
#endif
#if NV_CC_GNUC || NV_CC_CLANG
#define NV_LIKELY(x) __builtin_expect(!!(x), 1)
#define NV_UNLIKELY(x) __builtin_expect(!!(x), 0)
#else
#define NV_LIKELY(x) x
#define NV_UNLIKELY(x) x
#endif
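A short hedged example of the new branch hints, which expand to __builtin_expect on GCC/Clang and to the plain expression elsewhere (the helper is hypothetical):

    #include <stdlib.h>
    bool checkedAlloc(void ** out, size_t size) {
        void * p = ::malloc(size);
        if (NV_UNLIKELY(p == NULL)) return false;   // mark the failure path as cold
        *out = p;
        return true;
    }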
// Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0);

@ -632,7 +632,7 @@ void BlockCTX1::setIndices(int * idx)
/// Decode BC6 block.
void BlockBC6::decodeBlock(Vector3 colors[16]) const
void BlockBC6::decodeBlock(Vector4 colors[16]) const
{
ZOH::Tile tile(4, 4);
ZOH::decompress((const char *)data, tile);
@ -648,6 +648,7 @@ void BlockBC6::decodeBlock(Vector3 colors[16]) const
colors[y * 4 + x].x = to_float(rHalf);
colors[y * 4 + x].y = to_float(gHalf);
colors[y * 4 + x].z = to_float(bHalf);
colors[y * 4 + x].w = 1.0f;
}
}
}

@ -36,6 +36,7 @@ namespace nv
struct AlphaBlock4x4;
class Stream;
class Vector3;
class Vector4;
/// DXT1 block.
@ -220,7 +221,7 @@ namespace nv
struct BlockBC6
{
uint8 data[16]; // Not even going to try to write a union for this thing.
void decodeBlock(Vector3 colors[16]) const;
void decodeBlock(Vector4 colors[16]) const;
};
/// BC7 block.

@ -14,7 +14,8 @@ SET(IMAGE_SRCS
NormalMap.h NormalMap.cpp
PixelFormat.h
PsdFile.h
TgaFile.h)
TgaFile.h
KtxFile.h KtxFile.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

@ -454,7 +454,8 @@ namespace
{ D3DFMT_L8, DXGI_FORMAT_R8_UNORM , { 8, 0xFF, 0, 0, 0 } },
{ D3DFMT_L16, DXGI_FORMAT_R16_UNORM, { 16, 0xFFFF, 0, 0, 0 } },
{ D3DFMT_A8L8, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0, 0, 0xFF00 } },
{ D3DFMT_A8L8, 0, { 16, 0xFF, 0, 0, 0xFF00 } },
{ 0, DXGI_FORMAT_R8G8_UNORM, { 16, 0xFF, 0xFF00, 0, 0 } },
};
static const uint s_formatCount = NV_ARRAY_SIZE(s_formats);
@ -635,7 +636,7 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
// set fourcc pixel format.
this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);
this->pf.fourcc = NV_MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0;
this->pf.rmask = 0;
@ -659,7 +660,7 @@ void DDSHeader::setFormatCode(uint32 code)
void DDSHeader::setSwizzleCode(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
{
this->pf.bitcount = MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = NV_MAKEFOURCC(c0, c1, c2, c3);
}
@ -1445,7 +1446,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{
BlockBC6 block;
*stream << block;
Vector3 colors[16];
Vector4 colors[16];
block.decodeBlock(colors);
// Clamp to [0, 1] and round to 8-bit
@ -1453,7 +1454,7 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
{
for (int x = 0; x < 4; ++x)
{
Vector3 px = colors[y*4 + x];
Vector4 px = colors[y*4 + x];
rgba->color(x, y).setRGBA(
ftoi_round(clamp(px.x, 0.0f, 1.0f) * 255.0f),
ftoi_round(clamp(px.y, 0.0f, 1.0f) * 255.0f),
@ -1535,7 +1536,7 @@ uint DirectDrawSurface::surfaceSize(uint mipmap) const
else {
w = (w + 3) / 4;
h = (h + 3) / 4;
d = d; // @@ How are 3D textures aligned?
//d = d; // @@ How are 3D textures aligned?
return blockSize * w * h * d;
}
}

@ -27,11 +27,9 @@
#include "nvimage.h"
#if !defined(MAKEFOURCC)
#define MAKEFOURCC(ch0, ch1, ch2, ch3) \
#define NV_MAKEFOURCC(ch0, ch1, ch2, ch3) \
(uint(uint8(ch0)) | (uint(uint8(ch1)) << 8) | \
(uint(uint8(ch2)) << 16) | (uint(uint8(ch3)) << 24 ))
#endif
namespace nv
{
@ -101,19 +99,26 @@ namespace nv
enum FOURCC
{
FOURCC_NVTT = MAKEFOURCC('N', 'V', 'T', 'T'),
FOURCC_DDS = MAKEFOURCC('D', 'D', 'S', ' '),
FOURCC_DXT1 = MAKEFOURCC('D', 'X', 'T', '1'),
FOURCC_DXT2 = MAKEFOURCC('D', 'X', 'T', '2'),
FOURCC_DXT3 = MAKEFOURCC('D', 'X', 'T', '3'),
FOURCC_DXT4 = MAKEFOURCC('D', 'X', 'T', '4'),
FOURCC_DXT5 = MAKEFOURCC('D', 'X', 'T', '5'),
FOURCC_RXGB = MAKEFOURCC('R', 'X', 'G', 'B'),
FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1'),
FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2'),
FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y'),
FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0'),
FOURCC_UVER = MAKEFOURCC('U', 'V', 'E', 'R'),
FOURCC_NVTT = NV_MAKEFOURCC('N', 'V', 'T', 'T'),
FOURCC_DDS = NV_MAKEFOURCC('D', 'D', 'S', ' '),
FOURCC_DXT1 = NV_MAKEFOURCC('D', 'X', 'T', '1'),
FOURCC_DXT2 = NV_MAKEFOURCC('D', 'X', 'T', '2'),
FOURCC_DXT3 = NV_MAKEFOURCC('D', 'X', 'T', '3'),
FOURCC_DXT4 = NV_MAKEFOURCC('D', 'X', 'T', '4'),
FOURCC_DXT5 = NV_MAKEFOURCC('D', 'X', 'T', '5'),
FOURCC_RXGB = NV_MAKEFOURCC('R', 'X', 'G', 'B'),
FOURCC_ATI1 = NV_MAKEFOURCC('A', 'T', 'I', '1'),
FOURCC_ATI2 = NV_MAKEFOURCC('A', 'T', 'I', '2'),
FOURCC_A2XY = NV_MAKEFOURCC('A', '2', 'X', 'Y'),
FOURCC_DX10 = NV_MAKEFOURCC('D', 'X', '1', '0'),
FOURCC_UVER = NV_MAKEFOURCC('U', 'V', 'E', 'R'),
FOURCC_BC6H = NV_MAKEFOURCC('B', 'C', '6', 'H'),
FOURCC_BC7L = NV_MAKEFOURCC('B', 'C', '7', 'L'),
FOURCC_PVR0 = NV_MAKEFOURCC('P', 'V', 'R', '0'),
FOURCC_PVR1 = NV_MAKEFOURCC('P', 'V', 'R', '1'),
FOURCC_PVR2 = NV_MAKEFOURCC('P', 'V', 'R', '2'),
FOURCC_PVR3 = NV_MAKEFOURCC('P', 'V', 'R', '3'),
};
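For reference, NV_MAKEFOURCC packs the four characters little-endian, so the codes match the byte order of the tags as they appear in a DDS file on disk. One worked value as a sanity check (assuming NV_COMPILER_CHECK from nvcore.h is usable here):

    // 'D' = 0x44, 'X' = 0x58, 'T' = 0x54, '1' = 0x31
    // 0x44 | (0x58 << 8) | (0x54 << 16) | (0x31 << 24) == 0x31545844
    NV_COMPILER_CHECK(nv::FOURCC_DXT1 == 0x31545844);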

@ -132,6 +132,59 @@ float nv::averageAlphaError(const FloatImage * ref, const FloatImage * img)
}
float nv::rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight)
{
nvDebugCheck(img->componentCount() == 4);
nvDebugCheck(ref->componentCount() == 4);
double mse = 0;
const uint w0 = ref->width();
const uint h0 = ref->height();
const uint d0 = ref->depth();
const uint w1 = img->width();
const uint h1 = img->height();
const uint d1 = img->depth();
for (uint z = 0; z < d0; z++) {
for (uint y = 0; y < h0; y++) {
for (uint x = 0; x < w0; x++) {
float r0 = ref->pixel(0, x, y, z);
float g0 = ref->pixel(1, x, y, z);
float b0 = ref->pixel(2, x, y, z);
float a0 = ref->pixel(3, x, y, z);
float fx = float(x) / w0;
float fy = float(y) / h0;
float fz = float(z) / d0;
float r1 = img->sampleLinear(0, fx, fy, fz, wm);
float g1 = img->sampleLinear(1, fx, fy, fz, wm);
float b1 = img->sampleLinear(2, fx, fy, fz, wm);
float a1 = img->sampleLinear(3, fx, fy, fz, wm); // alpha lives in channel 3
float dr = r0 - r1;
float dg = g0 - g1;
float db = b0 - b1;
float da = a0 - a1;
float w = 1;
if (alphaWeight) w = a0 * a0; // @@ a0*a1 or a0*a0 ?
mse += (dr * dr) * w;
mse += (dg * dg) * w;
mse += (db * db) * w;
mse += (da * da);
}
}
}
int count = w0 * h0 * d0;
return float(sqrt(mse / count));
}
// Color space conversions based on:
// http://www.brucelindbloom.com/

@ -1,5 +1,6 @@
#include "nvimage.h"
#include "FloatImage.h" // For FloatImage::WrapMode
namespace nv
@ -9,13 +10,15 @@ namespace nv
float rmsColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float rmsAlphaError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float rmsBilinearColorError(const FloatImage * ref, const FloatImage * img, FloatImage::WrapMode wm, bool alphaWeight);
float cieLabError(const FloatImage * ref, const FloatImage * img);
float cieLab94Error(const FloatImage * ref, const FloatImage * img);
float spatialCieLabError(const FloatImage * ref, const FloatImage * img);
float averageColorError(const FloatImage * ref, const FloatImage * img, bool alphaWeight);
float averageAlphaError(const FloatImage * ref, const FloatImage * img);
float averageAngularError(const FloatImage * img0, const FloatImage * img1);
float rmsAngularError(const FloatImage * img0, const FloatImage * img1);

@ -4,6 +4,8 @@
#include "Filter.h"
#include "Image.h"
#include "nvthread/ParallelFor.h"
#include "nvmath/Color.h"
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
@ -28,6 +30,13 @@ FloatImage::FloatImage() : m_componentCount(0), m_width(0), m_height(0), m_depth
{
}
FloatImage::FloatImage(const FloatImage & img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0),
m_pixelCount(0), m_floatCount(0), m_mem(NULL)
{
allocate(img.m_componentCount, img.m_width, img.m_height, img.m_depth);
memcpy(m_mem, img.m_mem, m_floatCount * sizeof(float));
}
/// Ctor. Init from image.
FloatImage::FloatImage(const Image * img) : m_componentCount(0), m_width(0), m_height(0), m_depth(0),
m_pixelCount(0), m_floatCount(0), m_mem(NULL)
@ -41,27 +50,32 @@ FloatImage::~FloatImage()
free();
}
/// Init the floating point image from a regular image.
void FloatImage::initFrom(const Image * img)
{
nvCheck(img != NULL);
allocate(4, img->width(), img->height(), img->depth());
uint channel_count = 3;
if (img->format() == Image::Format_ARGB) channel_count = 4;
allocate(channel_count, img->width(), img->height(), img->depth());
float * red_channel = channel(0);
float * green_channel = channel(1);
float * blue_channel = channel(2);
float * alpha_channel = channel(3);
float * alpha_channel = (channel_count == 4) ? channel(3) : NULL;
float scale = 1.0f / 255.0f;
const uint count = m_pixelCount;
for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [&](int i) {
Color32 pixel = img->pixel(i);
red_channel[i] = float(pixel.r) / 255.0f;
green_channel[i] = float(pixel.g) / 255.0f;
blue_channel[i] = float(pixel.b) / 255.0f;
alpha_channel[i] = float(pixel.a) / 255.0f;
}
red_channel[i] = float(pixel.r) * scale;
green_channel[i] = float(pixel.g) * scale;
blue_channel[i] = float(pixel.b) * scale;
if (channel_count == 4) alpha_channel[i] = float(pixel.a) * scale;
}//);
}
/// Convert the floating point image to a regular image.
@ -475,13 +489,17 @@ float FloatImage::sampleLinearClamp(uint c, float x, float y, float z) const
const float fracY = frac(y);
const float fracZ = frac(z);
//x -= fracX;
//y -= fracY;
//z -= fracZ;
// @@ Using floor in some places, but round in others?
const int ix0 = ::clamp(ifloor(x), 0, w-1);
const int iy0 = ::clamp(ifloor(y), 0, h-1);
const int iz0 = ::clamp(ifloor(z), 0, h-1);
const int iz0 = ::clamp(ifloor(z), 0, d-1);
const int ix1 = ::clamp(ifloor(x)+1, 0, w-1);
const int iy1 = ::clamp(ifloor(y)+1, 0, h-1);
const int iz1 = ::clamp(ifloor(z)+1, 0, h-1);
const int iz1 = ::clamp(ifloor(z)+1, 0, d-1);
return trilerp(c, ix0, iy0, iz0, ix1, iy1, iz1, fracX, fracY, fracZ);
}
@ -757,8 +775,8 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
dst_image->allocate(m_componentCount, w, h);
// @@ We could avoid this allocation, write directly to dst_plane.
Array<float> tmp_column(h);
tmp_column.resize(h);
//Array<float> tmp_column(h);
//tmp_column.resize(h);
for (uint c = 0; c < m_componentCount; c++)
{
@ -767,19 +785,21 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * tmp_plane = tmp_image->plane(c, z);
for (uint y = 0; y < m_height; y++) {
//parallel_for(m_height, [&](int y) {
this->applyKernelX(xkernel, y, z, c, wm, tmp_plane + y * w);
}
}//);
float * dst_plane = dst_image->plane(c, z);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
//parallel_for(w, [&](int x) {
tmp_image->applyKernelY(ykernel, x, z, c, wm, dst_plane + x, w);
// @@ We could avoid this copy, write directly to dst_plane.
for (uint y = 0; y < h; y++) {
/*for (uint y = 0; y < h; y++) {
dst_plane[y * w + x] = tmp_column[y];
}
}
}*/
}//);
}
}
}
@ -840,7 +860,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, W
for (uint z = 0; z < d; z++ ) {
for (uint x = 0; x < w; x++) {
tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1);
for (uint y = 0; y < h; y++) {
dst_channel[z * h * w + y * w + x] = tmp_column[y];
@ -890,7 +910,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, WrapMode
float * dst_plane = dst_image->plane(c, z);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
tmp_image->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1);
// @@ Avoid this copy, write directly to dst_plane.
for (uint y = 0; y < h; y++) {
@ -961,7 +981,7 @@ FloatImage * FloatImage::resize(const Filter & filter, uint w, uint h, uint d, W
for (uint z = 0; z < d; z++ ) {
for (uint x = 0; x < w; x++) {
tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer());
tmp_image2->applyKernelY(ykernel, x, z, c, wm, tmp_column.buffer(), 1);
for (uint y = 0; y < h; y++) {
dst_channel[z * h * w + y * w + x] = tmp_column[y];
@ -1124,7 +1144,7 @@ void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, W
}
/// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output) const
void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * __restrict output, int output_stride) const
{
const uint length = k.length();
const float scale = float(length) / float(m_height);
@ -1151,7 +1171,7 @@ void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, W
sum += k.valueAt(i, j) * channel[idx];
}
output[i] = sum;
output[i * output_stride] = sum;
}
}
@ -1225,7 +1245,7 @@ void FloatImage::applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, u
}
/// Apply 1D vertical kernel at the given coordinates and return result.
void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output) const
void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * __restrict output, int output_stride) const
{
const uint length = k.length();
const float scale = float(length) / float(m_height);
@ -1256,7 +1276,7 @@ void FloatImage::applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, u
sum += w * channel[idx];
}
output[i] = sum / norm;
output[i * output_stride] = sum / norm;
}
}
@ -1432,11 +1452,19 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int
float minAlphaScale = 0.0f;
float maxAlphaScale = 4.0f;
float alphaScale = 1.0f;
float bestAlphaScale = 1.0f;
float bestError = NV_FLOAT_MAX;
// Determine desired scale using a binary search. Hardcoded to 8 steps max.
// Determine desired scale using a binary search. Hardcoded to 10 steps max.
for (int i = 0; i < 10; i++) {
float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale);
float error = fabsf(currentCoverage - desiredCoverage);
if (error < bestError) {
bestError = error;
bestAlphaScale = alphaScale;
}
if (currentCoverage < desiredCoverage) {
minAlphaScale = alphaScale;
}
@ -1451,7 +1479,7 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int
}
// Scale alpha channel.
scaleBias(alphaChannel, 1, alphaScale, 0.0f);
scaleBias(alphaChannel, 1, bestAlphaScale, 0.0f);
clamp(alphaChannel, 1, 0.0f, 1.0f);
#endif
#if _DEBUG

@ -35,6 +35,7 @@ namespace nv
};
NVIMAGE_API FloatImage();
NVIMAGE_API FloatImage(const FloatImage & img);
NVIMAGE_API FloatImage(const Image * img);
NVIMAGE_API virtual ~FloatImage();
@ -92,10 +93,10 @@ namespace nv
NVIMAGE_API float applyKernelY(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API float applyKernelZ(const Kernel1 * k, int x, int y, int z, uint c, WrapMode wm) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelX(const PolyphaseKernel & k, int y, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output) const;
NVIMAGE_API void applyKernelY(const PolyphaseKernel & k, int x, int z, uint c, uint a, WrapMode wm, float * output, int output_stride) const;
NVIMAGE_API void applyKernelZ(const PolyphaseKernel & k, int x, int y, uint c, uint a, WrapMode wm, float * output) const;

@ -42,13 +42,21 @@ const Image & Image::operator=(const Image & img)
void Image::allocate(uint w, uint h, uint d/*= 1*/)
{
free();
m_width = w;
m_height = h;
m_depth = d;
m_data = realloc<Color32>(m_data, w * h * d);
}
void Image::acquire(Color32 * data, uint w, uint h, uint d/*= 1*/)
{
free();
m_width = w;
m_height = h;
m_depth = d;
m_data = data;
}
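acquire() is the no-copy counterpart of allocate(); a hedged usage sketch (the size is illustrative, and the buffer comes from the nv::malloc helper shown earlier in this commit so the Image can free it later):

    void acquireExample() {
        nv::Color32 * pixels = nv::malloc<nv::Color32>(256 * 256);
        // ... fill the pixels ...
        nv::Image img;
        img.acquire(pixels, 256, 256);   // the image takes ownership; no copy is made
    }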
void Image::resize(uint w, uint h, uint d/*= 1*/) {
Image img;

@ -34,6 +34,7 @@ namespace nv
void allocate(uint w, uint h, uint d = 1);
void acquire(Color32 * data, uint w, uint h, uint d = 1);
bool load(const char * name);
void resize(uint w, uint h, uint d = 1);

@ -8,6 +8,8 @@
#include "DirectDrawSurface.h"
#include "PixelFormat.h"
#include "nvthread/ParallelFor.h"
#include "nvmath/Color.h"
#include "nvmath/Half.h"
@ -19,31 +21,31 @@
#include "nvcore/TextWriter.h"
// Extern
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
# include <FreeImage.h>
// If FreeImage is available, do not use the individual libraries, since that produces link conflicts on some platforms.
# undef HAVE_JPEG
# undef HAVE_PNG
# undef HAVE_TIFF
# undef HAVE_OPENEXR
# undef NV_HAVE_JPEG
# undef NV_HAVE_PNG
# undef NV_HAVE_TIFF
# undef NV_HAVE_OPENEXR
#endif
#if defined(HAVE_JPEG)
#if defined(NV_HAVE_JPEG)
extern "C" {
# include <jpeglib.h>
}
#endif
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
# include <png.h>
#endif
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
# define _TIFF_DATA_TYPEDEFS_
# include <tiffio.h>
#endif
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
# include <ImfIO.h>
# include <ImathBox.h>
# include <ImfChannelList.h>
@ -52,7 +54,7 @@ extern "C" {
# include <ImfArray.h>
#endif
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
# define STBI_NO_STDIO
# include <stb_image.h>
#endif
@ -303,6 +305,51 @@ static bool saveTGA(Stream & s, const Image * img)
return true;
}
#pragma optimize("", off)
// Save BMP image.
static bool saveBMP(Stream & s, const Image * img)
{
int w = img->width();
int h = img->height();
int image_size = w * h * 3;
BmpFileHeader header;
zero(header);
header.type = BM_TYPE;
header.size = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE + image_size;
header.offBits = BITMAPFILEHEADER_SIZE + BITMAPINFOHEADER_SIZE;
BmpInfoHeader info;
zero(info);
info.size = BITMAPINFOHEADER_SIZE;
info.width = w;
info.height = h;
info.planes = 1;
info.bitCount = 24;
info.sizeImage = image_size;
info.xPelsPerMeter = 2000;
info.yPelsPerMeter = 2000;
s << header;
s << info;
nv::Array<uint8> data;
data.resize(3 * w);
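// Note: rows are written as w * 3 raw bytes with no 4-byte row padding, so the output is a
// conforming BMP only when w * 3 is a multiple of 4 (i.e. when w is a multiple of 4).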
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
data[x * 3 + 0] = img->pixel(x, h - y - 1).b;
data[x * 3 + 1] = img->pixel(x, h - y - 1).g;
data[x * 3 + 2] = img->pixel(x, h - y - 1).r;
}
s.serialize(data.buffer(), data.size());
}
return true;
}
/*static Image * loadPPM(Stream & s)
{
// @@
@ -324,7 +371,10 @@ static bool savePPM(Stream & s, const Image * img)
writer.writeString("255\n");
for (uint i = 0; i < w * h; i++) {
Color32 c = img->pixel(i);
s << (uint8_t&)c.r << (uint8_t&)c.g << (uint8_t&)c.b;
uint8 r = c.r; // current version of apple's llvm compiling for arm64 doesn't like taking the address of a bit-field. Workaround by using the stack
uint8 g = c.g;
uint8 b = c.b;
s << r << g << b;
}
return true;
@ -653,7 +703,7 @@ static bool saveFloatDDS(Stream & s, const FloatImage * img, uint base_component
}
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
static void user_read_data(png_structp png_ptr, png_bytep data, png_size_t length)
{
@ -902,9 +952,9 @@ static bool savePNG(Stream & s, const Image * img, const char ** tags/*=NULL*/)
return true;
}
#endif // defined(HAVE_PNG)
#endif // defined(NV_HAVE_PNG)
#if defined(HAVE_JPEG)
#if defined(NV_HAVE_JPEG)
static void init_source (j_decompress_ptr /*cinfo*/){
}
@ -1011,9 +1061,9 @@ static Image * loadJPG(Stream & s)
return img.release();
}
#endif // defined(HAVE_JPEG)
#endif // defined(NV_HAVE_JPEG)
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
/*
static tsize_t tiffReadWriteProc(thandle_t h, tdata_t ptr, tsize_t size)
@ -1207,9 +1257,9 @@ static bool saveFloatTIFF(const char * fileName, const FloatImage * fimage, uint
return true;
}
#endif // defined(HAVE_TIFF)
#endif // defined(NV_HAVE_TIFF)
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
namespace
{
@ -1348,10 +1398,10 @@ static bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint
return true;
}
#endif // defined(HAVE_OPENEXR)
#endif // defined(NV_HAVE_OPENEXR)
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
static unsigned DLL_CALLCONV ReadProc(void *buffer, unsigned size, unsigned count, fi_handle handle)
{
@ -1688,10 +1738,10 @@ bool nv::ImageIO::saveFloatFreeImage(FREE_IMAGE_FORMAT fif, Stream & s, const Fl
return result;
}
#endif // defined(HAVE_FREEIMAGE)
#endif // defined(NV_HAVE_FREEIMAGE)
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
static Image * loadSTB(Stream & s)
{
@ -1704,28 +1754,22 @@ static Image * loadSTB(Stream & s)
int w, h, n;
uint8 * data = stbi_load_from_memory(buffer, size, &w, &h, &n, 4);
// @@ Hack: STB is returning n=4, because we request 4 components, even when input only has 3.
n = 3;
delete [] buffer;
if (data != NULL) {
Image * img = new Image;
img->allocate(w, h);
img->acquire((Color32 *)data, w, h);
img->setFormat(n == 4 ? Image::Format_ARGB : Image::Format_RGB);
for (int y = 0; y < h; ++y)
{
nv::Color32* dest = img->scanline(y);
uint8* src = data + y * w * 4;
for (int x = 0; x < w; ++x)
{
dest[x].r = src[x * 4 + 0];
dest[x].g = src[x * 4 + 1];
dest[x].b = src[x * 4 + 2];
dest[x].a = src[x * 4 + 3];
}
}
free(data);
int count = w * h;
for (int i = 0; i < count; ++i) {
//parallel_for(count, 128, [&](int i) {
Color32 & pixel = img->pixel(i);
swap(pixel.r, pixel.b);
}//);
return img;
}
@ -1766,7 +1810,7 @@ static FloatImage * loadFloatSTB(Stream & s)
return NULL;
}
#endif // defined(HAVE_STBIMAGE)
#endif // defined(NV_HAVE_STBIMAGE)
@ -1804,32 +1848,33 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s)
return loadPPM(s);
}*/
#if defined(HAVE_JPEG)
#if defined(NV_HAVE_JPEG)
if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) {
return loadJPG(s);
}
#endif
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) {
return loadPNG(s);
}
#endif
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFreeImage(fif, s);
}
#endif
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
return loadSTB(s);
#endif
return NULL;
}
bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, const char ** tags/*=NULL*/)
{
nvDebugCheck(fileName != NULL);
@ -1838,6 +1883,10 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
const char * extension = Path::extension(fileName);
if (strCaseDiff(extension, ".bmp") == 0) {
return saveBMP(s, img);
}
if (strCaseDiff(extension, ".tga") == 0) {
return saveTGA(s, img);
}
@ -1846,13 +1895,13 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
return savePPM(s, img);
}
#if defined(HAVE_PNG)
#if defined(NV_HAVE_PNG)
if (strCaseDiff(extension, ".png") == 0) {
return savePNG(s, img, tags);
}
#endif
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFreeImage(fif, s, img, tags);
@ -1899,27 +1948,27 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
return loadFloatPFM(s);
}*/
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
#pragma NV_MESSAGE("TODO: Load TIFF from stream.")
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return loadFloatTIFF(fileName, s);
}
#endif
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
#pragma NV_MESSAGE("TODO: Load EXR from stream.")
if (strCaseDiff(extension, ".exr") == 0) {
return loadFloatEXR(fileName, s);
}
#endif
#if defined(HAVE_STBIMAGE)
#if defined(NV_HAVE_STBIMAGE)
if (strCaseDiff(extension, ".hdr") == 0) {
return loadFloatSTB(s);
}
#endif
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsReading(fif)) {
return loadFloatFreeImage(fif, s);
@ -1961,7 +2010,7 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
return saveFloatPFM(s, fimage, baseComponent, componentCount);
}*/
#if defined(HAVE_FREEIMAGE)
#if defined(NV_HAVE_FREEIMAGE)
FREE_IMAGE_FORMAT fif = FreeImage_GetFIFFromFilename(fileName);
if (fif != FIF_UNKNOWN && FreeImage_FIFSupportsWriting(fif)) {
return saveFloatFreeImage(fif, s, fimage, baseComponent, componentCount);
@ -2005,14 +2054,15 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui
}
const char * extension = Path::extension(fileName);
NV_UNUSED(extension);
#if defined(HAVE_OPENEXR)
#if defined(NV_HAVE_OPENEXR)
if (strCaseDiff(extension, ".exr") == 0) {
return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
}
#endif
#if defined(HAVE_TIFF)
#if defined(NV_HAVE_TIFF)
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
return saveFloatTIFF(fileName, fimage, baseComponent, componentCount);
}

@ -1,6 +1,7 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "KtxFile.h"
#include "nvcore/StdStream.h"
using namespace nv;
@ -10,6 +11,8 @@ static const uint8 fileIdentifier[12] = {
0x0D, 0x0A, 0x1A, 0x0A
};
namespace nv
{
KtxHeader::KtxHeader() {
memcpy(identifier, fileIdentifier, 12);
@ -19,8 +22,8 @@ KtxHeader::KtxHeader() {
glType = 0;
glTypeSize = 1;
glFormat = 0;
glInternalFormat = KTX_RGBA;
glBaseInternalFormat = KTX_RGBA;
glInternalFormat = KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1;
glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
pixelWidth = 0;
pixelHeight = 0;
pixelDepth = 0;
@ -31,9 +34,9 @@ KtxHeader::KtxHeader() {
}
Stream & operator<< (Stream & s, DDSHeader & header) {
Stream & operator<< (Stream & s, KtxHeader & header) {
s.serialize(header.identifier, 12);
s << header.endiannes << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
s << header.endianness << header.glType << header.glTypeSize << header.glFormat << header.glInternalFormat << header.glBaseInternalFormat;
s << header.pixelWidth << header.pixelHeight << header.pixelDepth;
s << header.numberOfArrayElements << header.numberOfFaces << header.numberOfMipmapLevels;
s << header.bytesOfKeyValueData;
@ -41,7 +44,7 @@ Stream & operator<< (Stream & s, DDSHeader & header) {
}
KtxFile::KtxFile() {
/*KtxFile::KtxFile() {
}
KtxFile::~KtxFile() {
}
@ -49,7 +52,7 @@ KtxFile::~KtxFile() {
void KtxFile::addKeyValue(const char * key, const char * value) {
keyArray.append(key);
valueArray.append(value);
bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
header.bytesOfKeyValueData += strlen(key) + 1 + strlen(value) + 1;
}
@ -77,7 +80,8 @@ Stream & operator<< (Stream & s, KtxFile & file) {
}
return s;
}
}*/
} // nv
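As a reference for the new constants and the KtxHeader serializer above, a minimal hedged sketch of writing a KTX header for a BC1/DXT1 texture (the function name and dimensions are made up; image data and key/value pairs would follow the header):

    void writeKtxHeaderSketch(nv::Stream & s, nv::uint w, nv::uint h, nv::uint mipCount) {
        nv::KtxHeader header;   // the constructor fills the identifier and default fields
        header.glInternalFormat = nv::KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1;
        header.glBaseInternalFormat = nv::KTX_BASE_INTERNAL_RGB;
        header.pixelWidth = w;
        header.pixelHeight = h;
        header.numberOfMipmapLevels = mipCount;
        s << header;            // uses the operator<< declared in KtxFile.h
    }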

@ -6,6 +6,7 @@
#include "nvimage.h"
#include "nvcore/StrLib.h"
#include "nvcore/Array.h"
// KTX File format specification:
// http://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#key
@ -14,22 +15,99 @@ namespace nv
{
class Stream;
// GL types (Table 3.2)
const uint KTX_UNSIGNED_BYTE;
const uint KTX_UNSIGNED_SHORT_5_6_5;
// ...
// GL formats (Table 3.3)
// ...
// GL internal formats (Table 3.12, 3.13)
// ...
// GL base internal format. (Table 3.11)
const uint KTX_RGB;
const uint KTX_RGBA;
const uint KTX_ALPHA;
// ...
// GL types
const uint KTX_UNSIGNED_BYTE = 0x1401;
const uint KTX_BYTE = 0x1400;
const uint KTX_UNSIGNED_SHORT = 0x1403;
const uint KTX_SHORT = 0x1402;
const uint KTX_UNSIGNED_INT = 0x1405;
const uint KTX_INT = 0x1404;
const uint KTX_FLOAT = 0x1406;
const uint KTX_UNSIGNED_BYTE_3_3_2 = 0x8032;
const uint KTX_UNSIGNED_BYTE_2_3_3_REV = 0x8362;
const uint KTX_UNSIGNED_SHORT_5_6_5 = 0x8363;
const uint KTX_UNSIGNED_SHORT_5_6_5_REV = 0x8364;
const uint KTX_UNSIGNED_SHORT_4_4_4_4 = 0x8033;
const uint KTX_UNSIGNED_SHORT_4_4_4_4_REV = 0x8365;
const uint KTX_UNSIGNED_SHORT_5_5_5_1 = 0x8034;
const uint KTX_UNSIGNED_SHORT_1_5_5_5_REV = 0x8366;
const uint KTX_UNSIGNED_INT_8_8_8_8 = 0x8035;
const uint KTX_UNSIGNED_INT_8_8_8_8_REV = 0x8367;
const uint KTX_UNSIGNED_INT_10_10_10_2 = 0x8036;
const uint KTX_UNSIGNED_INT_2_10_10_10_REV = 0x8368;
// GL formats
const uint KTX_FORMAT_RED = 0x1903;
const uint KTX_FORMAT_RG = 0x8227;
const uint KTX_FORMAT_RGB = 0x1907;
const uint KTX_FORMAT_BGR = 0x80E0;
const uint KTX_FORMAT_RGBA = 0x1908;
const uint KTX_FORMAT_BGRA = 0x80E1;
const uint KTX_FORMAT_RED_INTEGER = 0x8D94;
const uint KTX_FORMAT_RG_INTEGER = 0x8228;
const uint KTX_FORMAT_RGB_INTEGER = 0x8D98;
const uint KTX_FORMAT_BGR_INTEGER = 0x8D9A;
const uint KTX_FORMAT_RGBA_INTEGER = 0x8D99;
const uint KTX_FORMAT_BGRA_INTEGER = 0x8D9B;
const uint KTX_FORMAT_STENCIL_INDEX = 0x1901;
const uint KTX_FORMAT_DEPTH_COMPONENT = 0x1902;
const uint KTX_FORMAT_DEPTH_STENCIL = 0x84F9;
// GL internal formats
// BC1
const uint KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1 = 0x83F0;
const uint KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 = 0x8C4C;
// BC1a
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1 = 0x83F1;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 = 0x8C4D;
// BC2
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3 = 0x83F2;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 = 0x8C4E;
// BC3
const uint KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5 = 0x83F3;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 = 0x8C4F;
// BC4
const uint KTX_INTERNAL_COMPRESSED_RED_RGTC1 = 0x8DBB;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 = 0x8DBC;
// BC5
const uint KTX_INTERNAL_COMPRESSED_RG_RGTC2 = 0x8DBD;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 = 0x8DBE;
// BC6
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT = 0x8E8F;
const uint KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT = 0x8E8E;
// BC7
const uint KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM = 0x8E8C;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM = 0x8E8D;
// ETC
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC1 = 0x8D64;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC1 = 0x8D64; // ???
// ETC2
const uint KTX_INTERNAL_COMPRESSED_RED_EAC = 0x9270;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RED_EAC = 0x9271;
const uint KTX_INTERNAL_COMPRESSED_RG_EAC = 0x9272;
const uint KTX_INTERNAL_COMPRESSED_SIGNED_RG_EAC = 0x9273;
const uint KTX_INTERNAL_COMPRESSED_RGB_ETC2 = 0x9274;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ETC2 = 0x9275;
const uint KTX_INTERNAL_COMPRESSED_RGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9276;
const uint KTX_INTERNAL_COMPRESSED_SRGB_PUNCHTHROUGH_ALPHA_ETC2 = 0x9277;
const uint KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC = 0x9278;
const uint KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC = 0x9279;
// GL base internal formats
const uint KTX_BASE_INTERNAL_DEPTH_COMPONENT = 0x1902;
const uint KTX_BASE_INTERNAL_DEPTH_STENCIL = 0x84F9;
const uint KTX_BASE_INTERNAL_RED = 0x1903;
const uint KTX_BASE_INTERNAL_RG = 0x8227;
const uint KTX_BASE_INTERNAL_RGB = 0x1907;
const uint KTX_BASE_INTERNAL_RGBA = 0x1908;
const uint KTX_BASE_INTERNAL_STENCIL_INDEX = 0x1901;
struct KtxHeader {
@ -52,10 +130,10 @@ namespace nv
};
NVIMAGE_API Stream & operator<< (Stream & s, DDSHeader & header);
NVIMAGE_API Stream & operator<< (Stream & s, KtxHeader & header);
struct KtxFile {
/* struct KtxFile {
KtxFile();
~KtxFile();
@ -66,10 +144,9 @@ namespace nv
Array<String> keyArray;
Array<String> valueArray;
};
NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);
NVIMAGE_API Stream & operator<< (Stream & s, KtxFile & file);*/
/*

@ -101,6 +101,48 @@ inline Stream & operator<< (Stream & s, TgaFile & tga)
return s;
}
// @@ Move to BMP file?
const int BITMAPFILEHEADER_SIZE = 14;
const int BITMAPINFOHEADER_SIZE = 40;
const int BM_TYPE = ((unsigned int)'M') << 8 | ((unsigned int)'B');
// BMP Header.
struct BmpFileHeader {
uint16 type;
uint32 size;
uint16 reserved1;
uint16 reserved2;
uint32 offBits;
};
struct BmpInfoHeader {
uint32 size;
uint32 width;
uint32 height;
uint16 planes;
uint16 bitCount;
uint32 compression;
uint32 sizeImage;
uint32 xPelsPerMeter;
uint32 yPelsPerMeter;
uint32 clrUsed;
uint32 clrImportant;
};
inline Stream & operator<< (Stream & s, BmpFileHeader & bmp) {
return s << bmp.type << bmp.size << bmp.reserved1 << bmp.reserved2 << bmp.offBits;
}
inline Stream & operator<< (Stream & s, BmpInfoHeader & bmp) {
s << bmp.size << bmp.width << bmp.height << bmp.planes << bmp.bitCount << bmp.compression << bmp.sizeImage;
s << bmp.xPelsPerMeter << bmp.yPelsPerMeter << bmp.clrUsed << bmp.clrImportant;
return s;
}
} // nv namespace
#endif // NV_IMAGE_TGAFILE_H

@ -7,7 +7,7 @@ SET(MATH_SRCS
Fitting.h Fitting.cpp
Gamma.h Gamma.cpp
Half.h Half.cpp
Matrix.h
Matrix.h Matrix.inl Matrix.cpp
Plane.h Plane.inl Plane.cpp
SphericalHarmonic.h SphericalHarmonic.cpp
SimdVector.h SimdVector_SSE.h SimdVector_VE.h

@ -157,6 +157,12 @@ namespace nv
return Vector4(c.r * scale, c.g * scale, c.b * scale, c.a * scale);
}
inline Vector3 toVector3(Color32 c)
{
const float scale = 1.0f / 255.0f;
return Vector3(c.r * scale, c.g * scale, c.b * scale);
}
inline float perceptualColorDistance(Vector3::Arg c0, Vector3::Arg c1)
{

@ -197,6 +197,36 @@ bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
return true;
}
bool nv::solveLU(const Matrix2 & A, const Vector2 & b, Vector2 * x)
{
nvDebugCheck(x != NULL);
float m[2][2];
float *a[2] = {m[0], m[1]};
int idx[2];
float d;
for (int y = 0; y < 2; y++) {
for (int x = 0; x < 2; x++) {
a[x][y] = A(x, y);
}
}
// Create LU decomposition.
if (!ludcmp(a, 2, idx, &d)) {
// Singular matrix.
return false;
}
// Init solution.
*x = b;
// Do back substitution.
lubksb(a, 2, idx, x->component);
return true;
}
bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
{
@ -223,6 +253,22 @@ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
return true;
}
bool nv::solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x)
{
nvDebugCheck(x != NULL);
const float det = A.determinant();
if (equal(det, 0.0f)) { // @@ Use input epsilon.
return false;
}
Matrix2 Ai = inverseCramer(A);
*x = transform(Ai, b);
return true;
}
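A worked example of the new 2x2 path, with made-up numbers: the system 2x + y = 5, x + 3y = 10 has the exact solution x = 1, y = 3.

    void solve2x2Example() {
        // Matrix2(a, b, c, d) fills columns (a, b) and (c, d), so this is [[2, 1], [1, 3]].
        nv::Matrix2 A(2, 1, 1, 3);
        nv::Vector2 b(5, 10);
        nv::Vector2 x;
        if (nv::solveCramer(A, b, &x)) {
            // x is now (1, 3); solveLU(A, b, &x) takes the same arguments.
        }
    }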
// Inverse using gaussian elimination. From Jon's code.

@ -14,6 +14,46 @@ namespace nv
{
enum identity_t { identity };
// 2x2 matrix.
class NVMATH_CLASS Matrix2
{
public:
Matrix2();
explicit Matrix2(float f);
explicit Matrix2(identity_t);
Matrix2(const Matrix2 & m);
Matrix2(Vector2::Arg v0, Vector2::Arg v1);
Matrix2(float a, float b, float c, float d);
float data(uint idx) const;
float & data(uint idx);
float get(uint row, uint col) const;
float operator()(uint row, uint col) const;
float & operator()(uint row, uint col);
Vector2 row(uint i) const;
Vector2 column(uint i) const;
void operator*=(float s);
void operator/=(float s);
void operator+=(const Matrix2 & m);
void operator-=(const Matrix2 & m);
void scale(float s);
void scale(Vector2::Arg s);
float determinant() const;
private:
float m_data[4];
};
// Solve equation system using LU decomposition and back-substitution.
extern bool solveLU(const Matrix2 & m, const Vector2 & b, Vector2 * x);
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix2 & A, const Vector2 & b, Vector2 * x);
// 3x3 matrix.
class NVMATH_CLASS Matrix3
{
@ -52,6 +92,8 @@ namespace nv
// Solve equation system using Cramer's inverse.
extern bool solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x);
extern Matrix3 inverse(const Matrix3 & m);
// 4x4 matrix.
class NVMATH_CLASS Matrix
@ -106,7 +148,6 @@ namespace nv
// Compute inverse using Gaussian elimination and partial pivoting.
extern Matrix inverse(const Matrix & m);
extern Matrix3 inverse(const Matrix3 & m);
} // nv namespace

@ -8,6 +8,199 @@
namespace nv
{
inline Matrix2::Matrix2() {}
inline Matrix2::Matrix2(float f)
{
for(int i = 0; i < 4; i++) {
m_data[i] = f;
}
}
inline Matrix2::Matrix2(identity_t)
{
for(int i = 0; i < 2; i++) {
for(int j = 0; j < 2; j++) {
m_data[2*j+i] = (i == j) ? 1.0f : 0.0f;
}
}
}
inline Matrix2::Matrix2(const Matrix2 & m)
{
for(int i = 0; i < 4; i++) {
m_data[i] = m.m_data[i];
}
}
inline Matrix2::Matrix2(Vector2::Arg v0, Vector2::Arg v1)
{
m_data[0] = v0.x; m_data[1] = v0.y;
m_data[2] = v1.x; m_data[3] = v1.y;
}
inline Matrix2::Matrix2(float a, float b, float c, float d)
{
m_data[0] = a; m_data[1] = b;
m_data[2] = c; m_data[3] = d;
}
inline float Matrix2::data(uint idx) const
{
nvDebugCheck(idx < 4);
return m_data[idx];
}
inline float & Matrix2::data(uint idx)
{
nvDebugCheck(idx < 4);
return m_data[idx];
}
inline float Matrix2::get(uint row, uint col) const
{
nvDebugCheck(row < 2 && col < 2);
return m_data[col * 2 + row];
}
inline float Matrix2::operator()(uint row, uint col) const
{
nvDebugCheck(row < 2 && col < 2);
return m_data[col * 2 + row];
}
inline float & Matrix2::operator()(uint row, uint col)
{
nvDebugCheck(row < 2 && col < 2);
return m_data[col * 2 + row];
}
inline Vector2 Matrix2::row(uint i) const
{
nvDebugCheck(i < 2);
return Vector2(get(i, 0), get(i, 1));
}
inline Vector2 Matrix2::column(uint i) const
{
nvDebugCheck(i < 2);
return Vector2(get(0, i), get(1, i));
}
inline void Matrix2::operator*=(float s)
{
for(int i = 0; i < 4; i++) {
m_data[i] *= s;
}
}
inline void Matrix2::operator/=(float s)
{
        float is = 1.0f / s;
for(int i = 0; i < 4; i++) {
m_data[i] *= is;
}
}
inline void Matrix2::operator+=(const Matrix2 & m)
{
for(int i = 0; i < 4; i++) {
m_data[i] += m.m_data[i];
}
}
inline void Matrix2::operator-=(const Matrix2 & m)
{
for(int i = 0; i < 4; i++) {
m_data[i] -= m.m_data[i];
}
}
inline Matrix2 operator+(const Matrix2 & a, const Matrix2 & b)
{
Matrix2 m = a;
m += b;
return m;
}
inline Matrix2 operator-(const Matrix2 & a, const Matrix2 & b)
{
Matrix2 m = a;
m -= b;
return m;
}
inline Matrix2 operator*(const Matrix2 & a, float s)
{
Matrix2 m = a;
m *= s;
return m;
}
inline Matrix2 operator*(float s, const Matrix2 & a)
{
Matrix2 m = a;
m *= s;
return m;
}
inline Matrix2 operator/(const Matrix2 & a, float s)
{
Matrix2 m = a;
m /= s;
return m;
}
inline Matrix2 mul(const Matrix2 & a, const Matrix2 & b)
{
Matrix2 m;
for(int i = 0; i < 2; i++) {
const float ai0 = a(i,0), ai1 = a(i,1);
m(i, 0) = ai0 * b(0,0) + ai1 * b(1,0);
m(i, 1) = ai0 * b(0,1) + ai1 * b(1,1);
}
return m;
}
inline Matrix2 operator*(const Matrix2 & a, const Matrix2 & b)
{
return mul(a, b);
}
    // Transform the given 2d vector with the given matrix.
inline Vector2 transform(const Matrix2 & m, const Vector2 & p)
{
return Vector2(p.x * m(0,0) + p.y * m(0,1),
p.x * m(1,0) + p.y * m(1,1));
}
inline void Matrix2::scale(float s)
{
for (int i = 0; i < 4; i++) {
m_data[i] *= s;
}
}
inline void Matrix2::scale(Vector2::Arg s)
{
m_data[0] *= s.x; m_data[1] *= s.x;
m_data[2] *= s.y; m_data[3] *= s.y;
}
inline float Matrix2::determinant() const
{
return get(0,0) * get(1,1) - get(0,1) * get(1,0);
}
// Inverse using Cramer's rule.
inline Matrix2 inverseCramer(const Matrix2 & m)
{
const float det = m.determinant();
if (equal(det, 0.0f, 0.0f)) {
return Matrix2(0);
}
        // Adjugate divided by the determinant (Cramer's rule).
        return Matrix2(m(1,1), -m(1,0), -m(0,1), m(0,0)) * (1.0f / det);
}
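    // Illustrative sketch (not part of the library): a rotation expressed with the
    // column-major conventions above.
    //
    //   const float a = 1.5707963f;                      // ~90 degrees
    //   Matrix2 R(cosf(a), sinf(a), -sinf(a), cosf(a));  // columns (cos,sin) and (-sin,cos)
    //   Vector2 p = transform(R, Vector2(1, 0));         // ~(0, 1)
    //   Matrix2 I = mul(R, inverseCramer(R));            // ~identity, since det(R) ~ 1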
inline Matrix3::Matrix3() {}
inline Matrix3::Matrix3(float f)
@ -794,7 +987,7 @@ v1 = FXVector3.Cross(v3, v2);
v1.Normalize();
Matrix R = Matrix::Identity;
R[0, 0] = v3.X; // Not sure this is in the correct order...
R[0, 0] = v3.X; // Not sure this is in the correct order...
R[1, 0] = v3.Y;
R[2, 0] = v3.Z;
R[0, 1] = v1.X;

@ -7,10 +7,6 @@
#include "nvmath.h"
#include "Vector.h"
#if NV_USE_ALTIVEC
#undef vector
#endif
namespace nv
{
class Matrix;
@ -29,6 +25,7 @@ namespace nv
Vector3 vector() const;
float offset() const;
Vector3 normal() const;
void operator*=(float s);

@ -24,6 +24,7 @@ namespace nv
inline Vector3 Plane::vector() const { return v.xyz(); }
inline float Plane::offset() const { return v.w; }
inline Vector3 Plane::normal() const { return normalize(vector(), 0.0f); }
// Normalize plane.
inline Plane normalize(const Plane & plane, float epsilon = NV_EPSILON)

@ -452,7 +452,7 @@ namespace nv
// That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point
    // numbers and the result becomes very unstable and dependent on the order of the factors.
// Instead, it's preferable to substract the vertices first, and multiply the resulting small values together. The result
// Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. The result
    // in this case is always much more accurate (as long as the triangle is small) and less dependent on the location of
// the triangle.
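    // Minimal sketch of the two orderings (vertex names a, b, c are hypothetical):
    //
    //   Vector3 n_bad  = cross(b, c) + cross(c, a) + cross(a, b);  // products of large absolute positions
    //   Vector3 n_good = cross(b - a, c - a);                      // subtract first, then multiply small deltas
    //
    // Both expressions are algebraically equal, but the second keeps the intermediate
    // values small and therefore loses far less precision for distant triangles.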

@ -194,15 +194,20 @@ namespace nv
#endif
}
inline uint log2(uint i)
inline uint log2(uint32 i)
{
uint value = 0;
while( i >>= 1 ) {
value++;
}
uint32 value = 0;
while( i >>= 1 ) value++;
return value;
}
inline uint log2(uint64 i)
{
uint64 value = 0;
while (i >>= 1) value++;
return U32(value);
}
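    // Both overloads return the floor of the base-2 logarithm, e.g. log2(1u) == 0,
    // log2(8u) == 3, log2(9u) == 3 and log2(uint64(1) << 40) == 40.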
inline float lerp(float f0, float f1, float t)
{
const float s = 1.0f - t;

@ -107,6 +107,11 @@ namespace nv {
#endif
}
inline void storeRelease(volatile float * ptr, float value)
{
storeRelease((uint32 *)ptr, *(uint32 *)&value);
}
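    // The float overload publishes the raw 32-bit pattern of the value through the
    // uint32 release store above; a reader that acquires the same 32 bits and
    // reinterprets them recovers the original float.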
template <typename T>
inline void storeReleasePointer(volatile T * pTo, T from)

@ -17,7 +17,7 @@ struct Event::Private {
};
Event::Event() : m(new Private) {
m->handle = CreateEvent(NULL, FALSE, FALSE, NULL);
m->handle = CreateEvent(/*lpEventAttributes=*/NULL, /*bManualReset=*/FALSE, /*bInitialState=*/FALSE, /*lpName=*/NULL);
}
Event::~Event() {

@ -13,7 +13,9 @@
#endif // NV_OS
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h>
extern HTELEMETRY tmContext;
#endif
@ -45,14 +47,19 @@ Mutex::~Mutex ()
void Mutex::lock()
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
#endif
EnterCriticalSection(&m->mutex);
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
#elif NV_USE_TELEMETRY
tmEndTryLockEx(tmContext, matcher, __FILE__, __LINE__, this, TMLR_SUCCESS);
tmSetLockState(tmContext, this, TMLS_LOCKED, "acquired");
#endif
@ -60,7 +67,18 @@ void Mutex::lock()
bool Mutex::tryLock()
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmStartWaitForLock(0, 0, this, m->name);
if (TryEnterCriticalSection(&m->mutex) != 0) {
tmEndWaitForLock(0);
tmAcquiredLock(0, 0, this, m->name);
return true;
}
else {
tmEndWaitForLock(0);
return false;
}
#elif NV_USE_TELEMETRY
TmU64 matcher;
tmTryLockEx(tmContext, &matcher, 100/*0.1 ms*/, __FILE__, __LINE__, this, "blocked");
if (TryEnterCriticalSection(&m->mutex) != 0) {
@ -79,7 +97,9 @@ bool Mutex::tryLock()
void Mutex::unlock()
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmReleasedLock(0, this);
#elif NV_USE_TELEMETRY
tmSetLockState(tmContext, this, TMLS_RELEASED, "released");
#endif
@ -90,13 +110,17 @@ void Mutex::unlock()
struct Mutex::Private {
pthread_mutex_t mutex;
pthread_mutexattr_t attr;
const char * name;
};
Mutex::Mutex (const char * name) : m(new Private)
{
int result = pthread_mutex_init(&m->mutex, NULL);
pthread_mutexattr_init(&m->attr);
pthread_mutexattr_settype(&m->attr, PTHREAD_MUTEX_RECURSIVE);
int result = pthread_mutex_init(&m->mutex, &m->attr);
//m->mutex = PTHREAD_MUTEX_INITIALIZER;
m->name = name;
nvDebugCheck(result == 0);
}
@ -105,6 +129,8 @@ Mutex::~Mutex ()
{
int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0);
result = pthread_mutexattr_destroy(&m->attr);
nvDebugCheck(result == 0);
}
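// With PTHREAD_MUTEX_RECURSIVE the same thread may nest lock()/unlock() pairs, which
// matches the re-entrant behavior of the Windows CRITICAL_SECTION path. Minimal sketch:
//
//   Mutex m("example");
//   m.lock();
//   m.lock();     // re-entry from the owning thread; a default pthread mutex may deadlock here
//   m.unlock();
//   m.unlock();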
void Mutex::lock()

@ -9,7 +9,11 @@
#include <unistd.h> // usleep
#endif
#if NV_USE_TELEMETRY
#include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h>
extern HTELEMETRY tmContext;
#endif
@ -118,16 +122,12 @@ void Thread::start(ThreadFunc * func, void * arg)
nvDebugCheck(p->thread != NULL);
if (p->name != NULL) {
setThreadName(threadId, p->name);
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmThreadName(0, threadId, p->name);
#elif NV_USE_TELEMETRY
tmThreadName(tmContext, threadId, p->name);
#endif
}
#elif NV_OS_ORBIS
int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? p->name : "nv::Thread");
nvDebugCheck(ret == 0);
// use any non-system core
scePthreadSetaffinity(p->thread, 0x3F);
scePthreadSetprio(p->thread, (SCE_KERNEL_PRIO_FIFO_DEFAULT + SCE_KERNEL_PRIO_FIFO_HIGHEST) / 2);
#elif NV_OS_USE_PTHREAD
int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr());
nvDebugCheck(result == 0);

@ -8,7 +8,9 @@
#include "nvcore/Utils.h"
#include "nvcore/StrLib.h"
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
#include <rad_tm.h>
#elif NV_USE_TELEMETRY
#include <telemetry.h>
extern HTELEMETRY tmContext;
#endif
@ -84,7 +86,9 @@ AutoPtr<ThreadPool> s_pool;
}
{
#if NV_USE_TELEMETRY
#if NV_USE_TELEMETRY3
tmZone(0, TMZF_NONE, "worker");
#elif NV_USE_TELEMETRY
tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker");
#endif
func(s_pool->arg, s_pool->useCallingThread + i);
@ -116,11 +120,11 @@ ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffi
lockThreadToProcessor(0); // Calling thread always locked to processor 0.
}
for (uint i = 0; i < threadCount; i++) {
StringBuilder name;
for (uint i = 0; i < threadCount; i++) {
name.format("worker %d", i);
workers[i].setName(name.release()); // @Leak
workers[i].start(workerFunc, (void *)i);
workers[i].start(workerFunc, (void *)(uintptr_t)i);
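        // Casting through uintptr_t avoids the "integer of different size" warning when
        // the worker index is passed as a pointer-sized payload on 64-bit targets.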
}
allIdle = true;
@ -141,9 +145,6 @@ ThreadPool::~ThreadPool()
void ThreadPool::run(ThreadTask * func, void * arg)
{
// Wait until threads are idle.
wait();
start(func, arg);
if (useCallingThread) {

@ -86,6 +86,8 @@ uint nv::processorCount() {
return count;
#elif NV_OS_ORBIS
return 6;
#elif NV_OS_DURANGO
return 6;
#elif NV_OS_XBOX
return 3; // or 6?
#elif NV_OS_LINUX || NV_OS_NETBSD // Linux, Solaris, & AIX

@ -25,6 +25,7 @@
#include "BlockCompressor.h"
#include "OutputOptions.h"
#include "TaskDispatcher.h"
#include "CompressionOptions.h"
#include "nvimage/Image.h"
#include "nvimage/ColorBlock.h"
@ -33,6 +34,7 @@
#include "nvmath/Vector.inl"
#include "nvcore/Memory.h"
#include "nvcore/Array.inl"
#include <new> // placement new
@ -40,85 +42,13 @@
using namespace nv;
using namespace nvtt;
/*
// OpenMP
#if defined(HAVE_OPENMP)
#include <omp.h>
#endif
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
const uint bs = blockSize();
const uint bw = (w + 3) / 4;
const uint bh = (h + 3) / 4;
#if defined(HAVE_OPENMP)
bool singleThreaded = false;
#else
bool singleThreaded = true;
#endif
// Use a single thread to compress small textures.
if (bw * bh < 16) singleThreaded = true;
if (singleThreaded)
{
nvDebugCheck(bs <= 16);
uint8 mem[16]; // @@ Output one row at a time!
for (int y = 0; y < int(h); y += 4) {
for (uint x = 0; x < w; x += 4) {
ColorBlock rgba;
rgba.init(w, h, data, x, y);
compressBlock(rgba, alphaMode, compressionOptions, mem);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, bs);
}
}
}
}
#if defined(HAVE_OPENMP)
else
{
const uint size = bs * bw * bh;
uint8 * mem = new uint8[size];
#pragma omp parallel
{
#pragma omp for
for (int i = 0; i < int(bw*bh); i++)
{
const uint x = i % bw;
const uint y = i / bw;
ColorBlock rgba;
rgba.init(w, h, data, 4*x, 4*y);
uint8 * ptr = mem + (y * bw + x) * bs;
compressBlock(rgba, alphaMode, compressionOptions, ptr);
} // omp for
} // omp parallel
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(mem, size);
}
delete [] mem;
}
#endif
}
*/
struct CompressorContext
{
nvtt::AlphaMode alphaMode;
AlphaMode alphaMode;
uint w, h, d;
const float * data;
const nvtt::CompressionOptions::Private * compressionOptions;
const CompressionOptions::Private * compressionOptions;
uint bw, bh, bs;
uint8 * mem;
@ -144,7 +74,7 @@ void ColorBlockCompressorTask(void * data, int i)
}
}
void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
void ColorBlockCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
@ -182,66 +112,6 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
delete [] context.mem;
}
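// Worked example of the dispatch granularity (numbers only): a 10x6 image gives
// bw = (10+3)/4 = 3 and bh = (6+3)/4 = 2, so bw*bh = 6 tasks are issued; task i = 4
// maps to block (x, y) = (i % bw, i / bw) = (1, 1), i.e. the 4x4 block whose top-left
// pixel is (4, 4), even though only 2 of its 4 rows lie inside the 10x6 image.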
#if 0
// Each task compresses one block.
void ColorSetCompressorTask(void * data, int i)
{
CompressorContext * d = (CompressorContext *) data;
uint x = i % d->bw;
uint y = i / d->bw;
//for (uint x = 0; x < d->bw; x++)
{
ColorSet set;
set.setColors(d->data, d->w, d->h, x * 4, y * 4);
uint8 * ptr = d->mem + (y * d->bw + x) * d->bs;
((ColorSetCompressor *)d->compressor)->compressBlock(set, d->alphaMode, *d->compressionOptions, ptr);
}
}
void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
CompressorContext context;
context.alphaMode = alphaMode;
context.w = w;
context.h = h;
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
context.compressor = this;
SequentialTaskDispatcher sequential;
// Use a single thread to compress small textures.
if (context.bh < 4) dispatcher = &sequential;
#if _DEBUG
dispatcher = &sequential;
#endif
const uint count = context.bw * context.bh;
const uint size = context.bs * count;
context.mem = new uint8[size];
dispatcher->dispatch(ColorSetCompressorTask, &context, count);
outputOptions.writeData(context.mem, size);
delete [] context.mem;
}
#endif // 0
// Each task compresses one block.
void FloatColorCompressorTask(void * data, int i)
{
@ -262,8 +132,8 @@ void FloatColorCompressorTask(void * data, int i)
Vector4 colors[16];
float weights[16];
const uint block_w = min(d->w - block_x * 4U, 4U);
const uint block_h = min(d->h - block_y * 4U, 4U);
const uint block_w = min(d->w - block_x * 4, 4U);
const uint block_h = min(d->h - block_y * 4, 4U);
uint x, y;
for (y = 0; y < block_h; y++) {
@ -274,7 +144,7 @@ void FloatColorCompressorTask(void * data, int i)
colors[dst_idx].y = g[src_idx];
colors[dst_idx].z = b[src_idx];
colors[dst_idx].w = a[src_idx];
weights[dst_idx] = (d->alphaMode == nvtt::AlphaMode_Transparency) ? a[src_idx] : 1.0f;
weights[dst_idx] = (d->alphaMode == AlphaMode_Transparency) ? saturate(a[src_idx]) : 1.0f;
}
for (; x < 4; x++) {
uint dst_idx = 4 * y + x;
@ -296,7 +166,7 @@ void FloatColorCompressorTask(void * data, int i)
}
void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1); // @@ Add support for compressed 3D textures.
@ -308,7 +178,7 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
context.data = data;
context.compressionOptions = &compressionOptions;
context.bs = blockSize();
context.bs = blockSize(compressionOptions);
context.bw = (w + 3) / 4;
context.bh = (h + 3) / 4;
@ -333,3 +203,466 @@ void FloatColorCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d,
delete [] context.mem;
}
// BC1
#include "CompressorDXT1.h"
void FastCompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1_fast(colors, weights, compressionOptions.colorWeight.xyz(), (BlockDXT1 *)output);
}
void CompressorDXT1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output);
}
// @@ BC1a
// @@ BC2
// @@ BC3
// BC3_RGBM
#include "CompressorDXT5_RGBM.h"
void CompressorBC3_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt5_rgbm(colors, weights, compressionOptions.rgbmThreshold, (BlockDXT5 *)output);
}
// ETC
#include "CompressorETC.h"
void CompressorETC1::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc1(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
void CompressorETC2_R::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac(colors, weights, /*input_channel=*/1, /*search_radius=*/1, /*use_11bit_mode=*/true, output);
}
void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
//compress_eac_rg(colors, weights, 1, 2, output);
}
void CompressorETC2_RGB::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
compress_etc2(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
void CompressorETC2_RGBA::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Tweak quality options.
// @@ Change radius based on quality.
compress_etc2_eac(colors, weights, compressionOptions.colorWeight.xyz(), output);
}
/*void CompressorETC2_RG::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ Change radius based on quality.
compress_eac_rg(colors, weights, compressionOptions.colorWeight.xyz(), output);
}*/
void CompressorETC2_RGBM::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
compress_etc2_rgbm(colors, weights, compressionOptions.rgbmThreshold, output);
}
// External compressors.
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
void AtiCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// @@ Floating point input is not swizzled.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
ATI_TC_CompressOptions options;
options.dwSize = sizeof(options);
options.bUseChannelWeighting = false;
options.bUseAdaptiveWeighting = false;
options.bDXT1UseAlpha = false;
options.nCompressionSpeed = ATI_TC_Speed_Normal;
options.bDisableMultiThreading = false;
//options.bDisableMultiThreading = true;
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
void AtiCompressorDXT5::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT5;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
void SquishCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
void D3DXCompressorDXT1::compress(InputFormat inputFormat, AlphaMode alphaMode, uint w, uint h, uint d, void * data, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
D3DPRESENT_PARAMETERS presentParams;
ZeroMemory(&presentParams, sizeof(presentParams));
presentParams.Windowed = TRUE;
presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
presentParams.BackBufferWidth = 8;
presentParams.BackBufferHeight = 8;
presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
HRESULT err;
IDirect3DDevice9 * device = NULL;
err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
IDirect3DTexture9 * texture = NULL;
err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
IDirect3DSurface9 * surface = NULL;
err = texture->GetSurfaceLevel(0, &surface);
RECT rect;
rect.left = 0;
rect.top = 0;
rect.bottom = h;
rect.right = w;
if (inputFormat == InputFormat_BGRA_8UB)
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
}
else
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
}
if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
{
D3DLOCKED_RECT rect;
ZeroMemory(&rect, sizeof(rect));
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size);
}
err = surface->UnlockRect();
}
surface->Release();
device->Release();
d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
rgba.swizzle(2, 1, 0, 3); // Swap R and B
stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
}
#endif // defined(HAVE_STB)
#if defined(HAVE_ETCLIB)
#include "Etc.h"
void EtcLibCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
//nvCheck(d == 1); // Encode one layer at a time?
Etc::Image::Format format;
if (compressionOptions.format == Format_ETC1) {
format = Etc::Image::Format::ETC1;
}
else if (compressionOptions.format == Format_ETC2_R) {
format = Etc::Image::Format::R11;
}
else if (compressionOptions.format == Format_ETC2_RG) {
format = Etc::Image::Format::RG11;
}
else if (compressionOptions.format == Format_ETC2_RGB) {
format = Etc::Image::Format::RGB8;
//format = Etc::Image::Format::SRGB8;
}
else if (compressionOptions.format == Format_ETC2_RGBA) {
format = Etc::Image::Format::RGBA8;
//format = Etc::Image::Format::SRGBA8;
}
else if (compressionOptions.format == Format_ETC2_RGB_A1) {
format = Etc::Image::Format::RGB8A1;
//format = Etc::Image::Format::SRGB8A1;
}
else {
nvCheck(false);
return;
}
Etc::ErrorMetric error_metric = Etc::ErrorMetric::RGBA;
// @@ Use normal compression metric for normals?
//if (compressionOptions.)
// @@ Adjust based on quality.
int effort = ETCCOMP_DEFAULT_EFFORT_LEVEL;
// @@ What are the defaults?
uint jobs = 4;
uint max_jobs = 4;
uint8 * out_data = NULL;
uint out_size = 0;
uint out_width = 0;
uint out_height = 0;
int out_time = 0;
// Swizzle color data.
nv::Array<float> tmp;
uint count = w * h;
tmp.resize(4 * count);
for (uint i = 0; i < count; i++) {
tmp[4*i+0] = data[count*0 + i];
tmp[4*i+1] = data[count*1 + i];
tmp[4*i+2] = data[count*2 + i];
tmp[4*i+3] = data[count*3 + i];
}
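    // For example, a 2-pixel image stored as planes R0 R1 | G0 G1 | B0 B1 | A0 A1 is
    // repacked here into the interleaved sequence R0 G0 B0 A0 R1 G1 B1 A1 before it is
    // handed to Etc::Encode.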
Etc::Encode(tmp.buffer(), w, h, format, error_metric, effort, jobs, max_jobs, &out_data, &out_size, &out_width, &out_height, &out_time);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(out_data, I32(out_size));
}
}
#endif
#if defined(HAVE_RGETC)
#include "rg_etc1.h"
NV_AT_STARTUP(rg_etc1::pack_etc1_block_init());
void RgEtcCompressor::compressBlock(ColorBlock & rgba, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
rg_etc1::etc1_pack_params pack_params;
pack_params.m_quality = rg_etc1::cMediumQuality;
if (compressionOptions.quality == Quality_Fastest) pack_params.m_quality = rg_etc1::cLowQuality;
else if (compressionOptions.quality == Quality_Production) pack_params.m_quality = rg_etc1::cHighQuality;
else if (compressionOptions.quality == Quality_Highest) pack_params.m_quality = rg_etc1::cHighQuality;
else if (compressionOptions.quality == Quality_Normal) pack_params.m_quality = rg_etc1::cMediumQuality;
rgba.swizzle(2, 1, 0, 3);
rg_etc1::pack_etc1_block(output, (uint *)rgba.colors(), pack_params);
//Vector4 result[16];
//nv::decompress_etc(output, result);
}
#endif
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTextureUtilities.h> // for CPVRTexture, CPVRTextureHeader, PixelType, Transcode
#include "nvmath/Color.inl"
void CompressorPVR::compress(AlphaMode alphaMode, uint w, uint h, uint d, const float * data, TaskDispatcher * dispatcher, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{
EPVRTColourSpace color_space = ePVRTCSpacelRGB;
//pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('b','g','r','a',8,8,8,8);
pvrtexture::PixelType src_pixel_type = pvrtexture::PixelType('r','g','b',0,8,8,8,0);
pvrtexture::CPVRTextureHeader header(src_pixel_type.PixelTypeID, w, h, d, 1/*num mips*/, 1/*num array*/, 1/*num faces*/, color_space, ePVRTVarTypeUnsignedByteNorm);
/*
uint count = w * h * d;
Array<Color32> tmp;
tmp.resize(count);
for (uint i = 0; i < count; i++) {
tmp[i] = toColor32(Vector4(data[0*count + i], data[1*count + i], data[2*count + i], data[3*count + i]));
}
*/
uint count = w * h * d;
Array<uint8> tmp;
tmp.resize(3 * count);
for (uint i = 0; i < count; i++) {
tmp[3*i+0] = data[0*count + i] * 255.0f;
tmp[3*i+1] = data[1*count + i] * 255.0f;
tmp[3*i+2] = data[2*count + i] * 255.0f;
}
pvrtexture::CPVRTexture texture(header, tmp.buffer());
pvrtexture::PixelType dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
if (compressionOptions.format == Format_PVR_2BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGB);
else if (compressionOptions.format == Format_PVR_4BPP_RGB) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGB);
else if (compressionOptions.format == Format_PVR_2BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_2bpp_RGBA);
else if (compressionOptions.format == Format_PVR_4BPP_RGBA) dst_pixel_type = pvrtexture::PixelType(ePVRTPF_PVRTCI_4bpp_RGBA);
bool success = pvrtexture::Transcode(texture, dst_pixel_type, ePVRTVarTypeUnsignedByteNorm, color_space, pvrtexture::ePVRTCNormal, false);
if (success) {
uint size = 0;
if (compressionOptions.format == Format_PVR_2BPP_RGB || compressionOptions.format == Format_PVR_2BPP_RGBA) {
// 2 bpp
const uint bpp = 2u;
const uint block_size = 8u * 4u;
            const uint size_factor = (block_size * bpp) >> 3u;
            const uint block_width = nv::max((w >> 3u), 2u);
            const uint block_height = nv::max((h >> 2u), 2u);
size = d * block_width * block_height * size_factor;
}
else {
// 4 bpp
const uint bpp = 4u;
const uint block_size = 4u * 4u;
const uint size_factor = (block_size*bpp) >> 3u;
const uint block_width = max((w>>2u), 2u);
const uint block_height = max((h>>2u), 2u);
size = d * block_width * block_height * size_factor;
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(texture.getDataPtr(), I32(size));
}
}
}
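// Worked example of the size computation above for a 256x256x1 texture:
//   2bpp: block_width = max(256>>3, 2) = 32, block_height = max(256>>2, 2) = 64,
//         size_factor = (32*2)>>3 = 8, so size = 32*64*8 = 16384 bytes (256*256*2/8).
//   4bpp: block_width = block_height = max(256>>2, 2) = 64, size_factor = (16*4)>>3 = 8,
//         so size = 64*64*8 = 32768 bytes (256*256*4/8).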
#endif

@ -27,7 +27,6 @@
#include "Compressor.h"
namespace nv
{
struct ColorBlock;
@ -45,9 +44,149 @@ namespace nv
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize() const = 0;
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output) = 0;
virtual uint blockSize(const nvtt::CompressionOptions::Private & compressionOptions) const = 0;
};
// BC1
struct FastCompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
struct CompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
// BC3
struct CompressorBC3_RGBM : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
// ETC
struct CompressorETC1 : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 8; }
};
struct CompressorETC2_R : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
struct CompressorETC2_RG : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
struct CompressorETC2_RGB : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 8; }
};
struct CompressorETC2_RGBA : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
struct CompressorETC2_RGBM : public FloatColorCompressor
{
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private &) const { return 16; }
};
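    // As a sanity check on the block sizes above: the 8-byte formats (ETC1, ETC2_R,
    // ETC2_RGB) cost 4 bits per pixel and the 16-byte formats (ETC2_RG, ETC2_RGBA,
    // ETC2_RGBM) cost 8 bits per pixel, so a 1024x1024 ETC2_RGB texture occupies
    // (1024/4) * (1024/4) * 8 = 524288 bytes.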
// External compressors.
#if defined(HAVE_ATITC)
struct AtiCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct AtiCompressorDXT5 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
struct SquishCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
struct D3DXCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
struct StbCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
#if NV_USE_CRUNCH
struct CrunchCompressorETC1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if NV_USE_INTEL_ISPC_TC
struct IspcCompressorBC1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct IspcCompressorBC3 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct IspcCompressorBC7 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct IspcCompressorETC1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_ETCLIB)
struct EtcLibCompressor : public CompressorInterface
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_RGETC)
struct RgEtcCompressor : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
#if defined(HAVE_PVRTEXTOOL)
struct CompressorPVR : public CompressorInterface
{
virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
} // nv namespace

@ -13,6 +13,7 @@ SET(NVTT_SRCS
CompressorDX11.h CompressorDX11.cpp
CompressorDXT1.h CompressorDXT1.cpp
CompressorDXT5_RGBM.h CompressorDXT5_RGBM.cpp
CompressorETC.h CompressorETC.cpp
CompressorRGB.h CompressorRGB.cpp
Context.h Context.cpp
QuickCompressDXT.h QuickCompressDXT.cpp
@ -38,6 +39,7 @@ IF (CUDA_FOUND)
ENDIF (CUDA_FOUND)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/rg_etc1_v104)
ADD_DEFINITIONS(-DNVTT_EXPORTS)
@ -47,7 +49,7 @@ ELSE(NVTT_SHARED)
ADD_LIBRARY(nvtt ${NVTT_SRCS})
ENDIF(NVTT_SHARED)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath)
TARGET_LINK_LIBRARIES(nvtt ${LIBS} nvcore nvimage nvthread squish bc6h bc7 nvmath rg_etc1)
INSTALL(TARGETS nvtt
RUNTIME DESTINATION bin

@ -38,79 +38,6 @@ ClusterFit::ClusterFit()
{
}
#if 0 // @@ Deprecate. Do not use color set directly.
void ClusterFit::setColorSet(const ColorSet * set)
{
// initialise the best error
#if NVTT_USE_SIMD
m_besterror = SimdVector( FLT_MAX );
Vector3 metric = m_metric.toVector3();
#else
m_besterror = FLT_MAX;
Vector3 metric = m_metric;
#endif
// cache some values
m_count = set->colorCount;
Vector3 values[16];
for (uint i = 0; i < m_count; i++)
{
values[i] = set->colors[i].xyz();
}
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric);
// build the list of values
int order[16];
float dps[16];
for (uint i = 0; i < m_count; ++i)
{
dps[i] = dot(values[i], principal);
order[i] = i;
}
// stable sort
for (uint i = 0; i < m_count; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
// weight all the points
#if NVTT_USE_SIMD
m_xxsum = SimdVector( 0.0f );
m_xsum = SimdVector( 0.0f );
#else
m_xxsum = Vector3(0.0f);
m_xsum = Vector3(0.0f);
m_wsum = 0.0f;
#endif
for (uint i = 0; i < m_count; ++i)
{
int p = order[i];
#if NVTT_USE_SIMD
NV_ALIGN_16 Vector4 tmp(values[p], 1);
m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]);
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
#else
m_weighted[i] = values[p] * set->weights[p];
m_xxsum += m_weighted[i] * m_weighted[i];
m_xsum += m_weighted[i];
m_weights[i] = set->weights[p];
m_wsum += m_weights[i];
#endif
}
}
#endif // 0
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
{
// initialise the best error

@ -50,6 +50,7 @@ void CompressionOptions::reset()
m.format = Format_DXT1;
m.quality = Quality_Normal;
m.colorWeight.set(1.0f, 1.0f, 1.0f, 1.0f);
m.rgbmThreshold = 0.15f;
m.bitcount = 32;
m.bmask = 0x000000FF;
@ -102,6 +103,11 @@ void CompressionOptions::setColorWeights(float red, float green, float blue, flo
m.colorWeight.set(red, green, blue, alpha);
}
void CompressionOptions::setRGBMThreshold(float min_m)
{
m.rgbmThreshold = min_m;
}
/// Set color mask to describe the RGB/RGBA format.
void CompressionOptions::setPixelFormat(uint bitCount, uint rmask, uint gmask, uint bmask, uint amask)
@ -162,7 +168,7 @@ void CompressionOptions::setPixelType(PixelType pixelType)
/// Set pitch alignment in bytes.
void CompressionOptions::setPitchAlignment(int pitchAlignment)
{
nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(pitchAlignment));
nvDebugCheck(pitchAlignment > 0 && isPowerOfTwo(U32(pitchAlignment)));
m.pitchAlignment = pitchAlignment;
}
@ -194,6 +200,10 @@ void CompressionOptions::setTargetDecoder(Decoder decoder)
}
Format CompressionOptions::format() const
{
return m.format;
}
// Translate to and from D3D formats.
unsigned int CompressionOptions::d3d9Format() const
@ -246,10 +256,20 @@ unsigned int CompressionOptions::d3d9Format() const
FOURCC_ATI2, // Format_BC5
FOURCC_DXT1, // Format_DXT1n
0, // Format_CTX1
MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6
MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7
//FOURCC_ATI2, // Format_BC5_Luma
FOURCC_BC6H, // Format_BC6
FOURCC_BC7L, // Format_BC7
FOURCC_DXT5, // Format_BC3_RGBM
NV_MAKEFOURCC('E', 'T', 'C', '1'), // Format_ETC1
0, // Format_ETC2_R
0, // Format_ETC2_RG
NV_MAKEFOURCC('E', 'T', 'C', '2'), // Format_ETC2_RGB
0, // Format_ETC2_RGBA
0, // Format_ETC2_RGB_A1
0, // Format_ETC2_RGBM
FOURCC_PVR0,
FOURCC_PVR1,
FOURCC_PVR2,
FOURCC_PVR3,
};
NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count);
@ -258,12 +278,80 @@ unsigned int CompressionOptions::d3d9Format() const
}
}
/*
bool CompressionOptions::setDirect3D9Format(unsigned int format)
unsigned int CompressionOptions::dxgiFormat() const // @@ Add srgb flag.
{
if (m.format == Format_RGB) {
if (m.pixelType == PixelType_UnsignedNorm) {
uint bitcount = m.bitcount;
uint rmask = m.rmask;
uint gmask = m.gmask;
uint bmask = m.bmask;
uint amask = m.amask;
if (bitcount == 0) {
bitcount = m.rsize + m.gsize + m.bsize + m.asize;
rmask = ((1 << m.rsize) - 1) << (m.asize + m.bsize + m.gsize);
gmask = ((1 << m.gsize) - 1) << (m.asize + m.bsize);
bmask = ((1 << m.bsize) - 1) << m.asize;
amask = ((1 << m.asize) - 1) << 0;
}
if (bitcount <= 32) {
return nv::findDXGIFormat(bitcount, rmask, gmask, bmask, amask);
}
else {
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_UNORM;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_UNORM;
}
}
else if (m.pixelType == PixelType_Float) {
if (m.rsize == 16 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16_FLOAT;
if (m.rsize == 32 && m.gsize == 0 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32_FLOAT;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R16G16_FLOAT;
if (m.rsize == 32 && m.gsize == 32 && m.bsize == 0 && m.asize == 0) return DXGI_FORMAT_R32G32_FLOAT;
if (m.rsize == 16 && m.gsize == 16 && m.bsize == 16 && m.asize == 16) return DXGI_FORMAT_R16G16B16A16_FLOAT;
if (m.rsize == 32 && m.gsize == 32 && m.bsize == 32 && m.asize == 32) return DXGI_FORMAT_R32G32B32A32_FLOAT;
}
return 0;
}
else {
uint dxgi_formats[] = {
0, // Format_RGB,
DXGI_FORMAT_BC1_UNORM, // Format_DXT1
DXGI_FORMAT_BC1_UNORM, // Format_DXT1a
DXGI_FORMAT_BC2_UNORM, // Format_DXT3
DXGI_FORMAT_BC3_UNORM, // Format_DXT5
DXGI_FORMAT_BC3_UNORM, // Format_DXT5n
DXGI_FORMAT_BC4_UNORM, // Format_BC4
DXGI_FORMAT_BC5_UNORM, // Format_BC5
DXGI_FORMAT_BC1_UNORM, // Format_DXT1n
0, // Format_CTX1
DXGI_FORMAT_BC6H_UF16, // Format_BC6
DXGI_FORMAT_BC7_UNORM, // Format_BC7
DXGI_FORMAT_BC5_UNORM, // Format_BC3_RGBM
0, // Format_ETC1
0, // Format_ETC2_R
0, // Format_ETC2_RG
0, // Format_ETC2_RGB
0, // Format_ETC2_RGBA
0, // Format_ETC2_RGB_A1
0, // Format_ETC2_RGBM
0, // Format_PVR_2BPP_RGB
0, // Format_PVR_4BPP_RGB
0, // Format_PVR_2BPP_RGBA
            0, // Format_PVR_4BPP_RGBA
};
NV_COMPILER_CHECK(NV_ARRAY_SIZE(dxgi_formats) == Format_Count);
return dxgi_formats[m.format];
}
}
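// Worked example of the mask derivation above: with bitcount == 0 and r/g/b/a sizes of
// 5/6/5/0 the code computes bitcount = 16, rmask = 0x1F << 11 = 0xF800,
// gmask = 0x3F << 5 = 0x07E0, bmask = 0x1F and amask = 0, i.e. the usual B5G6R5 layout,
// which findDXGIFormat is expected to map to the matching 16-bit DXGI format.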
unsigned int CompressionOptions::dxgiFormat() const
/*
bool CompressionOptions::setDirect3D9Format(unsigned int format)
{
}

@ -39,6 +39,7 @@ namespace nvtt
Quality quality;
nv::Vector4 colorWeight;
float rgbmThreshold;
// Pixel format description.
uint bitcount;

@ -30,6 +30,7 @@
namespace nv
{
struct CompressorInterface
{
virtual ~CompressorInterface() {}

@ -39,7 +39,7 @@ using namespace nv;
using namespace nvtt;
void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
void CompressorBC6::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
@ -77,7 +77,7 @@ void CompressorBC6::compressBlock(const Vector4 colors[16], const float weights[
ZOH::compress(zohTile, (char *)output);
}
void CompressorBC7::compressBlock(const Vector4 colors[16], const float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
void CompressorBC7::compressBlock(Vector4 colors[16], float weights[16], const CompressionOptions::Private & compressionOptions, void * output)
{
// !!!UNDONE: support channel weights
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)

@ -30,14 +30,14 @@ namespace nv
{
struct CompressorBC6 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
struct CompressorBC7 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
virtual void compressBlock(Vector4 colors[16], float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize(const nvtt::CompressionOptions::Private & ) const { return 16; }
};
} // nv namespace

@ -28,7 +28,7 @@
#include "CompressionOptions.h"
#include "OutputOptions.h"
#include "ClusterFit.h"
#include "CompressorDXT1.h"
//#include "CompressorDXT1.h"
#include "CompressorDXT5_RGBM.h"
// squish
@ -48,45 +48,11 @@
#include <new> // placement new
// s3_quant
#if defined(HAVE_S3QUANT)
#include "s3tc/s3_quant.h"
#endif
// ati tc
#if defined(HAVE_ATITC)
typedef int BOOL;
typedef _W64 unsigned long ULONG_PTR;
typedef ULONG_PTR DWORD_PTR;
#include "atitc/ATI_Compress.h"
#endif
// squish
#if defined(HAVE_SQUISH)
//#include "squish/squish.h"
#include "squish-1.10/squish.h"
#endif
// d3dx
#if defined(HAVE_D3DX)
#include <d3dx9.h>
#endif
// stb
#if defined(HAVE_STB)
#define STB_DEFINE
#include "stb/stb_dxt.h"
#endif
using namespace nv;
using namespace nvtt;
void FastCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
BlockDXT1 * block = new(output) BlockDXT1;
QuickCompress::compressDXT1(rgba, block);
}
void FastCompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
@ -115,39 +81,13 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
}
#if 1
void CompressorDXT1::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
compress_dxt1(colors, weights, compressionOptions.colorWeight.xyz(), /*three_color_mode*/true, (BlockDXT1 *)output);
}
#else
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
if (rgba.isSingleColor())
{
BlockDXT1 * block = new(output) BlockDXT1;
OptimalCompress::compressDXT1(rgba.color(0), block);
}
else
{
nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
fit.SetColourSet(&colours, nvsquish::kDxt1);
fit.Compress(output);
}
}
#endif
void CompressorDXT1a::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
uint alphaMask = 0;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a == 0) alphaMask |= (3 << (i * 2)); // Set two bits for each color.
if (rgba.color(i).a == 0) alphaMask |= (3U << (i * 2U)); // Set two bits for each color.
}
const bool isSingleColor = rgba.isSingleColor();
@ -284,216 +224,6 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode
}
void CompressorBC3_RGBM::compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
float min_m = 0.25f; // @@ Get from compression options.
compress_dxt5_rgbm(colors, weights, min_m, (BlockDXT5 *)output);
}
#if defined(HAVE_ATITC)
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
// @@ Floating point input is not swizzled.
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT1;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
ATI_TC_CompressOptions options;
options.dwSize = sizeof(options);
options.bUseChannelWeighting = false;
options.bUseAdaptiveWeighting = false;
options.bDXT1UseAlpha = false;
options.nCompressionSpeed = ATI_TC_Speed_Normal;
options.bDisableMultiThreading = false;
//options.bDisableMultiThreading = true;
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, &options, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
void AtiCompressorDXT5::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
// Init source texture
ATI_TC_Texture srcTexture;
srcTexture.dwSize = sizeof(srcTexture);
srcTexture.dwWidth = w;
srcTexture.dwHeight = h;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
srcTexture.dwPitch = w * 4;
srcTexture.format = ATI_TC_FORMAT_ARGB_8888;
}
else
{
srcTexture.dwPitch = w * 16;
srcTexture.format = ATI_TC_FORMAT_ARGB_32F;
}
srcTexture.dwDataSize = ATI_TC_CalculateBufferSize(&srcTexture);
srcTexture.pData = (ATI_TC_BYTE*) data;
// Init dest texture
ATI_TC_Texture destTexture;
destTexture.dwSize = sizeof(destTexture);
destTexture.dwWidth = w;
destTexture.dwHeight = h;
destTexture.dwPitch = 0;
destTexture.format = ATI_TC_FORMAT_DXT5;
destTexture.dwDataSize = ATI_TC_CalculateBufferSize(&destTexture);
destTexture.pData = (ATI_TC_BYTE*) mem::malloc(destTexture.dwDataSize);
// Compress
ATI_TC_ConvertTexture(&srcTexture, &destTexture, NULL, NULL, NULL, NULL);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(destTexture.pData, destTexture.dwDataSize);
}
mem::free(destTexture.pData);
}
#endif // defined(HAVE_ATITC)
#if defined(HAVE_SQUISH)
void SquishCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
nvDebugCheck(false);
#pragma message(NV_FILE_LINE "TODO: Convert input to fixed point ABGR format instead of ARGB")
/*
Image img(*image);
int count = img.width() * img.height();
for (int i = 0; i < count; i++)
{
Color32 c = img.pixel(i);
img.pixel(i) = Color32(c.b, c.g, c.r, c.a);
}
int size = squish::GetStorageRequirements(img.width(), img.height(), squish::kDxt1);
void * blocks = mem::malloc(size);
squish::CompressImage((const squish::u8 *)img.pixels(), img.width(), img.height(), blocks, squish::kDxt1 | squish::kColourClusterFit);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(blocks, size);
}
mem::free(blocks);
*/
}
#endif // defined(HAVE_SQUISH)
#if defined(HAVE_D3DX)
void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(d == 1);
IDirect3D9 * d3d = Direct3DCreate9(D3D_SDK_VERSION);
D3DPRESENT_PARAMETERS presentParams;
ZeroMemory(&presentParams, sizeof(presentParams));
presentParams.Windowed = TRUE;
presentParams.SwapEffect = D3DSWAPEFFECT_COPY;
presentParams.BackBufferWidth = 8;
presentParams.BackBufferHeight = 8;
presentParams.BackBufferFormat = D3DFMT_UNKNOWN;
HRESULT err;
IDirect3DDevice9 * device = NULL;
err = d3d->CreateDevice(D3DADAPTER_DEFAULT, D3DDEVTYPE_REF, GetDesktopWindow(), D3DCREATE_SOFTWARE_VERTEXPROCESSING, &presentParams, &device);
IDirect3DTexture9 * texture = NULL;
err = D3DXCreateTexture(device, w, h, 1, 0, D3DFMT_DXT1, D3DPOOL_SYSTEMMEM, &texture);
IDirect3DSurface9 * surface = NULL;
err = texture->GetSurfaceLevel(0, &surface);
RECT rect;
rect.left = 0;
rect.top = 0;
rect.bottom = h;
rect.right = w;
if (inputFormat == nvtt::InputFormat_BGRA_8UB)
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A8R8G8B8, w * 4, NULL, &rect, D3DX_DEFAULT, 0);
}
else
{
err = D3DXLoadSurfaceFromMemory(surface, NULL, NULL, data, D3DFMT_A32B32G32R32F, w * 16, NULL, &rect, D3DX_DEFAULT, 0);
}
if (err != D3DERR_INVALIDCALL && err != D3DXERR_INVALIDDATA)
{
D3DLOCKED_RECT rect;
ZeroMemory(&rect, sizeof(rect));
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size);
}
err = surface->UnlockRect();
}
surface->Release();
device->Release();
d3d->Release();
}
#endif // defined(HAVE_D3DX)
#if defined(HAVE_STB)
void StbCompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
rgba.swizzle(2, 1, 0, 3); // Swap R and B
stb_compress_dxt_block((unsigned char *)output, (unsigned char *)rgba.colors(), 0, 0);
}
#endif // defined(HAVE_STB)

@ -32,12 +32,6 @@ namespace nv
struct ColorBlock;
// Fast CPU compressors.
struct FastCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
struct FastCompressorDXT1a : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
@ -64,19 +58,6 @@ namespace nv
// Normal CPU compressors.
#if 1
struct CompressorDXT1 : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#else
struct CompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
struct CompressorDXT1a : public ColorBlockCompressor
{
@ -108,47 +89,9 @@ namespace nv
virtual uint blockSize() const { return 16; }
};
struct CompressorBC3_RGBM : public FloatColorCompressor
{
virtual void compressBlock(const Vector4 colors[16], const float weights[16], const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 16; }
};
// External compressors.
#if defined(HAVE_ATITC)
struct AtiCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
struct AtiCompressorDXT5 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_SQUISH)
struct SquishCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_D3DX)
struct D3DXCompressorDXT1 : public CompressorInterface
{
virtual void compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
};
#endif
#if defined(HAVE_STB)
struct StbCompressorDXT1 : public ColorBlockCompressor
{
virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
virtual uint blockSize() const { return 8; }
};
#endif
} // nv namespace

@ -218,13 +218,13 @@ static int evaluate_mse(const Color32 & p, const Color32 & c) {
return (square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b));
}
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) {
/*static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, const Vector3 & w) {
float e0 = evaluate_mse(palette[0], c, w);
float e1 = evaluate_mse(palette[1], c, w);
float e2 = evaluate_mse(palette[2], c, w);
float e3 = evaluate_mse(palette[3], c, w);
return min(min(e0, e1), min(e2, e3));
}
}*/
static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
int e0 = evaluate_mse(palette[0], c);
@ -337,7 +337,7 @@ static void evaluate_palette(Color16 c0, Color16 c1, Vector3 palette[4]) {
}
}
static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
/*static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
nvDebugCheck(c0.u > c1.u);
Color32 palette32[4];
@ -346,7 +346,7 @@ static void evaluate_palette3(Color16 c0, Color16 c1, Vector3 palette[4]) {
for (int i = 0; i < 4; i++) {
palette[i] = color_to_vector3(palette32[i]);
}
}
}*/
@ -491,7 +491,8 @@ float nv::compress_dxt1_single_color(const Vector3 * colors, const float * weigh
// Decompress block color.
Color32 palette[4];
output->evaluatePalette(palette, /*d3d9=*/false);
evaluate_palette(output->col0, output->col1, palette);
//output->evaluatePalette(palette, /*d3d9=*/false);
Vector3 block_color = color_to_vector3(palette[output->indices & 0x3]);
@ -668,7 +669,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// This is too expensive, even with a low threshold.
// If high quality:
if (0) {
if (/* DISABLES CODE */ (0)) {
BlockDXT1 exhaustive_output;
float exhaustive_error = compress_dxt1_bounding_box_exhaustive(input_colors, colors, weights, count, color_weights, three_color_mode, 1400, &exhaustive_output);
@ -720,7 +721,7 @@ float nv::compress_dxt1(const Vector4 input_colors[16], const float input_weight
// Least squares fitting of color end points for the given indices. @@ Take weights into account.
static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b)
static bool optimize_end_points4(uint indices, const Vector4 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
@ -739,8 +740,8 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * colors[i];
betax_sum += beta * colors[i];
alphax_sum += alpha * colors[i].xyz();
betax_sum += beta * colors[i].xyz();
}
float denom = alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum;
@ -756,7 +757,7 @@ static bool optimize_end_points4(uint indices, const Vector3 * colors, const Vec
// Least squares fitting of color end points for the given indices. @@ This does not support black/transparent index. @@ Take weights into account.
static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vector3 * weights, int count, Vector3 * a, Vector3 * b)
static bool optimize_end_points3(uint indices, const Vector3 * colors, /*const float * weights,*/ int count, Vector3 * a, Vector3 * b)
{
float alpha2_sum = 0.0f;
float beta2_sum = 0.0f;
@ -794,6 +795,90 @@ static bool optimize_end_points3(uint indices, const Vector3 * colors, const Vec
// find minimum and maximum colors based on bounding box in color space
inline static void fit_colors_bbox(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
*c0 = Vector3(0);
*c1 = Vector3(255);
for (int i = 0; i < count; i++) {
*c0 = max(*c0, colors[i]);
*c1 = min(*c1, colors[i]);
}
}
inline static void select_diagonal(const Vector3 * colors, int count, Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 center = (*c0 + *c1) * 0.5f;
Vector2 covariance = Vector2(0);
for (int i = 0; i < count; i++) {
Vector3 t = colors[i] - center;
covariance += t.xy() * t.z;
}
float x0 = c0->x;
float y0 = c0->y;
float x1 = c1->x;
float y1 = c1->y;
if (covariance.x < 0) {
swap(x0, x1);
}
if (covariance.y < 0) {
swap(y0, y1);
}
c0->set(x0, y0, c0->z);
c1->set(x1, y1, c1->z);
}
inline static void inset_bbox(Vector3 * restrict c0, Vector3 * restrict c1)
{
Vector3 inset = (*c0 - *c1) / 16.0f - (8.0f / 255.0f) / 16.0f;
*c0 = clamp(*c0 - inset, 0.0f, 255.0f);
*c1 = clamp(*c1 + inset, 0.0f, 255.0f);
}
float nv::compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output)
{
Vector3 colors[16];
float weights[16];
int count = reduce_colors(input_colors, input_weights, colors, weights);
if (count == 0) {
// Output trivial block.
output->col0.u = 0;
output->col1.u = 0;
output->indices = 0;
return 0;
}
float error = FLT_MAX;
error = compress_dxt1_single_color(colors, weights, count, color_weights, output);
if (error == 0.0f || count == 1) {
// Early out.
return error;
}
// Quick end point selection.
Vector3 c0, c1;
fit_colors_bbox(colors, count, &c0, &c1);
select_diagonal(colors, count, &c0, &c1);
inset_bbox(&c0, &c1);
output_block4(input_colors, color_weights, c0, c1, output);
// Refine color for the selected indices.
if (optimize_end_points4(output->indices, input_colors, 16, &c0, &c1)) {
output_block4(input_colors, color_weights, c0, c1, output);
}
return evaluate_mse(input_colors, input_weights, color_weights, output);
}

@ -13,11 +13,14 @@ namespace nv {
float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);
float compress_dxt1_single_color(const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
//float compress_dxt1_least_squares_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, BlockDXT1 * output);
float compress_dxt1_bounding_box_exhaustive(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, int search_limit, BlockDXT1 * output);
void compress_dxt1_cluster_fit(const Vector4 input_colors[16], const Vector3 * colors, const float * weights, int count, const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Cluster fit end point selection.
float compress_dxt1(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, bool three_color_mode, BlockDXT1 * output);
// Quick end point selection followed by least squares refinement.
float compress_dxt1_fast(const Vector4 input_colors[16], const float input_weights[16], const Vector3 & color_weights, BlockDXT1 * output);
}
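For orientation, a hedged usage sketch of the two entry points declared above (not part of the patch; assumes the declarations are in scope, the colors are a 4x4 block of RGBA values in [0, 1], and the 0.001f threshold is purely illustrative):
// Sketch: try the quick path first, fall back to cluster fit if its error is too high.
nv::Vector4 input_colors[16];   // filled by the caller, RGBA in [0, 1]
float input_weights[16];
for (int i = 0; i < 16; i++) input_weights[i] = 1.0f;   // uniform weights (illustrative)
nv::BlockDXT1 block;
float err = nv::compress_dxt1_fast(input_colors, input_weights, nv::Vector3(1), &block);
if (err > 0.001f) {
    err = nv::compress_dxt1(input_colors, input_weights, nv::Vector3(1), /*three_color_mode=*/true, &block);
}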

@ -3,6 +3,7 @@
#include "OptimalCompressDXT.h"
#include "QuickCompressDXT.h"
#include "CompressorETC.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
@ -17,14 +18,9 @@
using namespace nv;
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
static void convert_to_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, Vector4 rgbm_colors[16], float rgb_weights[16]) {
float weight_sum = 0;
@ -41,7 +37,7 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
float b = B / M;
float a = (M - min_m) / (1 - min_m);
input_colors_rgbm[i] = Vector4(r, g, b, a);
rgbm_colors[i] = Vector4(r, g, b, a);
rgb_weights[i] = input_weights[i] * M;
weight_sum += input_weights[i];
}
@ -50,6 +46,18 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
for (uint i = 0; i < 16; i++) rgb_weights[i] = 1;
}
}
//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
// Convert to RGBM.
Vector4 input_colors_rgbm[16]; // @@ Write over input_colors?
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, input_colors_rgbm, rgb_weights);
// Compress RGB.
compress_dxt1(input_colors_rgbm, rgb_weights, Vector3(1), /*three_color_mode=*/false, &output->color);
@ -138,291 +146,61 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
}
float nv::compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output) {
// Convert to RGBM.
Vector4 rgbm_colors[16];
float rgb_weights[16];
convert_to_rgbm(input_colors, input_weights, min_m, rgbm_colors, rgb_weights);
#if 0
BlockDXT5 * block = new(output)BlockDXT5;
// Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.
// Compress the resulting M values optimally.
// Repeat this several times until compression error does not improve?
//Vector3 rgb_block[16];
//float m_block[16];
// Init RGB/M block.
#if 0
nvsquish::WeightedClusterFit fit;
ColorBlock rgba;
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgba.color(i) = toColor32(Vector4(r, g, b, a));
}
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}
#endif
#if 1
ColorSet rgb;
rgb.allocate(4, 4);
for (uint i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float M = max(max(R, G), max(B, min_m));
float r = R / M;
float g = G / M;
float b = B / M;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(weights[i], 0.001f);// weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setColorWeights(compressionOptions.colorWeight);
fit.setColorSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
void * etc_output = (uint8 *)output + 8;
void * eac_output = output;
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
// Compress RGB.
compress_etc2(rgbm_colors, rgb_weights, Vector3(1), etc_output);
// Decompress RGB/M block.
nv::ColorBlock RGB;
block->color.decodeBlock(&RGB);
decompress_etc(etc_output, rgbm_colors);
#if 1
AlphaBlock4x4 M;
// Compute M values to compensate for RGB's error.
for (int i = 0; i < 16; i++) {
const Vector4 & c = colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = weights[i];
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#else
OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif
#if 0
// Decompress M.
block->alpha.decodeBlock(&M);
rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.
for (uint i = 0; i < src.colorCount; i++) {
const Vector4 & c = src.color(i);
const Vector4 & c = input_colors[i];
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
//float m = max(max(R, G), max(B, min_m));
float m = float(M.alpha[i]) / 255.0f * (1 - min_m) + min_m;
float r = R / m;
float g = G / m;
float b = B / m;
float a = c.w;
rgb.colors[i] = Vector4(r, g, b, a);
rgb.indices[i] = i;
rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
}
float rm = rgbm_colors[i].x;
float gm = rgbm_colors[i].y;
float bm = rgbm_colors[i].z;
rgb.createMinimalSet(/*ignoreTransparent=*/true);
if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
}
else {
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&rgb);
Vector3 start, end;
fit.compress4(&start, &end);
QuickCompress::outputBlock4(rgb, start, end, &block->color);
}
#endif
// compute m such that m * (r/M, g/M, b/M) == RGB
#if 0
block->color.decodeBlock(&RGB);
// Three equations, one unknown:
// m * r/M == R
// m * g/M == G
// m * b/M == B
//AlphaBlock4x4 M;
//M.initWeights(src);
// Solve in the least squares sense!
for (int i = 0; i < 16; i++) {
const Vector4 & c = src.color(i);
float R = saturate(c.x);
float G = saturate(c.y);
float B = saturate(c.z);
// m (rm gm bm) (rm gm bm)^T == (rm gm bm) (R G B)^T
float r = RGB.color(i).r / 255.0f;
float g = RGB.color(i).g / 255.0f;
float b = RGB.color(i).b / 255.0f;
// m == dot(rgb, RGB) / dot(rgb, rgb)
float m = (R / r + G / g + B / b) / 3.0f;
//float m = max((R / r + G / g + B / b) / 3.0f, min_m);
//float m = max(max(R / r, G / g), max(B / b, min_m));
//float m = max(max(R, G), max(B, min_m));
float m = dot(Vector3(rm, gm, bm), Vector3(R, G, B)) / dot(Vector3(rm, gm, bm), Vector3(rm, gm, bm));
if (!isFinite(m)) {
m = 1;
}
m = (m - min_m) / (1 - min_m);
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = src.weights[i];
// Store M in alpha channel.
rgbm_colors[i].w = saturate(m); // @@ What if we don't saturate?
}
// Compress M.
if (compressionOptions.quality == Quality_Fastest) {
QuickCompress::compressDXT5A(M, &block->alpha);
}
else {
OptimalCompress::compressDXT5A(M, &block->alpha);
}
#endif
compress_eac(rgbm_colors, input_weights, /*input_channel=*/3, /*search_radius=*/1, /*use_11bit_mode=*/false, eac_output);
return 0; // @@ Compute error.
}
#if 0
src.fromRGBM(M, min_m);
src.createMinimalSet(/*ignoreTransparent=*/true);
if (src.isSingleColor(/*ignoreAlpha=*/true)) {
OptimalCompress::compressDXT1(src.color(0), &block->color);
}
else {
// @@ Use our improved compressor.
ClusterFit fit;
fit.setMetric(compressionOptions.colorWeight);
fit.setColourSet(&src);
Vector3 start, end;
fit.compress4(&start, &end);
if (fit.compress3(&start, &end)) {
QuickCompress::outputBlock3(src, start, end, block->color);
}
else {
QuickCompress::outputBlock4(src, start, end, block->color);
}
}
#endif // 0
// @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?
// RGBM encoding.
// Maximize precision.
// - Number of possible grey levels:
// - Naive: 2^3 = 8
// - Better: 2^3 + 2^2 = 12
// - How to choose min_m?
// - Ideal = Adaptive per block, don't know where to store.
// - Adaptive per lightmap. How to compute optimal?
// - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.
// - Optimal compressor: Interpolation artifacts.
// - Color transform.
// - Measure error in post-tone-mapping color space.
// - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
// - Guess based on average lightmap color? Use fixed exposure, in scaled lightmap space.
// - Enhanced DXT compressor.
// - Typical RGBM encoding as follows:
// rgb -> M = max(rgb), RGB=rgb/M -> RGBM
// - If we add a compression step (M' ≈ M) and M' < M, then rgb may be greater than 1.
// - We could ensure that M' >= M during compression.
// - We could clamp RGB anyway.
// - We could add a fixed scale value to take into account compression errors and avoid clamping.
// Compress color.
/*if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block->color);
}
else
{
nvsquish::WeightedClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
int flags = 0;
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
fit.SetColourSet(&colours, 0);
fit.Compress(&block->color);
}*/
#endif // 0
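To make the notes above concrete, a minimal per-pixel RGBM encode/decode pair (a sketch of the convention used in this file, not code from the patch; assumes linear-space rgb already divided by the chosen range, and a fixed min_m such as the 0.25 mentioned above). The 8-bit quantization of M is where the "M' < M" case described above comes from.
// Encode: M = max(rgb, min_m); store rgb/M in RGB and (M - min_m)/(1 - min_m) in A.
inline void rgbm_encode(float r, float g, float b, float min_m, float rgbm[4])
{
    float M = r > g ? r : g;
    if (b > M) M = b;
    if (min_m > M) M = min_m;
    rgbm[0] = r / M;
    rgbm[1] = g / M;
    rgbm[2] = b / M;
    rgbm[3] = (M - min_m) / (1.0f - min_m);
}
// Decode: recover M from A and scale RGB back up. If the compressed M ends up below
// the true M, the decoded rgb exceeds the original, hence the clamping strategies above.
inline void rgbm_decode(const float rgbm[4], float min_m, float rgb[3])
{
    float M = rgbm[3] * (1.0f - min_m) + min_m;
    rgb[0] = rgbm[0] * M;
    rgb[1] = rgbm[1] * M;
    rgb[2] = rgbm[2] * M;
}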

@ -5,5 +5,5 @@ namespace nv {
class Vector4;
float compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output);
float compress_etc2_rgbm(Vector4 input_colors[16], float input_weights[16], float min_m, void * output);
}

File diff suppressed because it is too large Load Diff

@ -0,0 +1,20 @@
#include "nvcore/nvcore.h"
namespace nv {
class Vector3;
class Vector4;
void decompress_etc(const void * input_block, Vector4 output_colors[16]);
void decompress_eac(const void * input_block, Vector4 output_colors[16], int output_channel);
void decompress_etc_eac(const void * input_block, Vector4 output_colors[16]);
float compress_etc1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_etc2_a1(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
float compress_eac(Vector4 input_colors[16], float input_weights[16], int input_channel, int search_radius, bool use_11bit_mode, void * output);
float compress_etc2_eac(Vector4 input_colors[16], float input_weights[16], const Vector3 & color_weights, void * output);
}

@ -250,6 +250,8 @@ namespace
// Compute shared exponent.
int exp_shared_p = max(-B-1, ftoi_floor(log2f(max_c))) + 1 + B;
nvDebugCheck(exp_shared_p <= Emax);
nvDebugCheck(exp_shared_p >= 0);
int max_s = ftoi_round(max_c / (1 << (exp_shared_p - B - N)));
@ -279,7 +281,7 @@ namespace
{
float v = max3(r, g, b);
uint rgbe;
uint rgbe = 0;
if (v < 1e-32) {
rgbe = 0;
@ -534,6 +536,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
}
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
// @@
ir = ig = ib = ia = 0;
}
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
ir = iround(clamp(r, 0.0f, 65535.0f));
@ -543,6 +546,11 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
}
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
// @@
ir = ig = ib = ia = 0;
}
else {
// @@
ir = ig = ib = ia = 0;
}
uint p = 0;

@ -39,6 +39,7 @@
#include "cuda/CudaCompressorDXT.h"
#include "nvimage/DirectDrawSurface.h"
#include "nvimage/KtxFile.h"
#include "nvimage/ColorBlock.h"
#include "nvimage/BlockDXT.h"
#include "nvimage/Image.h"
@ -51,6 +52,7 @@
#include "nvcore/Memory.h"
#include "nvcore/Ptr.h"
#include "nvcore/Array.inl"
using namespace nv;
using namespace nvtt;
@ -222,11 +224,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
return false;
}
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
const int faceCount = inputOptions.faceCount;
int width = inputOptions.width;
int height = inputOptions.height;
@ -244,12 +241,19 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (inputOptions.maxLevel > 0) mipmapCount = min(mipmapCount, inputOptions.maxLevel);
}
if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, img.isNormalMap(), compressionOptions, outputOptions)) {
if (!outputHeader(inputOptions.textureType, width, height, depth, arraySize, mipmapCount, inputOptions.isNormalMap, compressionOptions, outputOptions)) {
return false;
}
// Output images.
if (outputOptions.container != Container_KTX)
{
nvtt::Surface img;
img.setWrapMode(inputOptions.wrapMode);
img.setAlphaMode(inputOptions.alphaMode);
img.setNormalMap(inputOptions.isNormalMap);
// Output each face from the largest mipmap to the smallest.
for (int f = 0; f < faceCount; f++)
{
int w = width;
@ -263,7 +267,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
if (inputOptions.convertToNormalMap) {
img.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
img.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
img.packNormals();
}
// To linear space.
@ -337,6 +340,133 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
compress(tmp, f, m, compressionOptions, outputOptions);
}
}
}
else
{
// KTX files expect face mipmaps to be interleaved.
Array<nvtt::Surface> images(faceCount);
Array<bool> mipChainBroken(faceCount);
int w = width;
int h = height;
int d = depth;
// https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
uint imageSize = estimateSize(w, h, 1, 1, compressionOptions) * faceCount;
outputOptions.writeData(&imageSize, sizeof(uint32));
for (int f = 0; f < faceCount; f++)
{
nvtt::Surface s;
s.setWrapMode(inputOptions.wrapMode);
s.setAlphaMode(inputOptions.alphaMode);
s.setNormalMap(inputOptions.isNormalMap);
s.setImage(inputOptions.inputFormat, inputOptions.width, inputOptions.height, inputOptions.depth, inputOptions.images[f]);
// To normal map.
if (inputOptions.convertToNormalMap) {
s.toGreyScale(inputOptions.heightFactors.x, inputOptions.heightFactors.y, inputOptions.heightFactors.z, inputOptions.heightFactors.w);
s.toNormalMap(inputOptions.bumpFrequencyScale.x, inputOptions.bumpFrequencyScale.y, inputOptions.bumpFrequencyScale.z, inputOptions.bumpFrequencyScale.w);
}
// To linear space.
if (!s.isNormalMap()) {
s.toLinear(inputOptions.inputGamma);
}
// Resize input.
s.resize(w, h, d, ResizeFilter_Box);
nvtt::Surface tmp = s;
if (!s.isNormalMap()) {
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, 0, compressionOptions, outputOptions);
images.push_back(s);
mipChainBroken.push_back(false);
}
static const unsigned char padding[3] = {0, 0, 0};
for (int m = 1; m < mipmapCount; m++)
{
w = max(1, w/2);
h = max(1, h/2);
d = max(1, d/2);
// https://www.khronos.org/opengles/sdk/tools/KTX/file_format_spec/#2.16
imageSize = estimateSize(w, h, d, 1, compressionOptions) * faceCount;
outputOptions.writeData(&imageSize, sizeof(uint32));
nvtt::Surface tmp;
for (int f = 0; f < faceCount; f++)
{
nvtt::Surface& img = images[f];
int idx = m * faceCount + f;
bool useSourceImages = false;
if (!mipChainBroken[f]) {
if (inputOptions.images[idx] == NULL) { // One face is missing in this mipmap level.
mipChainBroken[f] = true; // If one level is missing, ignore the following source images.
}
else {
useSourceImages = true;
}
}
if (useSourceImages) {
img.setImage(inputOptions.inputFormat, w, h, d, inputOptions.images[idx]);
// For already generated mipmaps, we need to convert to linear.
if (!img.isNormalMap()) {
img.toLinear(inputOptions.inputGamma);
}
}
else {
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
float params[2] = { inputOptions.kaiserStretch, inputOptions.kaiserAlpha };
img.buildNextMipmap(MipmapFilter_Kaiser, inputOptions.kaiserWidth, params);
}
else {
img.buildNextMipmap(inputOptions.mipmapFilter);
}
}
nvDebugCheck(img.width() == w);
nvDebugCheck(img.height() == h);
nvDebugCheck(img.depth() == d);
if (img.isNormalMap()) {
if (inputOptions.normalizeMipmaps) {
img.normalizeNormalMap();
}
tmp = img;
}
else {
tmp = img;
tmp.toGamma(inputOptions.outputGamma);
}
quantize(tmp, compressionOptions);
compress(tmp, f, m, compressionOptions, outputOptions);
// Cube padding.
if (faceCount == 6 && arraySize == 1)
{
// TODO: compute the cube padding offset for uncompressed images.
}
}
int mipPadding = 3 - ((imageSize + 3) % 4);
if (mipPadding != 0) {
outputOptions.writeData(&padding, mipPadding);
}
}
}
return true;
}
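For reference, a small sketch of the per-level layout the KTX branch above writes: a uint32 imageSize field, then every face of that level, then zero padding to a 4-byte boundary. The ByteWriter type and ktx_write_level helper are illustrative stand-ins for outputOptions.writeData, not part of the patch:
struct ByteWriter {
    virtual bool write(const void * data, unsigned int size) = 0;
};
// Mirrors the `3 - ((imageSize + 3) % 4)` padding expression used above.
inline void ktx_write_level(ByteWriter & out, unsigned int imageSize, const void * const * faces, int faceCount, unsigned int faceSize)
{
    static const unsigned char zero[3] = {0, 0, 0};
    out.write(&imageSize, sizeof(unsigned int));                       // per-level imageSize field
    for (int f = 0; f < faceCount; f++) out.write(faces[f], faceSize); // interleaved faces
    int pad = 3 - ((int(imageSize) + 3) % 4);                          // 0..3 bytes of padding
    if (pad != 0) out.write(zero, pad);
}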
@ -673,6 +803,131 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
return writeSucceed;
}
else if (outputOptions.container == Container_KTX)
{
KtxHeader header;
// TODO cube arrays
if (textureType == TextureType_2D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = 0;
}
else if (textureType == TextureType_Cube) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 6;
header.pixelDepth = 0;
}
else if (textureType == TextureType_3D) {
nvCheck(arraySize == 1);
header.numberOfArrayElements = 0;
header.numberOfFaces = 1;
header.pixelDepth = d;
}
else if (textureType == TextureType_Array) {
header.numberOfArrayElements = arraySize;
header.numberOfFaces = 1;
header.pixelDepth = 0; // Is it?
}
header.pixelWidth = w;
header.pixelHeight = h;
header.numberOfMipmapLevels = mipmapCount;
bool supported = true;
// TODO non-compressed formats
if (compressionOptions.format == Format_RGBA)
{
//header.glType = ?;
//header.glTypeSize = ?;
//header.glFormat = ?;
}
else
{
header.glType = 0;
header.glTypeSize = 1;
header.glFormat = 0;
if (compressionOptions.format == Format_DXT1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGB_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_DXT1a) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT3) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT3;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5 : KTX_INTERNAL_COMPRESSED_RGBA_S3TC_DXT5;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_BC4) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_RGTC1; // KTX_INTERNAL_COMPRESSED_SIGNED_RED_RGTC1 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_BC5) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_RGTC2; // KTX_INTERNAL_COMPRESSED_SIGNED_RG_RGTC2 ?
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_BC6) {
if (compressionOptions.pixelType == PixelType_Float) header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_SIGNED_FLOAT;
else /*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RGB_BPTC_UNSIGNED_FLOAT; // By default we assume unsigned.
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_BC7) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_BPTC_UNORM : KTX_INTERNAL_COMPRESSED_RGBA_BPTC_UNORM;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else if (compressionOptions.format == Format_ETC1) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC1 : KTX_INTERNAL_COMPRESSED_RGB_ETC1;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_R) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RED_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RED;
}
else if (compressionOptions.format == Format_ETC2_RG) {
header.glInternalFormat = KTX_INTERNAL_COMPRESSED_RG_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RG;
}
else if (compressionOptions.format == Format_ETC2_RGB) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ETC2 : KTX_INTERNAL_COMPRESSED_RGB_ETC2;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGB;
}
else if (compressionOptions.format == Format_ETC2_RGBA) {
header.glInternalFormat = outputOptions.srgb ? KTX_INTERNAL_COMPRESSED_SRGB_ALPHA_ETC2_EAC : KTX_INTERNAL_COMPRESSED_RGBA_ETC2_EAC;
header.glBaseInternalFormat = KTX_BASE_INTERNAL_RGBA;
}
else {
supported = false;
}
// TODO: Format_DXT1n, Format_DXT5n? There seems to be no way to indicate a normal map in KTX. Maybe via key/value data?
}
if (!supported)
{
// This container does not support the requested format.
outputOptions.error(Error_UnsupportedOutputFormat);
return false;
}
const uint headerSize = 64;
nvStaticCheck(sizeof(KtxHeader) == 64);
bool writeSucceed = outputOptions.writeData(&header, headerSize);
if (!writeSucceed)
{
outputOptions.error(Error_FileWrite);
}
return writeSucceed;
}
return true;
}
@ -788,15 +1043,34 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
{
return new CompressorBC7;
}
/*else if (compressionOptions.format == Format_BC5_Luma)
{
return new ProductionCompressorBC5_Luma;
}*/
else if (compressionOptions.format == Format_BC3_RGBM)
{
return new CompressorBC3_RGBM;
}
else if (compressionOptions.format >= Format_ETC1 && compressionOptions.format <= Format_ETC2_RGB_A1)
{
#if defined(HAVE_RGETC)
if (compressionOptions.format == Format_ETC1 && compressionOptions.externalCompressor == "rg_etc") return new RgEtcCompressor;
#endif
#if defined(HAVE_ETCLIB)
if (compressionOptions.externalCompressor == "etclib") return new EtcLibCompressor;
#endif
if (compressionOptions.format == Format_ETC1) return new CompressorETC1;
else if (compressionOptions.format == Format_ETC2_R) return new CompressorETC2_R;
//else if (compressionOptions.format == Format_ETC2_RG) return new CompressorETC2_RG;
else if (compressionOptions.format == Format_ETC2_RGB) return new CompressorETC2_RGB;
else if (compressionOptions.format == Format_ETC2_RGBA) return new CompressorETC2_RGBA;
}
else if (compressionOptions.format == Format_ETC2_RGBM)
{
return new CompressorETC2_RGBM;
}
else if (compressionOptions.format >= Format_PVR_2BPP_RGB && compressionOptions.format <= Format_PVR_4BPP_RGBA)
{
#if defined(HAVE_PVRTEXTOOL)
return new CompressorPVR;
#endif
}
return NULL;
}
@ -860,3 +1134,24 @@ CompressorInterface * Compressor::Private::chooseGpuCompressor(const Compression
return NULL;
}
int Compressor::Private::estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const
{
const Format format = compressionOptions.format;
const uint bitCount = compressionOptions.bitcount;
const uint pitchAlignment = compressionOptions.pitchAlignment;
int size = 0;
for (int m = 0; m < mipmapCount; m++)
{
size += computeImageSize(w, h, d, bitCount, pitchAlignment, format);
// Compute extents of next mipmap:
w = max(1, w / 2);
h = max(1, h / 2);
d = max(1, d / 2);
}
return size;
}

@ -56,6 +56,7 @@ namespace nvtt
nv::CompressorInterface * chooseCpuCompressor(const CompressionOptions::Private & compressionOptions) const;
nv::CompressorInterface * chooseGpuCompressor(const CompressionOptions::Private & compressionOptions) const;
int estimateSize(int w, int h, int d, int mipmapCount, const CompressionOptions::Private & compressionOptions) const;
bool cudaSupported;
bool cudaEnabled;

@ -23,12 +23,14 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include "Surface.h"
#include "CompressorETC.h" // for ETC decoder.
#include "nvmath/Vector.inl"
#include "nvmath/Matrix.inl"
#include "nvmath/Color.h"
#include "nvmath/Half.h"
#include "nvmath/ftoi.h"
#include "nvmath/PackedFloat.h"
#include "nvimage/Filter.h"
#include "nvimage/ImageIO.h"
@ -39,8 +41,13 @@
#include "nvimage/ErrorMetric.h"
#include "nvimage/DirectDrawSurface.h"
#include "nvthread/ParallelFor.h"
#include "nvcore/Array.inl"
#include <float.h>
#include <string.h> // memset, memcpy
//#include <stdio.h> // printf?
#if NV_CC_GNUC
#include <math.h> // exp2f and log2f
@ -123,6 +130,18 @@ namespace
else if (format == Format_BC7) {
return 16;
}
else if (format == Format_ETC1 || format == Format_ETC2_R || format == Format_ETC2_RGB) {
return 8;
}
else if (format == Format_ETC2_RG || format == Format_ETC2_RGBA || format == Format_ETC2_RGBM) {
return 16;
}
else if (format == Format_PVR_2BPP_RGB || format == Format_PVR_2BPP_RGBA) {
return 4;
}
else if (format == Format_PVR_4BPP_RGB || format == Format_PVR_4BPP_RGBA) {
return 8;
}
return 0;
}
@ -197,7 +216,7 @@ uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlign
}
}
void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType) {
void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent, RoundMode roundMode, TextureType textureType, nvtt::ShapeRestriction shapeRestriction /*= nvtt::ShapeRestriction_None*/) {
nvDebugCheck(width != NULL && *width > 0);
nvDebugCheck(height != NULL && *height > 0);
nvDebugCheck(depth != NULL && *depth > 0);
@ -234,21 +253,21 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
// Round to power of two.
if (roundMode == RoundMode_ToNextPowerOfTwo)
{
w = nextPowerOfTwo(w);
h = nextPowerOfTwo(h);
d = nextPowerOfTwo(d);
w = nextPowerOfTwo(U32(w));
h = nextPowerOfTwo(U32(h));
d = nextPowerOfTwo(U32(d));
}
else if (roundMode == RoundMode_ToNearestPowerOfTwo)
{
w = nearestPowerOfTwo(w);
h = nearestPowerOfTwo(h);
d = nearestPowerOfTwo(d);
w = nearestPowerOfTwo(U32(w));
h = nearestPowerOfTwo(U32(h));
d = nearestPowerOfTwo(U32(d));
}
else if (roundMode == RoundMode_ToPreviousPowerOfTwo)
{
w = previousPowerOfTwo(w);
h = previousPowerOfTwo(h);
d = previousPowerOfTwo(d);
w = previousPowerOfTwo(U32(w));
h = previousPowerOfTwo(U32(h));
d = previousPowerOfTwo(U32(d));
}
else if (roundMode == RoundMode_ToNextMultipleOfFour)
{
@ -269,6 +288,38 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
d = previousMultipleOfFour(d);
}
if(shapeRestriction == ShapeRestriction_Square)
{
if (textureType == TextureType_2D)
{
int md = nv::min(w,h);
w = md;
h = md;
d = 1;
}
else if (textureType == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
else if (textureType == TextureType_Cube)
{
int md = nv::min(w, h);
w = md;
h = md;
d = 1;
}
}
else
{
if (textureType == TextureType_2D || textureType == TextureType_Cube)
{
d = 1;
}
}
*width = w;
*height = h;
*depth = d;
@ -509,8 +560,8 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c
}
}
*rangeMin = range.x;
*rangeMax = range.y;
if (rangeMin) *rangeMin = range.x;
if (rangeMax) *rangeMax = range.y;
}
bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
@ -583,7 +634,7 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/)
}
// @@ Have loadFloat allocate the image with the desired number of channels.
img->resizeChannelCount(4);
//img->resizeChannelCount(4);
delete m->image;
m->image = img.release();
@ -601,7 +652,8 @@ bool Surface::save(const char * fileName, bool hasAlpha/*=0*/, bool hdr/*=0*/) c
return ImageIO::saveFloat(fileName, m->image, 0, 4);
}
else {
AutoPtr<Image> image(m->image->createImage(0, 4));
uint c = min<uint>(m->image->componentCount(), 4);
AutoPtr<Image> image(m->image->createImage(0, c));
nvCheck(image != NULL);
if (hasAlpha) {
@ -829,16 +881,35 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
return true;
}
#if defined(HAVE_PVRTEXTOOL)
#include <PVRTDecompress.h>
#endif
// @@ Add support for compressed 3D textures.
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
{
if (format != nvtt::Format_BC1 &&
format != nvtt::Format_BC2 &&
format != nvtt::Format_BC3 &&
format != nvtt::Format_BC3n &&
format != nvtt::Format_BC3_RGBM &&
format != nvtt::Format_BC4 &&
format != nvtt::Format_BC5 &&
format != nvtt::Format_BC6 &&
format != nvtt::Format_BC7)
format != nvtt::Format_BC7 &&
format != nvtt::Format_ETC1 &&
format != nvtt::Format_ETC2_R &&
format != nvtt::Format_ETC2_RG &&
format != nvtt::Format_ETC2_RGB &&
format != nvtt::Format_ETC2_RGBA &&
format != nvtt::Format_ETC2_RGBM
#if defined(HAVE_PVRTEXTOOL)
&& format != nvtt::Format_PVR_2BPP_RGB
&& format != nvtt::Format_PVR_4BPP_RGB
&& format != nvtt::Format_PVR_2BPP_RGBA
&& format != nvtt::Format_PVR_4BPP_RGBA
#endif
)
{
return false;
}
@ -851,7 +922,7 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
m->image->allocate(4, w, h, 1);
m->type = TextureType_2D;
const int bw = (w + 3) / 4;
const int bw = (w + 3) / 4; // @@ Not if PVR 2bpp!
const int bh = (h + 3) / 4;
const uint bs = blockSize(format);
@ -859,30 +930,68 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
const uint8 * ptr = (const uint8 *)data;
TRY {
if (format == nvtt::Format_BC6)
#if defined(HAVE_PVRTEXTOOL)
if (format >= nvtt::Format_PVR_2BPP_RGB && format <= nvtt::Format_PVR_4BPP_RGBA)
{
// BC6 format - decode directly to float
bool two_bit_mode = (format == nvtt::Format_PVR_2BPP_RGB || format == nvtt::Format_PVR_2BPP_RGBA);
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
uint8 * output = new uint8[4 * w * h];
PVRTDecompressPVRTC(ptr, two_bit_mode, w, h, output);
for (int y = 0; y < h; y++) {
for (int x = 0; x < w; x++) {
m->image->pixel(0, x, y, 0) = output[4*(y*w + x) + 0] / 255.0f;
m->image->pixel(1, x, y, 0) = output[4*(y*w + x) + 1] / 255.0f;
m->image->pixel(2, x, y, 0) = output[4*(y*w + x) + 2] / 255.0f;
m->image->pixel(3, x, y, 0) = output[4*(y*w + x) + 3] / 255.0f;
}
}
delete [] output;
}
else
#endif
if (format == nvtt::Format_BC6 || (format >= nvtt::Format_ETC1 && format <= nvtt::Format_ETC2_RGBM))
{
Vector3 colors[16];
// Some formats we decode directly to float:
for (int y = 0; y < bh; y++) {
for (int x = 0; x < bw; x++) {
Vector4 colors[16];
if (format == nvtt::Format_BC6) {
const BlockBC6 * block = (const BlockBC6 *)ptr;
block->decodeBlock(colors);
}
else if (format == nvtt::Format_ETC1 || format == nvtt::Format_ETC2_RGB) {
nv::decompress_etc(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RGBA || format == nvtt::Format_ETC2_RGBM) {
nv::decompress_etc_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_R) {
// @@ Not implemented.
//nv::decompress_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RG) {
// @@ Not implemented.
//nv::decompress_eac(ptr, colors);
}
else if (format == nvtt::Format_ETC2_RGB_A1) {
// @@ Not implemented?
//nv::decompress_etc(ptr, colors);
}
for (int yy = 0; yy < 4; yy++)
{
for (int xx = 0; xx < 4; xx++)
{
Vector3 rgb = colors[yy*4 + xx];
for (int yy = 0; yy < 4; yy++) {
for (int xx = 0; xx < 4; xx++) {
Vector4 c = colors[yy*4 + xx];
if (x * 4 + xx < w && y * 4 + yy < h)
{
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = rgb.x;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = rgb.y;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = rgb.z;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = 1.0f;
if (x * 4 + xx < w && y * 4 + yy < h) {
m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = c.x;
m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = c.y;
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = c.z;
m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = c.w;
}
}
}
@ -893,12 +1002,10 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
}
else
{
// Non-BC6 - decode to 8-bit, then convert to float
// Others, we decode to 8-bit, then convert to float
for (int y = 0; y < bh; y++)
{
for (int x = 0; x < bw; x++)
{
for (int y = 0; y < bh; y++) {
for (int x = 0; x < bw; x++) {
ColorBlock colors;
if (format == nvtt::Format_BC1)
@ -929,7 +1036,7 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC3)
else if (format == nvtt::Format_BC3 || format == nvtt::Format_BC3n || format == nvtt::Format_BC3_RGBM)
{
const BlockDXT5 * block = (const BlockDXT5 *)ptr;
@ -1092,7 +1199,7 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth
m->image = img;
}
void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter)
void Surface::resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter)
{
if (isNull()) return;
@ -1104,27 +1211,17 @@ void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilte
int h = m->image->height();
int d = m->image->depth();
getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type);
getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type, nvtt::ShapeRestriction_Square);
if (m->type == TextureType_2D)
{
nvDebugCheck(d==1);
int md = nv::min(w,h);
w = md;
h = md;
}
else if (m->type == TextureType_Cube)
{
nvDebugCheck(d==1);
nvDebugCheck(w==h);
}
else if (m->type == TextureType_3D)
{
int md = nv::min(nv::min(w,h),d);
w = md;
h = md;
d = md;
}
resize(w, h, d, filter, filterWidth, params);
}
@ -1151,6 +1248,63 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl
resize(w, h, d, filter, filterWidth, params);
}
float rmsBilinearError(nvtt::Surface original, nvtt::Surface resized) {
return nv::rmsBilinearColorError(original.m->image, resized.m->image, (FloatImage::WrapMode)original.wrapMode(), original.alphaMode() == AlphaMode_Transparency);
}
void Surface::autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter)
{
Surface original = *this;
Surface resized = original;
int w = width();
int h = height();
int d = depth();
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
while (w >= 4 && h >= 4 && d >= 1) {
// Resize always from original? This is more expensive, but should produce higher quality.
//resized = original;
resized.resize(w, h, d, filter);
#if 0
// Scale back up to original size. @@ Upscaling not implemented!
Surface restored = resized;
restored.resize(original.width(), original.height(), original.depth(), ResizeFilter_Triangle);
float error;
if (isNormalMap()) {
error = nvtt::angularError(original, restored);
}
else {
error = nvtt::rmsError(original, restored);
}
#else
float error = rmsBilinearError(original, resized);
#endif
if (error < errorTolerance) {
*this = resized;
nvDebug("image resized %dx%d -> %dx%d (error=%f)\n", original.width(), original.height(), w, h, error);
}
else {
nvDebug("image can't be resized further (error=%f)\n", error);
break;
}
w = (w + 1) / 2;
h = (h + 1) / 2;
d = (d + 1) / 2;
}
}
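A brief usage sketch for the autoResize entry point above (values are illustrative; the RoundMode argument is not used by the implementation shown above):
// Sketch: shrink a surface while the RMS bilinear error stays under the tolerance.
nvtt::Surface s;
if (s.load("lightmap.dds")) {   // hypothetical input file
    s.autoResize(/*errorTolerance=*/0.01f, nvtt::RoundMode_None, nvtt::ResizeFilter_Box);
}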
bool Surface::canMakeNextMipmap(int min_size /*= 1*/)
{
if (isNull()) return false;
@ -1196,7 +1350,7 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa
{
nvDebugCheck(filter == MipmapFilter_Kaiser);
KaiserFilter filter(filterWidth);
if (params != NULL) filter.setParameters(params[0], params[1]);
if (params != NULL) filter.setParameters(/*alpha=*/params[0], /*stretch=*/params[1]);
img = img->downSample(filter, wrapMode, 3);
}
}
@ -1357,8 +1511,9 @@ void Surface::toSrgb()
for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c);
for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::toSrgb(channel[i]);
}
}//);
}
}
@ -1382,8 +1537,9 @@ void Surface::toLinearFromSrgb()
for (uint c = 0; c < 3; c++) {
float * channel = img->channel(c);
for (uint i = 0; i < count; i++) {
//parallel_for(count, 128, [=](int i) {
channel[i] = ::fromSrgb(channel[i]);
}
}//);
}
}
@ -2827,6 +2983,78 @@ Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1)
return s;
}
Surface Surface::warp(int w, int h, WarpFunction * warp_function) const
{
Surface s;
FloatImage * img = s.m->image = new FloatImage;
const int C = m->image->componentCount();
img->allocate(C, w, h, 1);
#define USE_PARALLEL_FOR 0
#if USE_PARALLEL_FOR
nv::parallel_for(h, 1, [=](int y) {
#else
for (int y = 0; y < h; y++) {
#endif
for (int x = 0; x < w; x++) {
float fx = (float(x) + 0.0f) / w;
float fy = (float(y) + 0.0f) / h;
float fz = 0;
warp_function(fx, fy, fz);
for (int c = 0; c < C; c++) {
img->pixel(c, x, y, 0) = m->image->sampleLinearClamp(c, fx, fy);
}
}
}
#if USE_PARALLEL_FOR
);
#endif
return s;
}
Surface Surface::warp(int w, int h, int d, WarpFunction * warp_function) const
{
Surface s;
FloatImage * img = s.m->image = new FloatImage;
const int C = m->image->componentCount();
img->allocate(C, w, h, d);
for (int z = 0; z < d; z++) {
#define USE_PARALLEL_FOR 0
#if USE_PARALLEL_FOR
nv::parallel_for(h, 1, [=](int y) {
#else
for (int y = 0; y < h; y++) {
#endif
for (int x = 0; x < w; x++) {
float fx = (float(x) + 0.0f) / w;
float fy = (float(y) + 0.0f) / h;
float fz = (float(z) + 0.0f) / d;
warp_function(fx, fy, fz);
for (int c = 0; c < C; c++) {
img->pixel(c, x, y, z) = m->image->sampleLinearClamp(c, fx, fy, fz); // @@ 2D only.
}
}
}
#if USE_PARALLEL_FOR
);
#endif
}
return s;
}
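A hedged example of driving the new warp overloads above: a WarpFunction receives normalized coordinates and rewrites them in place before the source surface is sampled. The flip_x function below is illustrative only:
// Sketch: mirror a surface horizontally through Surface::warp.
static void flip_x(float & x, float & y, float & z)
{
    x = 1.0f - x;   // y and z are left unchanged.
    (void)y; (void)z;
}
// nvtt::Surface mirrored = src.warp(src.width(), src.height(), flip_x);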
bool Surface::copyChannel(const Surface & srcImage, int srcChannel)
{
return copyChannel(srcImage, srcChannel, srcChannel);
@ -2953,7 +3181,7 @@ void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a)
}
// Vertical lines:
for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width)
for (uint i = 0, x = 0; i < uint(aw); i++, x += tile_width)
{
for (uint y = 0; y < h; y++)
{
@ -3083,9 +3311,9 @@ Surface nvtt::histogram(const Surface & img, int width, int height)
return histogram(img, /*minRange*/0, maxRange, width, height);
}
#include "nvcore/Array.inl"
#include "nvmath/PackedFloat.h"
#include <stdio.h>
//#include "nvcore/Array.inl"
//#include "nvmath/PackedFloat.h"
//#include <stdio.h>
nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRange, int width, int height)
{
@ -3234,7 +3462,7 @@ nvtt::Surface nvtt::histogram(const Surface & img, float minRange, float maxRang
maxh = nv::max(maxh, nv::max3(buckets[i].x, buckets[i].y, buckets[i].z));
}
printf("maxh = %f\n", maxh);
//printf("maxh = %f\n", maxh);
//maxh = 80;
maxh = 256;

@ -83,7 +83,7 @@ namespace nv {
uint countMipmaps(uint w, uint h, uint d);
uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size);
uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format);
void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType);
void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType, nvtt::ShapeRestriction shapeRestriction = nvtt::ShapeRestriction_None);
}

@ -10,8 +10,8 @@
// Gran Central Dispatch (GCD/libdispatch)
// http://developer.apple.com/mac/library/documentation/Performance/Reference/GCD_libdispatch_Ref/Reference/reference.html
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
#define HAVE_GCD 1
#include <dispatch/dispatch.h>
//#define HAVE_GCD 1
//#include <dispatch/dispatch.h>
#endif
// Parallel Patterns Library (PPL) is part of Microsoft's concurrency runtime:
@ -64,7 +64,7 @@ namespace nvtt {
#endif
#if NV_OS_DARWIN && defined(HAVE_DISPATCH_H)
#if HAVE_GCD
// Task dispatcher using Apple's Grand Central Dispatch.
struct AppleTaskDispatcher : public TaskDispatcher

@ -47,9 +47,9 @@ const char * nvtt::errorString(Error e)
return "Error writing through output handler";
case Error_UnsupportedOutputFormat:
return "The container file does not support the selected output format";
}
default:
return "Invalid error";
}
}
// Return NVTT version.

@ -105,7 +105,21 @@ namespace nvtt
Format_BC6,
Format_BC7,
Format_BC3_RGBM, //
Format_BC3_RGBM,
Format_ETC1,
Format_ETC2_R,
Format_ETC2_RG,
Format_ETC2_RGB,
Format_ETC2_RGBA,
Format_ETC2_RGB_A1,
Format_ETC2_RGBM,
Format_PVR_2BPP_RGB, // Using PVR textools.
Format_PVR_4BPP_RGB,
Format_PVR_2BPP_RGBA,
Format_PVR_4BPP_RGBA,
Format_Count
};
@ -155,6 +169,7 @@ namespace nvtt
NVTT_API void setFormat(Format format);
NVTT_API void setQuality(Quality quality);
NVTT_API void setColorWeights(float red, float green, float blue, float alpha = 1.0f);
NVTT_API void setRGBMThreshold(float min_m);
NVTT_API void setExternalCompressor(const char * name);
@ -173,9 +188,10 @@ namespace nvtt
NVTT_API void setTargetDecoder(Decoder decoder);
// Translate to and from D3D formats.
NVTT_API Format format() const;
NVTT_API unsigned int d3d9Format() const;
NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setD3D9Format(unsigned int format);
//NVTT_API unsigned int dxgiFormat() const;
//NVTT_API bool setDxgiFormat(unsigned int format);
};
@ -253,6 +269,14 @@ namespace nvtt
AlphaMode_Premultiplied,
};
// Extents shape restrictions
enum ShapeRestriction
{
ShapeRestriction_None,
ShapeRestriction_Square,
};
// Input options. Specify format and layout of the input texture. (Deprecated in NVTT 2.1)
struct InputOptions
{
@ -344,7 +368,7 @@ namespace nvtt
{
Container_DDS,
Container_DDS10,
// Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
Container_KTX, // Khronos Texture: http://www.khronos.org/opengles/sdk/tools/KTX/
// Container_VTF, // Valve Texture Format: http://developer.valvesoftware.com/wiki/Valve_Texture_Format
};
@ -439,6 +463,9 @@ namespace nvtt
ToneMapper_Lightmap,
};
// Transform the given x,y coordinates.
typedef void WarpFunction(float & x, float & y, float & d);
// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
// @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
@ -486,7 +513,8 @@ namespace nvtt
NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter);
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter);
NVTT_API void resizeMakeSquare(int maxExtent, RoundMode roundMode, ResizeFilter filter);
NVTT_API void autoResize(float errorTolerance, RoundMode mode, ResizeFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1);
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1);
@ -554,6 +582,10 @@ namespace nvtt
NVTT_API void flipZ();
NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const;
NVTT_API Surface warp(int w, int h, WarpFunction * f) const;
NVTT_API Surface warp(int w, int h, int d, WarpFunction * f) const;
// Copy image data.
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel);
NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel, int dstChannel);

@ -146,9 +146,16 @@ static const char * s_witnessImageSet[] = {
};
static const char * s_witnessLmapImageSet[] = {
"specruin.dds",
"cottage.dds",
"hallway.dds",
"windmill.dds",
"tunnel.dds",
"theater.dds",
"tower.dds",
"hub.dds",
"mine.dds",
"archway.dds",
"hut.dds",
"shaft.dds",
};
static const char * s_normalMapImageSet[] = {
@ -189,6 +196,12 @@ enum Mode {
//Mode_BC5_Normal_DualParaboloid,
Mode_BC6,
Mode_BC7,
Mode_ETC1_IC,
Mode_ETC1_EtcLib,
Mode_ETC2_EtcLib,
Mode_ETC1_RgEtc,
Mode_ETC2_RGBM,
Mode_PVR,
Mode_Count
};
static const char * s_modeNames[] = {
@ -209,6 +222,12 @@ static const char * s_modeNames[] = {
//"BC5-Normal-DualParaboloid", // Mode_BC5_Normal_DualParaboloid,
"BC6", // Mode_BC6,
"BC7", // Mode_BC7,
"ETC1-IC",
"ETC1-EtcLib",
"ETC2-EtcLib",
"ETC1-RgEtc",
"ETC2-RGBM",
"PVR",
};
nvStaticCheck(NV_ARRAY_SIZE(s_modeNames) == Mode_Count);
@ -218,14 +237,16 @@ struct Test {
Mode modes[6];
};
static Test s_imageTests[] = {
{"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
{"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
//{"Normal", 3, {Mode_BC1_Normal, Mode_BC3_Normal, Mode_BC5_Normal}},
{"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
{"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
{"HDR", 2, {Mode_BC3_RGBM, Mode_BC6}},
{"BC6", 1, {Mode_BC6}},
{"BC7", 1, {Mode_BC7}},
/*0*/ {"Color", 3, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, /*Mode_BC3_LUVW*/}},
/*1*/ {"Alpha", 3, {Mode_BC1_Alpha, Mode_BC2_Alpha, Mode_BC3_Alpha}},
/*2*/ {"Normal", 4, {Mode_BC5_Normal, Mode_BC5_Normal_Stereographic, Mode_BC5_Normal_Paraboloid, Mode_BC5_Normal_Quartic}},
/*3*/ {"Lightmap", 4, {Mode_BC1, Mode_BC3_YCoCg, Mode_BC3_RGBM, Mode_BC3_RGBS}},
/*4*/ {"HDR", 3, {Mode_ETC2_RGBM, Mode_BC3_RGBM, Mode_BC6}},
/*5*/ {"BC6", 1, {Mode_BC6}},
/*6*/ {"BC7", 1, {Mode_BC7}},
/*7*/ {"ETC", 3, {Mode_ETC1_IC, Mode_ETC1_RgEtc, Mode_ETC2_EtcLib}},
/*8*/ {"Color Mobile", 4, {Mode_PVR, Mode_ETC1_IC, Mode_ETC2_EtcLib, Mode_BC1}},
/*9*/ //{"ETC-Lightmap", 2, {Mode_BC3_RGBM, Mode_ETC_RGBM}},
};
const int s_imageTestCount = ARRAY_SIZE(s_imageTests);
@ -462,7 +483,8 @@ int main(int argc, char *argv[])
}
else
{
compressionOptions.setQuality(nvtt::Quality_Production);
compressionOptions.setQuality(nvtt::Quality_Normal);
//compressionOptions.setQuality(nvtt::Quality_Production);
}
//compressionOptions.setExternalCompressor("ati");
//compressionOptions.setExternalCompressor("squish");
@ -515,13 +537,13 @@ int main(int argc, char *argv[])
// Labels on the left side.
if (errorMode == ErrorMode_RMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01";
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.03,0.01";
}
else if (errorMode == ErrorMode_CieLab) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,4,22,1";
}
else if (errorMode == ErrorMode_AngularRMSE) {
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.05,0.01";
graphWriter << "&chxr=0,1," << set.fileCount << ",1|1,0,0.2,0.02"; // 0.05,0.01
}
// Labels at the bottom.
@ -583,14 +605,13 @@ int main(int argc, char *argv[])
}
Timer timer;
//int failedTests = 0;
//float totalDiff = 0;
nvtt::Surface img;
printf("Running Test: %s with Set: %s\n", test.name, set.name);
printf("Running test '%s' with set '%s'\n", test.name, set.name);
graphWriter << "&chd=t:";
@ -602,10 +623,11 @@ int main(int argc, char *argv[])
Mode mode = test.modes[t];
nvtt::Format format;
const char * compressor_name = NULL;
if (mode == Mode_BC1 || mode == Mode_BC1_Alpha || mode == Mode_BC1_Normal || mode == Mode_BC3_RGBS) {
format = nvtt::Format_BC1;
}
else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_RGBM || mode == Mode_BC3_LUVW) {
else if (mode == Mode_BC3_Alpha || mode == Mode_BC3_YCoCg || mode == Mode_BC3_LUVW) {
format = nvtt::Format_BC3;
}
else if (mode == Mode_BC3_Normal) {
@ -614,6 +636,9 @@ int main(int argc, char *argv[])
else if (mode == Mode_BC5_Normal || mode == Mode_BC5_Normal_Stereographic || mode == Mode_BC5_Normal_Paraboloid || mode == Mode_BC5_Normal_Quartic) {
format = nvtt::Format_BC5;
}
else if (mode == Mode_BC3_RGBM) {
format = nvtt::Format_BC3_RGBM;
}
else if (mode == Mode_BC6)
{
format = nvtt::Format_BC6;
@ -622,12 +647,40 @@ int main(int argc, char *argv[])
{
format = nvtt::Format_BC7;
}
else if (mode == Mode_ETC1_IC)
{
format = nvtt::Format_ETC1;
}
else if (mode == Mode_ETC1_EtcLib)
{
format = nvtt::Format_ETC1;
compressor_name = "etclib";
}
else if (mode == Mode_ETC2_EtcLib)
{
format = nvtt::Format_ETC2_RGB;
compressor_name = "etclib";
}
else if (mode == Mode_ETC1_RgEtc)
{
format = nvtt::Format_ETC1;
compressor_name = "rg_etc";
}
else if (mode == Mode_ETC2_RGBM)
{
format = nvtt::Format_ETC2_RGBM;
}
else if (mode == Mode_PVR)
{
format = nvtt::Format_PVR_4BPP_RGB;
}
else
{
nvDebugCheck(false);
nvUnreachable();
}
compressionOptions.setFormat(format);
if (compressor_name) compressionOptions.setExternalCompressor(compressor_name);
if (set.type == ImageType_RGBA) {
img.setAlphaMode(nvtt::AlphaMode_Transparency);
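The new branches above boil down to a (format, external compressor) pair per ETC/PVR mode; the same mapping written as a table, purely as an illustrative sketch (ModeSetup is hypothetical, not part of the tester):

// Sketch only: table form of the ETC/PVR cases handled by the if/else chain above.
struct ModeSetup { nvtt::Format format; const char * compressor; };
static const ModeSetup s_etcSetups[] = {
    /* Mode_ETC1_IC     */ { nvtt::Format_ETC1,         NULL },     // built-in compressor
    /* Mode_ETC1_EtcLib */ { nvtt::Format_ETC1,         "etclib" },
    /* Mode_ETC2_EtcLib */ { nvtt::Format_ETC2_RGB,     "etclib" },
    /* Mode_ETC1_RgEtc  */ { nvtt::Format_ETC1,         "rg_etc" },
    /* Mode_ETC2_RGBM   */ { nvtt::Format_ETC2_RGBM,    NULL },
    /* Mode_PVR         */ { nvtt::Format_PVR_4BPP_RGB, NULL },
};

A null compressor means the default nvtt path; otherwise setExternalCompressor() is applied exactly as in the chain above.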
@ -653,6 +706,7 @@ int main(int argc, char *argv[])
printf("Input image '%s' not found.\n", set.fileNames[i]);
return EXIT_FAILURE;
}
float color_range = 0.0f;
if (img.isNormalMap()) {
img.normalizeNormalMap();
@ -693,16 +747,34 @@ int main(int argc, char *argv[])
tmp.clamp(2);
tmp.clamp(3);
}
else if (mode == Mode_BC3_RGBM) {
tmp.setAlphaMode(nvtt::AlphaMode_None);
if (set.type == ImageType_HDR) {
// Transform to gamma-2.0 space before applying RGBM - helps a lot with banding in the darks.
tmp.toGamma(2.0f);
tmp.toRGBM(3.0f); // range of 3.0 in gamma-2.0 space == range of 9.0 in linear space
else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
float r, g, b;
tmp.range(0, NULL, &r);
tmp.range(1, NULL, &g);
tmp.range(2, NULL, &b);
color_range = max3(r, g, b);
printf("color range = %f\n", color_range);
tmp.setAlphaMode(nvtt::AlphaMode_Transparency);
const float max_color_range = 16.0f;
if (color_range > max_color_range) {
color_range = max_color_range;
}
else {
tmp.toRGBM();
for (int i = 0; i < 3; i++) {
tmp.scaleBias(i, 1.0f / color_range, 0.0f);
}
tmp.toneMap(nvtt::ToneMapper_Linear, /*parameters=*/NULL); // Clamp without changing the hue.
// Clamp alpha.
tmp.clamp(3);
// To gamma.
tmp.toGamma(2);
compressionOptions.setRGBMThreshold(0.2f);
}
else if (mode == Mode_BC3_LUVW) {
tmp.setAlphaMode(nvtt::AlphaMode_None);
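For context, the per-pixel split that the RGBM formats perform after the preparation above is roughly the following. This is a hedged sketch, not nvtt's exact implementation: quantization handling is omitted, and the explicit scaleBias calls above already take care of the color range.

#include <algorithm>  // std::max, std::min

struct Rgb { float r, g, b; };

// Encode: store max(r,g,b) (clamped to [threshold,1]) in alpha, rgb/M in color.
static void rgbmEncode(Rgb c, float threshold, Rgb * outRgb, float * outM) {
    float m = std::max(std::max(c.r, c.g), std::max(c.b, threshold));
    m = std::min(m, 1.0f);                       // input is already clamped to [0,1]
    *outRgb = { c.r / m, c.g / m, c.b / m };
    *outM = m;
}

// Decode: multiply the stored color by the stored multiplier.
static Rgb rgbmDecode(Rgb stored, float m) {
    return { stored.r * m, stored.g * m, stored.b * m };
}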
@ -781,14 +853,25 @@ int main(int argc, char *argv[])
}*/
}
}
else if (mode == Mode_BC3_RGBM) {
if (set.type == ImageType_HDR) {
img_out.fromRGBM(3.0f);
else if (mode == Mode_BC3_RGBM || mode == Mode_ETC2_RGBM) {
/*if (set.type == ImageType_HDR) {
//img_out.fromRGBM(3.0f);
img_out.fromRGBM(range);
img_out.toLinear(2.0f);
}
else {
img_out.fromRGBM();
}*/
img_out.fromRGBM(1.0f, 0.2f);
img_out.toLinear(2);
for (int i = 0; i < 3; i++) {
img_out.scaleBias(i, color_range, 0.0f);
}
img_out.copyChannel(img, 3); // Copy alpha channel from source.
img_out.setAlphaMode(nvtt::AlphaMode_Transparency);
}
else if (mode == Mode_BC3_LUVW) {
if (set.type == ImageType_HDR) {

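The decode path above simply runs the encode pipeline in reverse; pairing the two step by step (a summary of the code above, with color_range computed per image):

// Encode (before compression)          Decode (after decompression)
// scaleBias(i, 1/color_range, 0)  <->  scaleBias(i, color_range, 0)
// toneMap(Linear) + clamp(3)      <->  (lossy clamp, not undone)
// toGamma(2)                      <->  toLinear(2)
// RGBM split, threshold 0.2       <->  fromRGBM(1.0f, 0.2f)
// (alpha consumed by RGBM)        <->  copyChannel(img, 3) restores the source alpha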
@ -61,6 +61,9 @@ struct MyAssertHandler : public nv::AssertHandler {
virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) {
fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line);
nv::debug::dumpInfo();
if (nv::debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
exit(1);
}
};

@ -154,11 +154,13 @@ int main(int argc, char *argv[])
bool loadAsFloat = false;
bool rgbm = false;
bool rangescale = false;
bool srgb = false;
const char * externalCompressor = NULL;
bool silent = false;
bool dds10 = false;
bool ktx = false;
nv::Path input;
nv::Path output;
@ -285,6 +287,31 @@ int main(int argc, char *argv[])
format = nvtt::Format_BC3_RGBM;
rgbm = true;
}
else if (strcmp("-etc1", argv[i]) == 0)
{
format = nvtt::Format_ETC1;
}
else if (strcmp("-etc2", argv[i]) == 0 || strcmp("-etc2_rgb", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGB;
}
else if (strcmp("-etc2_eac", argv[i]) == 0 || strcmp("-etc2_rgba", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBA;
}
else if (strcmp("-eac", argv[i]) == 0 || strcmp("-etc2_r", argv[i]) == 0)
{
format = nvtt::Format_ETC2_R;
}
else if (strcmp("-etc2_rg", argv[i]) == 0)
{
format = nvtt::Format_ETC2_R;
}
else if (strcmp("-etc2_rgbm", argv[i]) == 0)
{
format = nvtt::Format_ETC2_RGBM;
rgbm = true;
}
// Undocumented option. Mainly used for testing.
else if (strcmp("-ext", argv[i]) == 0)
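Taken together with the existing options, the new flags allow invocations like `./nvcompress -etc2_rgba -ktx image.dds`: the flag selects Format_ETC2_RGBA, and -ktx (added below) both switches the container to KTX and defaults the output name to image.ktx. This is an illustrative command line, not one taken from the test scripts.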
@ -309,6 +336,14 @@ int main(int argc, char *argv[])
{
dds10 = true;
}
else if (strcmp("-ktx", argv[i]) == 0)
{
ktx = true;
}
else if (strcmp("-srgb", argv[i]) == 0)
{
srgb = true;
}
else if (argv[i][0] != '-')
{
@ -321,8 +356,16 @@ int main(int argc, char *argv[])
{
output.copy(input.str());
output.stripExtension();
if (ktx)
{
output.append(".ktx");
}
else
{
output.append(".dds");
}
}
break;
}
@ -380,7 +423,9 @@ int main(int argc, char *argv[])
printf("Output options:\n");
printf(" -silent \tDo not output progress messages\n");
printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7)\n\n");
printf(" -dds10 \tUse DirectX 10 DDS format (enabled by default for BC6/7, unless ktx is being used)\n");
printf(" -ktx \tUse KTX container format\n");
printf(" -srgb \tIf the requested format allows it, output will be in sRGB color space\n\n");
return EXIT_FAILURE;
}
@ -398,7 +443,7 @@ int main(int argc, char *argv[])
bool useSurface = false; // @@ use Surface API in all cases!
nvtt::Surface image;
if (format == nvtt::Format_BC3_RGBM || rgbm) {
if (format == nvtt::Format_BC3_RGBM || format == nvtt::Format_ETC2_RGBM || rgbm) {
useSurface = true;
if (!image.load(input.str())) {
@ -440,7 +485,7 @@ int main(int argc, char *argv[])
// To gamma.
image.toGamma(2);
if (format != nvtt::Format_BC3_RGBM) {
if (format != nvtt::Format_BC3_RGBM && format != nvtt::Format_ETC2_RGBM) {
image.setAlphaMode(nvtt::AlphaMode_None);
image.toRGBM(1, 0.15f);
}
@ -494,7 +539,7 @@ int main(int argc, char *argv[])
nvDebugCheck(dds.isTextureArray());
inputOptions.setTextureLayout(nvtt::TextureType_Array, dds.width(), dds.height(), 1, dds.arraySize());
faceCount = dds.arraySize();
dds10 = true;
dds10 = !ktx;
}
uint mipmapCount = dds.mipmapCount();
@ -569,11 +614,12 @@ int main(int argc, char *argv[])
inputOptions.setAlphaMode(nvtt::AlphaMode_None);
}
// IC: Do not enforce D3D9 restrictions anymore.
// Block compressed textures with mipmaps must be powers of two.
if (!noMipmaps && format != nvtt::Format_RGB)
/*if (!noMipmaps && format != nvtt::Format_RGB)
{
inputOptions.setRoundMode(nvtt::RoundMode_ToPreviousPowerOfTwo);
}
}*/
if (normal)
{
@ -720,16 +766,28 @@ int main(int argc, char *argv[])
outputOptions.setOutputHandler(&outputHandler);
outputOptions.setErrorHandler(&errorHandler);
// Automatically use dds10 if compressing to BC6 or BC7
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7)
if (ktx)
{
outputOptions.setContainer(nvtt::Container_KTX);
}
else
{
// Automatically use dds10 if compressing to BC6 or BC7
if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) {
dds10 = true;
}
if (dds10)
{
if (dds10) {
outputOptions.setContainer(nvtt::Container_DDS10);
}
else {
outputOptions.setContainer(nvtt::Container_DDS);
}
}
if (srgb) {
outputOptions.setSrgbFlag(true);
}
// printf("Press ENTER.\n");
// fflush(stdout);
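The container selection above reduces to a small decision: KTX wins when requested, otherwise DDS10 is forced for BC6/BC7 and honoured when -dds10 was passed. As a standalone sketch (the helper name is illustrative, not part of nvcompress):

// Mirrors the container logic above; chooseContainer() is illustrative only.
static nvtt::Container chooseContainer(bool ktx, bool dds10, nvtt::Format format) {
    if (ktx) return nvtt::Container_KTX;
    if (format == nvtt::Format_BC6 || format == nvtt::Format_BC7) dds10 = true;
    return dds10 ? nvtt::Container_DDS10 : nvtt::Container_DDS;
}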
