Merge changes from the witness.
This commit is contained in:
parent
4cb60cc5ba
commit
d019cd7080
@ -96,8 +96,11 @@ namespace nv
|
|||||||
/// Get vector pointer.
|
/// Get vector pointer.
|
||||||
NV_FORCEINLINE T * buffer() { return m_buffer; }
|
NV_FORCEINLINE T * buffer() { return m_buffer; }
|
||||||
|
|
||||||
|
/// Provide begin/end pointers for C++11 range-based for loops.
|
||||||
NV_FORCEINLINE T * begin() { return m_buffer; }
|
NV_FORCEINLINE T * begin() { return m_buffer; }
|
||||||
NV_FORCEINLINE T * end() { return m_buffer + m_size; }
|
NV_FORCEINLINE T * end() { return m_buffer + m_size; }
|
||||||
|
NV_FORCEINLINE const T * begin() const { return m_buffer; }
|
||||||
|
NV_FORCEINLINE const T * end() const { return m_buffer + m_size; }
|
||||||
|
|
||||||
/// Is vector empty.
|
/// Is vector empty.
|
||||||
NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
|
NV_FORCEINLINE bool isEmpty() const { return m_size == 0; }
|
||||||
@ -106,6 +109,7 @@ namespace nv
|
|||||||
NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
|
NV_FORCEINLINE bool isNull() const { return m_buffer == NULL; }
|
||||||
|
|
||||||
|
|
||||||
|
T & append();
|
||||||
void push_back( const T & val );
|
void push_back( const T & val );
|
||||||
void pushBack( const T & val );
|
void pushBack( const T & val );
|
||||||
Array<T> & append( const T & val );
|
Array<T> & append( const T & val );
|
||||||
|
@ -16,6 +16,18 @@
|
|||||||
|
|
||||||
namespace nv
|
namespace nv
|
||||||
{
|
{
|
||||||
|
template <typename T>
|
||||||
|
NV_FORCEINLINE T & Array<T>::append()
|
||||||
|
{
|
||||||
|
uint old_size = m_size;
|
||||||
|
uint new_size = m_size + 1;
|
||||||
|
|
||||||
|
setArraySize(new_size);
|
||||||
|
|
||||||
|
construct_range(m_buffer, new_size, old_size);
|
||||||
|
|
||||||
|
return m_buffer[old_size]; // Return reference to last element.
|
||||||
|
}
|
||||||
|
|
||||||
// Push an element at the end of the vector.
|
// Push an element at the end of the vector.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
@ -211,7 +223,7 @@ namespace nv
|
|||||||
void Array<T>::replaceWithLast(uint index)
|
void Array<T>::replaceWithLast(uint index)
|
||||||
{
|
{
|
||||||
nvDebugCheck( index < m_size );
|
nvDebugCheck( index < m_size );
|
||||||
nv::swap(m_buffer[index], back());
|
nv::swap(m_buffer[index], back()); // @@ Is this OK when index == size-1?
|
||||||
(m_buffer+m_size-1)->~T();
|
(m_buffer+m_size-1)->~T();
|
||||||
m_size--;
|
m_size--;
|
||||||
}
|
}
|
||||||
|
@ -66,6 +66,10 @@
|
|||||||
# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if NV_OS_ORBIS
|
||||||
|
#include <libdbg.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
#define NV_USE_SEPARATE_THREAD 1
|
#define NV_USE_SEPARATE_THREAD 1
|
||||||
|
|
||||||
|
|
||||||
@ -263,7 +267,7 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
/*static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
|
/*static NV_NOINLINE int backtrace(void * trace[], int maxcount) {
|
||||||
|
|
||||||
// In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63.
|
// In Windows XP and Windows Server 2003, the sum of the FramesToSkip and FramesToCapture parameters must be less than 63.
|
||||||
int xp_maxcount = min(63-1, maxcount);
|
int xp_maxcount = min(63-1, maxcount);
|
||||||
|
|
||||||
@ -274,7 +278,7 @@ namespace
|
|||||||
}*/
|
}*/
|
||||||
|
|
||||||
static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) {
|
static NV_NOINLINE int backtraceWithSymbols(CONTEXT * ctx, void * trace[], int maxcount, int skip = 0) {
|
||||||
|
|
||||||
// Init the stack frame for this function
|
// Init the stack frame for this function
|
||||||
STACKFRAME64 stackFrame = { 0 };
|
STACKFRAME64 stackFrame = { 0 };
|
||||||
|
|
||||||
@ -344,74 +348,74 @@ namespace
|
|||||||
StringBuilder builder(512);
|
StringBuilder builder(512);
|
||||||
|
|
||||||
HANDLE hProcess = GetCurrentProcess();
|
HANDLE hProcess = GetCurrentProcess();
|
||||||
|
|
||||||
// Resolve PC to function names
|
// Resolve PC to function names
|
||||||
for (int i = start; i < size; i++)
|
for (int i = start; i < size; i++)
|
||||||
{
|
{
|
||||||
// Check for end of stack walk
|
// Check for end of stack walk
|
||||||
DWORD64 ip = (DWORD64)trace[i];
|
DWORD64 ip = (DWORD64)trace[i];
|
||||||
if (ip == NULL)
|
if (ip == NULL)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
// Get function name
|
// Get function name
|
||||||
#define MAX_STRING_LEN (512)
|
#define MAX_STRING_LEN (512)
|
||||||
unsigned char byBuffer[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 };
|
unsigned char byBuffer[sizeof(IMAGEHLP_SYMBOL64) + MAX_STRING_LEN] = { 0 };
|
||||||
IMAGEHLP_SYMBOL64 * pSymbol = (IMAGEHLP_SYMBOL64*)byBuffer;
|
IMAGEHLP_SYMBOL64 * pSymbol = (IMAGEHLP_SYMBOL64*)byBuffer;
|
||||||
pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
|
pSymbol->SizeOfStruct = sizeof(IMAGEHLP_SYMBOL64);
|
||||||
pSymbol->MaxNameLength = MAX_STRING_LEN;
|
pSymbol->MaxNameLength = MAX_STRING_LEN;
|
||||||
|
|
||||||
DWORD64 dwDisplacement;
|
DWORD64 dwDisplacement;
|
||||||
|
|
||||||
if (SymGetSymFromAddr64(hProcess, ip, &dwDisplacement, pSymbol))
|
if (SymGetSymFromAddr64(hProcess, ip, &dwDisplacement, pSymbol))
|
||||||
{
|
{
|
||||||
pSymbol->Name[MAX_STRING_LEN-1] = 0;
|
pSymbol->Name[MAX_STRING_LEN-1] = 0;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
// Make the symbol readable for humans
|
// Make the symbol readable for humans
|
||||||
UnDecorateSymbolName( pSym->Name, lpszNonUnicodeUnDSymbol, BUFFERSIZE,
|
UnDecorateSymbolName( pSym->Name, lpszNonUnicodeUnDSymbol, BUFFERSIZE,
|
||||||
UNDNAME_COMPLETE |
|
UNDNAME_COMPLETE |
|
||||||
UNDNAME_NO_THISTYPE |
|
UNDNAME_NO_THISTYPE |
|
||||||
UNDNAME_NO_SPECIAL_SYMS |
|
UNDNAME_NO_SPECIAL_SYMS |
|
||||||
UNDNAME_NO_MEMBER_TYPE |
|
UNDNAME_NO_MEMBER_TYPE |
|
||||||
UNDNAME_NO_MS_KEYWORDS |
|
UNDNAME_NO_MS_KEYWORDS |
|
||||||
UNDNAME_NO_ACCESS_SPECIFIERS );
|
UNDNAME_NO_ACCESS_SPECIFIERS );
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// pSymbol->Name
|
// pSymbol->Name
|
||||||
const char * pFunc = pSymbol->Name;
|
const char * pFunc = pSymbol->Name;
|
||||||
|
|
||||||
// Get file/line number
|
|
||||||
IMAGEHLP_LINE64 theLine = { 0 };
|
|
||||||
theLine.SizeOfStruct = sizeof(theLine);
|
|
||||||
|
|
||||||
DWORD dwDisplacement;
|
// Get file/line number
|
||||||
if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine))
|
IMAGEHLP_LINE64 theLine = { 0 };
|
||||||
{
|
theLine.SizeOfStruct = sizeof(theLine);
|
||||||
|
|
||||||
|
DWORD dwDisplacement;
|
||||||
|
if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine))
|
||||||
|
{
|
||||||
// Do not print unknown symbols anymore.
|
// Do not print unknown symbols anymore.
|
||||||
break;
|
break;
|
||||||
//builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc);
|
//builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
/*
|
/*
|
||||||
const char* pFile = strrchr(theLine.FileName, '\\');
|
const char* pFile = strrchr(theLine.FileName, '\\');
|
||||||
if ( pFile == NULL ) pFile = theLine.FileName;
|
if ( pFile == NULL ) pFile = theLine.FileName;
|
||||||
else pFile++;
|
else pFile++;
|
||||||
*/
|
*/
|
||||||
const char * pFile = theLine.FileName;
|
const char * pFile = theLine.FileName;
|
||||||
|
|
||||||
int line = theLine.LineNumber;
|
int line = theLine.LineNumber;
|
||||||
|
|
||||||
builder.format("%s(%d) : %s\n", pFile, line, pFunc);
|
builder.format("%s(%d) : %s\n", pFile, line, pFunc);
|
||||||
}
|
}
|
||||||
|
|
||||||
lines.append(builder.release());
|
lines.append(builder.release());
|
||||||
|
|
||||||
if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) {
|
if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@ -479,41 +483,36 @@ namespace
|
|||||||
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void handleInvalidParameter(const wchar_t * expresion, const wchar_t * function, const wchar_t * file, unsigned int line, uintptr_t reserved) {
|
static void handleInvalidParameter(const wchar_t * wexpresion, const wchar_t * wfunction, const wchar_t * wfile, unsigned int line, uintptr_t reserved) {
|
||||||
|
|
||||||
size_t convertedCharCount = 0;
|
size_t convertedCharCount = 0;
|
||||||
StringBuilder tmp;
|
|
||||||
|
StringBuilder expresion;
|
||||||
if (expresion != NULL) {
|
if (wexpresion != NULL) {
|
||||||
uint size = toU32(wcslen(expresion) + 1);
|
uint size = U32(wcslen(wexpresion) + 1);
|
||||||
tmp.reserve(size);
|
expresion.reserve(size);
|
||||||
wcstombs_s(&convertedCharCount, tmp.str(), size, expresion, _TRUNCATE);
|
wcstombs_s(&convertedCharCount, expresion.str(), size, wexpresion, _TRUNCATE);
|
||||||
|
|
||||||
nvDebug("*** Invalid parameter: %s\n", tmp.str());
|
|
||||||
|
|
||||||
if (file != NULL) {
|
|
||||||
size = toU32(wcslen(file) + 1);
|
|
||||||
tmp.reserve(size);
|
|
||||||
wcstombs_s(&convertedCharCount, tmp.str(), size, file, _TRUNCATE);
|
|
||||||
|
|
||||||
nvDebug(" On file: %s\n", tmp.str());
|
|
||||||
|
|
||||||
if (function != NULL) {
|
|
||||||
size = toU32(wcslen(function) + 1);
|
|
||||||
tmp.reserve(size);
|
|
||||||
wcstombs_s(&convertedCharCount, tmp.str(), size, function, _TRUNCATE);
|
|
||||||
|
|
||||||
nvDebug(" On function: %s\n", tmp.str());
|
|
||||||
}
|
|
||||||
|
|
||||||
nvDebug(" On line: %u\n", line);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
nvDebugBreak();
|
StringBuilder file;
|
||||||
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
if (wfile != NULL) {
|
||||||
}
|
uint size = U32(wcslen(wfile) + 1);
|
||||||
|
file.reserve(size);
|
||||||
|
wcstombs_s(&convertedCharCount, file.str(), size, wfile, _TRUNCATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
StringBuilder function;
|
||||||
|
if (wfunction != NULL) {
|
||||||
|
uint size = U32(wcslen(wfunction) + 1);
|
||||||
|
function.reserve(size);
|
||||||
|
wcstombs_s(&convertedCharCount, function.str(), size, wfunction, _TRUNCATE);
|
||||||
|
}
|
||||||
|
|
||||||
|
int result = nvAbort(expresion.str(), file.str(), line, function.str());
|
||||||
|
if (result == NV_ABORT_DEBUG) {
|
||||||
|
nvDebugBreak();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
|
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
|
||||||
|
|
||||||
@ -770,7 +769,7 @@ namespace
|
|||||||
|
|
||||||
if (s_interactive) {
|
if (s_interactive) {
|
||||||
flushMessageQueue();
|
flushMessageQueue();
|
||||||
int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE|MB_ICONERROR);
|
int action = MessageBoxA(NULL, error_string.str(), "Assertion failed", MB_ABORTRETRYIGNORE | MB_ICONERROR | MB_TOPMOST);
|
||||||
switch( action ) {
|
switch( action ) {
|
||||||
case IDRETRY:
|
case IDRETRY:
|
||||||
ret = NV_ABORT_DEBUG;
|
ret = NV_ABORT_DEBUG;
|
||||||
@ -851,11 +850,10 @@ namespace
|
|||||||
printStackTrace(trace, size, 2);
|
printStackTrace(trace, size, 2);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
//SBtodoORBIS check for debugger present
|
if (debug::isDebuggerPresent())
|
||||||
//if (debug::isDebuggerPresent())
|
return NV_ABORT_DEBUG;
|
||||||
nvDebugBreak();
|
|
||||||
|
|
||||||
return NV_ABORT_DEBUG;
|
return NV_ABORT_IGNORE;
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -892,9 +890,9 @@ namespace
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
if( ret == NV_ABORT_EXIT ) {
|
if( ret == NV_ABORT_EXIT ) {
|
||||||
// Exit cleanly.
|
// Exit cleanly.
|
||||||
exit(EXIT_FAILURE + 1);
|
exit(EXIT_FAILURE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
@ -1190,6 +1188,12 @@ bool debug::isDebuggerPresent()
|
|||||||
#else
|
#else
|
||||||
return false;
|
return false;
|
||||||
#endif
|
#endif
|
||||||
|
#elif NV_OS_ORBIS
|
||||||
|
#if PS4_FINAL_REQUIREMENTS
|
||||||
|
return false;
|
||||||
|
#else
|
||||||
|
return sceDbgIsDebuggerAttached() == 1;
|
||||||
|
#endif
|
||||||
#elif NV_OS_DARWIN
|
#elif NV_OS_DARWIN
|
||||||
int mib[4];
|
int mib[4];
|
||||||
struct kinfo_proc info;
|
struct kinfo_proc info;
|
||||||
|
@ -34,7 +34,9 @@
|
|||||||
# if NV_CC_MSVC
|
# if NV_CC_MSVC
|
||||||
// @@ Does this work in msvc-6 and earlier?
|
// @@ Does this work in msvc-6 and earlier?
|
||||||
# define nvDebugBreak() __debugbreak()
|
# define nvDebugBreak() __debugbreak()
|
||||||
//#define nvDebugBreak() __asm { int 3 }
|
//# define nvDebugBreak() __asm { int 3 }
|
||||||
|
# elif NV_OS_ORBIS
|
||||||
|
# define nvDebugBreak() __debugbreak()
|
||||||
# elif NV_CC_GNUC
|
# elif NV_CC_GNUC
|
||||||
# define nvDebugBreak() __builtin_trap()
|
# define nvDebugBreak() __builtin_trap()
|
||||||
# else
|
# else
|
||||||
@ -158,7 +160,7 @@
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...);
|
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...) __attribute__((format (printf, 5, 6)));
|
||||||
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
|
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
|
||||||
|
|
||||||
namespace nv
|
namespace nv
|
||||||
|
@ -8,24 +8,30 @@
|
|||||||
// Function linkage
|
// Function linkage
|
||||||
#define DLL_IMPORT
|
#define DLL_IMPORT
|
||||||
#if __GNUC__ >= 4
|
#if __GNUC__ >= 4
|
||||||
# define DLL_EXPORT __attribute__((visibility("default")))
|
# define DLL_EXPORT __attribute__((visibility("default")))
|
||||||
# define DLL_EXPORT_CLASS DLL_EXPORT
|
# define DLL_EXPORT_CLASS DLL_EXPORT
|
||||||
#else
|
#else
|
||||||
# define DLL_EXPORT
|
# define DLL_EXPORT
|
||||||
# define DLL_EXPORT_CLASS
|
# define DLL_EXPORT_CLASS
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Function calling modes
|
// Function calling modes
|
||||||
#if NV_CPU_X86
|
#if NV_CPU_X86
|
||||||
# define NV_CDECL __attribute__((cdecl))
|
# define NV_CDECL __attribute__((cdecl))
|
||||||
# define NV_STDCALL __attribute__((stdcall))
|
# define NV_STDCALL __attribute__((stdcall))
|
||||||
#else
|
#else
|
||||||
# define NV_CDECL
|
# define NV_CDECL
|
||||||
# define NV_STDCALL
|
# define NV_STDCALL
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define NV_FASTCALL __attribute__((fastcall))
|
#define NV_FASTCALL __attribute__((fastcall))
|
||||||
#define NV_FORCEINLINE __attribute__((always_inline)) inline
|
//#if __GNUC__ > 3
|
||||||
|
// It seems that GCC does not assume always_inline implies inline. I think this depends on the GCC version :(
|
||||||
|
#define NV_FORCEINLINE inline __attribute__((always_inline))
|
||||||
|
//#else
|
||||||
|
// Some compilers complain that inline and always_inline are redundant.
|
||||||
|
//#define NV_FORCEINLINE __attribute__((always_inline))
|
||||||
|
//#endif
|
||||||
#define NV_DEPRECATED __attribute__((deprecated))
|
#define NV_DEPRECATED __attribute__((deprecated))
|
||||||
#define NV_THREAD_LOCAL __thread
|
#define NV_THREAD_LOCAL __thread
|
||||||
|
|
||||||
@ -41,13 +47,13 @@
|
|||||||
|
|
||||||
// Define __FUNC__ properly.
|
// Define __FUNC__ properly.
|
||||||
#if __STDC_VERSION__ < 199901L
|
#if __STDC_VERSION__ < 199901L
|
||||||
# if __GNUC__ >= 2
|
# if __GNUC__ >= 2
|
||||||
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
|
# define __FUNC__ __PRETTY_FUNCTION__ // __FUNCTION__
|
||||||
# else
|
# else
|
||||||
# define __FUNC__ "<unknown>"
|
# define __FUNC__ "<unknown>"
|
||||||
# endif
|
# endif
|
||||||
#else
|
#else
|
||||||
# define __FUNC__ __PRETTY_FUNCTION__
|
# define __FUNC__ __PRETTY_FUNCTION__
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define restrict __restrict__
|
#define restrict __restrict__
|
||||||
|
@ -26,7 +26,7 @@
|
|||||||
#define chdir _chdir
|
#define chdir _chdir
|
||||||
#define getcwd _getcwd
|
#define getcwd _getcwd
|
||||||
|
|
||||||
#if _MSC_VER < 1800 // Not sure what version introduced this.
|
#if _MSC_VER < 1800 // Not sure what version introduced this.
|
||||||
#define va_copy(a, b) (a) = (b)
|
#define va_copy(a, b) (a) = (b)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -9,6 +9,8 @@
|
|||||||
#include <direct.h> // _mkdir
|
#include <direct.h> // _mkdir
|
||||||
#elif NV_OS_XBOX
|
#elif NV_OS_XBOX
|
||||||
#include <Xtl.h>
|
#include <Xtl.h>
|
||||||
|
#elif NV_OS_ORBIS
|
||||||
|
#include <fios2.h>
|
||||||
#else
|
#else
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -29,6 +31,11 @@ bool FileSystem::exists(const char * path)
|
|||||||
// PathFileExists requires linking to shlwapi.lib
|
// PathFileExists requires linking to shlwapi.lib
|
||||||
//return PathFileExists(path) != 0;
|
//return PathFileExists(path) != 0;
|
||||||
return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
|
return GetFileAttributesA(path) != INVALID_FILE_ATTRIBUTES;
|
||||||
|
#elif NV_OS_ORBIS
|
||||||
|
const int BUFFER_SIZE = 2048;
|
||||||
|
char file_fullpath[BUFFER_SIZE];
|
||||||
|
snprintf(file_fullpath, BUFFER_SIZE, "/app0/%s", path);
|
||||||
|
return sceFiosExistsSync(NULL, file_fullpath);
|
||||||
#else
|
#else
|
||||||
if (FILE * fp = fopen(path, "r"))
|
if (FILE * fp = fopen(path, "r"))
|
||||||
{
|
{
|
||||||
@ -43,6 +50,9 @@ bool FileSystem::createDirectory(const char * path)
|
|||||||
{
|
{
|
||||||
#if NV_OS_WIN32 || NV_OS_XBOX
|
#if NV_OS_WIN32 || NV_OS_XBOX
|
||||||
return CreateDirectoryA(path, NULL) != 0;
|
return CreateDirectoryA(path, NULL) != 0;
|
||||||
|
#elif NV_OS_ORBIS
|
||||||
|
// not implemented
|
||||||
|
return false;
|
||||||
#else
|
#else
|
||||||
return mkdir(path, 0777) != -1;
|
return mkdir(path, 0777) != -1;
|
||||||
#endif
|
#endif
|
||||||
@ -55,6 +65,9 @@ bool FileSystem::changeDirectory(const char * path)
|
|||||||
#elif NV_OS_XBOX
|
#elif NV_OS_XBOX
|
||||||
// Xbox doesn't support Current Working Directory!
|
// Xbox doesn't support Current Working Directory!
|
||||||
return false;
|
return false;
|
||||||
|
#elif NV_OS_ORBIS
|
||||||
|
// Orbis doesn't support Current Working Directory!
|
||||||
|
return false;
|
||||||
#else
|
#else
|
||||||
return chdir(path) != -1;
|
return chdir(path) != -1;
|
||||||
#endif
|
#endif
|
||||||
|
@ -50,6 +50,7 @@ struct PseudoIndexWrapper {
|
|||||||
// Declare foreach keyword.
|
// Declare foreach keyword.
|
||||||
#if !defined NV_NO_USE_KEYWORDS
|
#if !defined NV_NO_USE_KEYWORDS
|
||||||
# define foreach NV_FOREACH
|
# define foreach NV_FOREACH
|
||||||
|
# define foreach_index NV_FOREACH
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
@ -55,6 +55,10 @@ namespace nv {
|
|||||||
::free((void *)ptr);
|
::free((void *)ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Overwrite the storage of 'data' with zero bytes (sizeof(T) bytes starting at its address).
// NOTE(review): this is a raw byte-wise memset; presumably it is only meant for
// plain-data types without constructors or virtual functions -- confirm at call sites.
template <typename T>
NV_FORCEINLINE void zero(T & data)
{
    void * const bytes = &data;
    memset(bytes, 0, sizeof(T));
}
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
#endif // NV_CORE_MEMORY_H
|
#endif // NV_CORE_MEMORY_H
|
||||||
|
@ -37,7 +37,7 @@ namespace nv
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
/// Ctor.
|
/// Ctor.
|
||||||
StdStream( FILE * fp, bool autoclose=true ) : m_fp(fp), m_autoclose(autoclose) { }
|
StdStream( FILE * fp, bool autoclose ) : m_fp(fp), m_autoclose(autoclose) { }
|
||||||
|
|
||||||
/// Dtor.
|
/// Dtor.
|
||||||
virtual ~StdStream()
|
virtual ~StdStream()
|
||||||
@ -108,7 +108,8 @@ namespace nv
|
|||||||
// implementation uses ftell and fseek to determine our location within the file.
|
// implementation uses ftell and fseek to determine our location within the file.
|
||||||
virtual bool isAtEnd() const
|
virtual bool isAtEnd() const
|
||||||
{
|
{
|
||||||
nvDebugCheck(m_fp != NULL);
|
if (m_fp == NULL) return true;
|
||||||
|
//nvDebugCheck(m_fp != NULL);
|
||||||
//return feof( m_fp ) != 0;
|
//return feof( m_fp ) != 0;
|
||||||
#if NV_OS_WIN32
|
#if NV_OS_WIN32
|
||||||
uint pos = _ftell_nolock(m_fp);
|
uint pos = _ftell_nolock(m_fp);
|
||||||
@ -143,10 +144,10 @@ namespace nv
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
/// Construct stream by file name.
|
/// Construct stream by file name.
|
||||||
StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb")) { }
|
StdOutputStream( const char * name ) : StdStream(fileOpen(name, "wb"), /*autoclose=*/true) { }
|
||||||
|
|
||||||
/// Construct stream by file handle.
|
/// Construct stream by file handle.
|
||||||
StdOutputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
|
StdOutputStream( FILE * fp, bool autoclose ) : StdStream(fp, autoclose)
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -193,7 +194,7 @@ namespace nv
|
|||||||
public:
|
public:
|
||||||
|
|
||||||
/// Construct stream by file name.
|
/// Construct stream by file name.
|
||||||
StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb")) { }
|
StdInputStream( const char * name ) : StdStream(fileOpen(name, "rb"), /*autoclose=*/true) { }
|
||||||
|
|
||||||
/// Construct stream by file handle.
|
/// Construct stream by file handle.
|
||||||
StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
|
StdInputStream( FILE * fp, bool autoclose=true ) : StdStream(fp, autoclose)
|
||||||
|
@ -73,17 +73,17 @@ namespace
|
|||||||
uint nv::strLen(const char * str)
|
uint nv::strLen(const char * str)
|
||||||
{
|
{
|
||||||
nvDebugCheck(str != NULL);
|
nvDebugCheck(str != NULL);
|
||||||
return toU32(strlen(str));
|
return U32(strlen(str));
|
||||||
}
|
}
|
||||||
|
|
||||||
int nv::strCmp(const char * s1, const char * s2)
|
int nv::strDiff(const char * s1, const char * s2)
|
||||||
{
|
{
|
||||||
nvDebugCheck(s1 != NULL);
|
nvDebugCheck(s1 != NULL);
|
||||||
nvDebugCheck(s2 != NULL);
|
nvDebugCheck(s2 != NULL);
|
||||||
return strcmp(s1, s2);
|
return strcmp(s1, s2);
|
||||||
}
|
}
|
||||||
|
|
||||||
int nv::strCaseCmp(const char * s1, const char * s2)
|
int nv::strCaseDiff(const char * s1, const char * s2)
|
||||||
{
|
{
|
||||||
nvDebugCheck(s1 != NULL);
|
nvDebugCheck(s1 != NULL);
|
||||||
nvDebugCheck(s1 != NULL);
|
nvDebugCheck(s1 != NULL);
|
||||||
@ -98,14 +98,14 @@ bool nv::strEqual(const char * s1, const char * s2)
|
|||||||
{
|
{
|
||||||
if (s1 == s2) return true;
|
if (s1 == s2) return true;
|
||||||
if (s1 == NULL || s2 == NULL) return false;
|
if (s1 == NULL || s2 == NULL) return false;
|
||||||
return strCmp(s1, s2) == 0;
|
return strcmp(s1, s2) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool nv::strCaseEqual(const char * s1, const char * s2)
|
bool nv::strCaseEqual(const char * s1, const char * s2)
|
||||||
{
|
{
|
||||||
if (s1 == s2) return true;
|
if (s1 == s2) return true;
|
||||||
if (s1 == NULL || s2 == NULL) return false;
|
if (s1 == NULL || s2 == NULL) return false;
|
||||||
return strCaseCmp(s1, s2) == 0;
|
return strCaseDiff(s1, s2) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool nv::strBeginsWith(const char * str, const char * prefix)
|
bool nv::strBeginsWith(const char * str, const char * prefix)
|
||||||
@ -122,7 +122,7 @@ bool nv::strEndsWith(const char * str, const char * suffix)
|
|||||||
return strncmp(str + ml - sl, suffix, sl) == 0;
|
return strncmp(str + ml - sl, suffix, sl) == 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// @@ Add asserts to detect overlap between dst and src?
|
||||||
void nv::strCpy(char * dst, uint size, const char * src)
|
void nv::strCpy(char * dst, uint size, const char * src)
|
||||||
{
|
{
|
||||||
nvDebugCheck(dst != NULL);
|
nvDebugCheck(dst != NULL);
|
||||||
@ -142,8 +142,9 @@ void nv::strCpy(char * dst, uint size, const char * src, uint len)
|
|||||||
#if NV_CC_MSVC && _MSC_VER >= 1400
|
#if NV_CC_MSVC && _MSC_VER >= 1400
|
||||||
strncpy_s(dst, size, src, len);
|
strncpy_s(dst, size, src, len);
|
||||||
#else
|
#else
|
||||||
NV_UNUSED(size);
|
int n = min(len+1, size);
|
||||||
strncpy(dst, src, len);
|
strncpy(dst, src, n);
|
||||||
|
dst[n-1] = '\0';
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -220,6 +221,13 @@ match:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool nv::isNumber(const char * str) {
|
||||||
|
while(*str != '\0') {
|
||||||
|
if (!isDigit(*str)) return false;
|
||||||
|
str++;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** Empty string. */
|
/** Empty string. */
|
||||||
@ -325,25 +333,20 @@ StringBuilder & StringBuilder::formatList( const char * fmt, va_list arg )
|
|||||||
|
|
||||||
/** Append a string. */
|
/** Append a string. */
|
||||||
StringBuilder & StringBuilder::append( const char * s )
|
StringBuilder & StringBuilder::append( const char * s )
|
||||||
|
{
|
||||||
|
return append(s, U32(strlen( s )));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/** Append a string. */
|
||||||
|
StringBuilder & StringBuilder::append(const char * s, uint len)
|
||||||
{
|
{
|
||||||
nvDebugCheck(s != NULL);
|
nvDebugCheck(s != NULL);
|
||||||
|
|
||||||
const uint slen = uint(strlen( s ));
|
uint offset = length();
|
||||||
|
const uint size = offset + len + 1;
|
||||||
if (m_str == NULL) {
|
reserve(size);
|
||||||
m_size = slen + 1;
|
strCpy(m_str + offset, len + 1, s, len);
|
||||||
m_str = strAlloc(m_size);
|
|
||||||
memcpy(m_str, s, m_size);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
const uint len = uint(strlen( m_str ));
|
|
||||||
if (m_size < len + slen + 1) {
|
|
||||||
m_size = len + slen + 1;
|
|
||||||
m_str = strReAlloc(m_str, m_size);
|
|
||||||
}
|
|
||||||
|
|
||||||
memcpy(m_str + len, s, slen + 1);
|
|
||||||
}
|
|
||||||
|
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
@ -35,11 +35,11 @@ namespace nv
|
|||||||
uint operator()(const char * str) const { return strHash(str); }
|
uint operator()(const char * str) const { return strHash(str); }
|
||||||
};
|
};
|
||||||
|
|
||||||
NVCORE_API uint strLen(const char * str) NV_PURE;
|
NVCORE_API uint strLen(const char * str) NV_PURE; // Asserts on NULL strings.
|
||||||
|
|
||||||
NVCORE_API int strCmp(const char * s1, const char * s2) NV_PURE;
|
NVCORE_API int strDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings.
|
||||||
NVCORE_API int strCaseCmp(const char * s1, const char * s2) NV_PURE;
|
NVCORE_API int strCaseDiff(const char * s1, const char * s2) NV_PURE; // Asserts on NULL strings.
|
||||||
NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
|
NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
|
||||||
NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
|
NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings.
|
||||||
|
|
||||||
template <> struct Equal<const char *> {
|
template <> struct Equal<const char *> {
|
||||||
@ -56,6 +56,35 @@ namespace nv
|
|||||||
|
|
||||||
NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
|
NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
|
||||||
|
|
||||||
|
NVCORE_API bool isNumber(const char * str) NV_PURE;
|
||||||
|
|
||||||
|
/* @@ Implement these two functions and modify StringBuilder to use them?
|
||||||
|
NVCORE_API void strFormat(const char * dst, const char * fmt, ...);
|
||||||
|
NVCORE_API void strFormatList(const char * dst, const char * fmt, va_list arg);
|
||||||
|
|
||||||
|
template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) __attribute__((format (printf, 2, 3)));
|
||||||
|
template <size_t count> void strFormatSafe(char (&buffer)[count], const char *fmt, ...) {
|
||||||
|
va_list args;
|
||||||
|
va_start(args, fmt);
|
||||||
|
strFormatList(buffer, count, fmt, args);
|
||||||
|
va_end(args);
|
||||||
|
}
|
||||||
|
template <size_t count> void strFormatListSafe(char (&buffer)[count], const char *fmt, va_list arg) {
|
||||||
|
va_list tmp;
|
||||||
|
va_copy(tmp, args);
|
||||||
|
strFormatList(buffer, count, fmt, tmp);
|
||||||
|
va_end(tmp);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
// Copy 'src' into a fixed-size char array.
// The array's element count is deduced from its type and forwarded to strCpy
// as the destination size, so the caller cannot pass a mismatched size.
template <int count>
void strCpySafe(char (&buffer)[count], const char *src)
{
    strCpy(buffer, count, src);
}
|
||||||
|
|
||||||
|
// Concatenate 'src' onto the string held in a fixed-size char array.
// The array's element count is deduced from its type and forwarded to strCat
// as the destination size, so the caller cannot pass a mismatched size.
template <int count>
void strCatSafe(char (&buffer)[count], const char * src)
{
    strCat(buffer, count, src);
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
/// String builder.
|
/// String builder.
|
||||||
class NVCORE_CLASS StringBuilder
|
class NVCORE_CLASS StringBuilder
|
||||||
@ -73,9 +102,10 @@ namespace nv
|
|||||||
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
|
StringBuilder & format( const char * format, ... ) __attribute__((format (printf, 2, 3)));
|
||||||
StringBuilder & formatList( const char * format, va_list arg );
|
StringBuilder & formatList( const char * format, va_list arg );
|
||||||
|
|
||||||
StringBuilder & append( const char * str );
|
StringBuilder & append(const char * str);
|
||||||
StringBuilder & appendFormat( const char * format, ... ) __attribute__((format (printf, 2, 3)));
|
StringBuilder & append(const char * str, uint len);
|
||||||
StringBuilder & appendFormatList( const char * format, va_list arg );
|
StringBuilder & appendFormat(const char * format, ...) __attribute__((format (printf, 2, 3)));
|
||||||
|
StringBuilder & appendFormatList(const char * format, va_list arg);
|
||||||
|
|
||||||
StringBuilder & appendSpace(uint n);
|
StringBuilder & appendSpace(uint n);
|
||||||
|
|
||||||
@ -162,9 +192,9 @@ namespace nv
|
|||||||
void stripExtension();
|
void stripExtension();
|
||||||
|
|
||||||
// statics
|
// statics
|
||||||
static char separator();
|
NVCORE_API static char separator();
|
||||||
static const char * fileName(const char *);
|
NVCORE_API static const char * fileName(const char *);
|
||||||
static const char * extension(const char *);
|
NVCORE_API static const char * extension(const char *);
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -328,6 +358,66 @@ namespace nv
|
|||||||
uint operator()(const String & str) const { return str.hash(); }
|
uint operator()(const String & str) const { return str.hash(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
// Like AutoPtr, but for const char strings.
|
||||||
|
class AutoString
|
||||||
|
{
|
||||||
|
NV_FORBID_COPY(AutoString);
|
||||||
|
NV_FORBID_HEAPALLOC();
|
||||||
|
public:
|
||||||
|
|
||||||
|
// Ctor.
|
||||||
|
AutoString(const char * p = NULL) : m_ptr(p) { }
|
||||||
|
|
||||||
|
#if NV_CC_CPP11
|
||||||
|
// Move ctor.
|
||||||
|
AutoString(AutoString && ap) : m_ptr(ap.m_ptr) { ap.m_ptr = NULL; }
|
||||||
|
#endif
|
||||||
|
|
||||||
|
// Dtor. Deletes owned pointer.
|
||||||
|
~AutoString() {
|
||||||
|
delete [] m_ptr;
|
||||||
|
m_ptr = NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Delete owned pointer and assign new one.
|
||||||
|
void operator=(const char * p) {
|
||||||
|
if (p != m_ptr)
|
||||||
|
{
|
||||||
|
delete [] m_ptr;
|
||||||
|
m_ptr = p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get pointer.
|
||||||
|
const char * ptr() const { return m_ptr; }
|
||||||
|
operator const char *() const { return m_ptr; }
|
||||||
|
|
||||||
|
// Relinquish ownership of the underlying pointer and returns that pointer.
|
||||||
|
const char * release() {
|
||||||
|
const char * tmp = m_ptr;
|
||||||
|
m_ptr = NULL;
|
||||||
|
return tmp;
|
||||||
|
}
|
||||||
|
|
||||||
|
// comparison operators.
|
||||||
|
friend bool operator == (const AutoString & ap, const char * const p) {
|
||||||
|
return (ap.ptr() == p);
|
||||||
|
}
|
||||||
|
friend bool operator != (const AutoString & ap, const char * const p) {
|
||||||
|
return (ap.ptr() != p);
|
||||||
|
}
|
||||||
|
friend bool operator == (const char * const p, const AutoString & ap) {
|
||||||
|
return (ap.ptr() == p);
|
||||||
|
}
|
||||||
|
friend bool operator != (const char * const p, const AutoString & ap) {
|
||||||
|
return (ap.ptr() != p);
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const char * m_ptr;
|
||||||
|
};
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
#endif // NV_CORE_STRING_H
|
#endif // NV_CORE_STRING_H
|
||||||
|
@ -78,7 +78,7 @@ namespace nv
|
|||||||
|
|
||||||
// friends
|
// friends
|
||||||
friend Stream & operator<<( Stream & s, bool & c ) {
|
friend Stream & operator<<( Stream & s, bool & c ) {
|
||||||
#if NV_OS_DARWIN
|
#if NV_OS_DARWIN && !NV_CC_CPP11
|
||||||
nvStaticCheck(sizeof(bool) == 4);
|
nvStaticCheck(sizeof(bool) == 4);
|
||||||
uint8 b = c ? 1 : 0;
|
uint8 b = c ? 1 : 0;
|
||||||
s.serialize( &b, 1 );
|
s.serialize( &b, 1 );
|
||||||
|
@ -26,7 +26,7 @@ void TextWriter::writeString(const char * str, uint len)
|
|||||||
s->serialize(const_cast<char *>(str), len);
|
s->serialize(const_cast<char *>(str), len);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextWriter::write(const char * format, ...)
|
void TextWriter::format(const char * format, ...)
|
||||||
{
|
{
|
||||||
va_list arg;
|
va_list arg;
|
||||||
va_start(arg,format);
|
va_start(arg,format);
|
||||||
@ -35,7 +35,7 @@ void TextWriter::write(const char * format, ...)
|
|||||||
va_end(arg);
|
va_end(arg);
|
||||||
}
|
}
|
||||||
|
|
||||||
void TextWriter::write(const char * format, va_list arg)
|
void TextWriter::formatList(const char * format, va_list arg)
|
||||||
{
|
{
|
||||||
va_list tmp;
|
va_list tmp;
|
||||||
va_copy(tmp, arg);
|
va_copy(tmp, arg);
|
||||||
|
@ -20,8 +20,8 @@ namespace nv
|
|||||||
|
|
||||||
void writeString(const char * str);
|
void writeString(const char * str);
|
||||||
void writeString(const char * str, uint len);
|
void writeString(const char * str, uint len);
|
||||||
void write(const char * format, ...) __attribute__((format (printf, 2, 3)));
|
void format(const char * format, ...) __attribute__((format (printf, 2, 3)));
|
||||||
void write(const char * format, va_list arg);
|
void formatList(const char * format, va_list arg);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
|
||||||
@ -35,19 +35,19 @@ namespace nv
|
|||||||
|
|
||||||
inline TextWriter & operator<<( TextWriter & tw, int i)
|
inline TextWriter & operator<<( TextWriter & tw, int i)
|
||||||
{
|
{
|
||||||
tw.write("%d", i);
|
tw.format("%d", i);
|
||||||
return tw;
|
return tw;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline TextWriter & operator<<( TextWriter & tw, uint i)
|
inline TextWriter & operator<<( TextWriter & tw, uint i)
|
||||||
{
|
{
|
||||||
tw.write("%u", i);
|
tw.format("%u", i);
|
||||||
return tw;
|
return tw;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline TextWriter & operator<<( TextWriter & tw, float f)
|
inline TextWriter & operator<<( TextWriter & tw, float f)
|
||||||
{
|
{
|
||||||
tw.write("%f", f);
|
tw.format("%f", f);
|
||||||
return tw;
|
return tw;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -29,78 +29,96 @@
|
|||||||
#define NV_HALF_MAX 65504.0F
|
#define NV_HALF_MAX 65504.0F
|
||||||
#define NV_FLOAT_MAX 3.402823466e+38F
|
#define NV_FLOAT_MAX 3.402823466e+38F
|
||||||
|
|
||||||
|
#define NV_INTEGER_TO_FLOAT_MAX 16777216 // 2^24: largest integer such that it and all smaller integers can be stored exactly in a 32bit float (24-bit significand).
|
||||||
|
|
||||||
|
|
||||||
namespace nv
|
namespace nv
|
||||||
{
|
{
|
||||||
// Less error prone than casting. From CB:
|
// Less error prone than casting. From CB:
|
||||||
// http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
|
// http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
|
||||||
|
|
||||||
|
// These intentionally look like casts.
|
||||||
|
|
||||||
// uint32 casts:
|
// uint32 casts:
|
||||||
template <typename T> inline uint32 toU32(T x) { return x; }
|
template <typename T> inline uint32 U32(T x) { return x; }
|
||||||
template <> inline uint32 toU32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
|
template <> inline uint32 U32<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT32_MAX); return (uint32)x; }
|
||||||
template <> inline uint32 toU32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
|
template <> inline uint32 U32<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT32_MAX); return (uint32)x; }
|
||||||
//template <> inline uint32 toU32<uint32>(uint32 x) { return x; }
|
//template <> inline uint32 U32<uint32>(uint32 x) { return x; }
|
||||||
template <> inline uint32 toU32<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; }
|
template <> inline uint32 U32<int32>(int32 x) { nvDebugCheck(x >= 0); return (uint32)x; }
|
||||||
//template <> inline uint32 toU32<uint16>(uint16 x) { return x; }
|
//template <> inline uint32 U32<uint16>(uint16 x) { return x; }
|
||||||
template <> inline uint32 toU32<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; }
|
template <> inline uint32 U32<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint32)x; }
|
||||||
//template <> inline uint32 toU32<uint8>(uint8 x) { return x; }
|
//template <> inline uint32 U32<uint8>(uint8 x) { return x; }
|
||||||
template <> inline uint32 toU32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
|
template <> inline uint32 U32<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint32)x; }
|
||||||
|
|
||||||
// int32 casts:
|
// int32 casts:
|
||||||
template <typename T> inline int32 toI32(T x) { return x; }
|
template <typename T> inline int32 I32(T x) { return x; }
|
||||||
template <> inline int32 toI32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
|
template <> inline int32 I32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
|
||||||
template <> inline int32 toI32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_UINT32_MAX); return (int32)x; }
|
// Checked int64 -> int32 narrowing. The upper bound is the signed maximum: values above NV_INT32_MAX do not fit in an int32.
template <> inline int32 I32<int64>(int64 x) { nvDebugCheck(x >= NV_INT32_MIN && x <= NV_INT32_MAX); return (int32)x; }
|
||||||
template <> inline int32 toI32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
|
template <> inline int32 I32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT32_MAX); return (int32)x; }
|
||||||
//template <> inline int32 toI32<int32>(int32 x) { return x; }
|
//template <> inline int32 I32<int32>(int32 x) { return x; }
|
||||||
//template <> inline int32 toI32<uint16>(uint16 x) { return x; }
|
//template <> inline int32 I32<uint16>(uint16 x) { return x; }
|
||||||
//template <> inline int32 toI32<int16>(int16 x) { return x; }
|
//template <> inline int32 I32<int16>(int16 x) { return x; }
|
||||||
//template <> inline int32 toI32<uint8>(uint8 x) { return x; }
|
//template <> inline int32 I32<uint8>(uint8 x) { return x; }
|
||||||
//template <> inline int32 toI32<int8>(int8 x) { return x; }
|
//template <> inline int32 I32<int8>(int8 x) { return x; }
|
||||||
|
|
||||||
// uint16 casts:
|
// uint16 casts:
|
||||||
template <typename T> inline uint16 toU16(T x) { return x; }
|
template <typename T> inline uint16 U16(T x) { return x; }
|
||||||
template <> inline uint16 toU16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
|
template <> inline uint16 U16<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
|
||||||
template <> inline uint16 toU16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
|
template <> inline uint16 U16<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
|
||||||
template <> inline uint16 toU16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
|
template <> inline uint16 U16<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT16_MAX); return (uint16)x; }
|
||||||
template <> inline uint16 toU16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
|
template <> inline uint16 U16<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT16_MAX); return (uint16)x; }
|
||||||
//template <> inline uint16 toU16<uint16>(uint16 x) { return x; }
|
//template <> inline uint16 U16<uint16>(uint16 x) { return x; }
|
||||||
template <> inline uint16 toU16<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; }
|
template <> inline uint16 U16<int16>(int16 x) { nvDebugCheck(x >= 0); return (uint16)x; }
|
||||||
//template <> inline uint16 toU16<uint8>(uint8 x) { return x; }
|
//template <> inline uint16 U16<uint8>(uint8 x) { return x; }
|
||||||
template <> inline uint16 toU16<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; }
|
template <> inline uint16 U16<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint16)x; }
|
||||||
|
|
||||||
// int16 casts:
|
// int16 casts:
|
||||||
template <typename T> inline int16 toI16(T x) { return x; }
|
template <typename T> inline int16 I16(T x) { return x; }
|
||||||
template <> inline int16 toI16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
|
template <> inline int16 I16<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
|
||||||
template <> inline int16 toI16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; }
|
// Checked int64 -> int16 narrowing. The upper bound is the signed maximum: values above NV_INT16_MAX do not fit in an int16.
template <> inline int16 I16<int64>(int64 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; }
|
||||||
template <> inline int16 toI16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
|
template <> inline int16 I16<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
|
||||||
template <> inline int16 toI16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_UINT16_MAX); return (int16)x; }
|
// Checked int32 -> int16 narrowing. The upper bound is the signed maximum: values above NV_INT16_MAX do not fit in an int16.
template <> inline int16 I16<int32>(int32 x) { nvDebugCheck(x >= NV_INT16_MIN && x <= NV_INT16_MAX); return (int16)x; }
|
||||||
template <> inline int16 toI16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
|
template <> inline int16 I16<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT16_MAX); return (int16)x; }
|
||||||
//template <> inline int16 toI16<int16>(int16 x) { return x; }
|
//template <> inline int16 I16<int16>(int16 x) { return x; }
|
||||||
//template <> inline int16 toI16<uint8>(uint8 x) { return x; }
|
//template <> inline int16 I16<uint8>(uint8 x) { return x; }
|
||||||
//template <> inline int16 toI16<int8>(int8 x) { return x; }
|
//template <> inline int16 I16<int8>(int8 x) { return x; }
|
||||||
|
|
||||||
// uint8 casts:
|
// uint8 casts:
|
||||||
template <typename T> inline uint8 toU8(T x) { return x; }
|
template <typename T> inline uint8 U8(T x) { return x; }
|
||||||
template <> inline uint8 toU8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
|
template <> inline uint8 U8<uint64>(uint64 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
|
||||||
template <> inline uint8 toU8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
|
template <> inline uint8 U8<int64>(int64 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
|
||||||
template <> inline uint8 toU8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
|
template <> inline uint8 U8<uint32>(uint32 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
|
||||||
template <> inline uint8 toU8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
|
template <> inline uint8 U8<int32>(int32 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
|
||||||
template <> inline uint8 toU8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
|
template <> inline uint8 U8<uint16>(uint16 x) { nvDebugCheck(x <= NV_UINT8_MAX); return (uint8)x; }
|
||||||
template <> inline uint8 toU8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
|
template <> inline uint8 U8<int16>(int16 x) { nvDebugCheck(x >= 0 && x <= NV_UINT8_MAX); return (uint8)x; }
|
||||||
//template <> inline uint8 toU8<uint8>(uint8 x) { return x; }
|
//template <> inline uint8 U8<uint8>(uint8 x) { return x; }
|
||||||
template <> inline uint8 toU8<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; }
|
template <> inline uint8 U8<int8>(int8 x) { nvDebugCheck(x >= 0); return (uint8)x; }
|
||||||
|
//template <> inline uint8 U8<float>(int8 x) { nvDebugCheck(x >= 0.0f && x <= 255.0f); return (uint8)x; }
|
||||||
|
|
||||||
// int8 casts:
|
// int8 casts:
|
||||||
template <typename T> inline int8 toI8(T x) { return x; }
|
template <typename T> inline int8 I8(T x) { return x; }
|
||||||
template <> inline int8 toI8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
template <> inline int8 I8<uint64>(uint64 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
||||||
template <> inline int8 toI8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
|
// Checked int64 -> int8 narrowing. The upper bound is the signed maximum: values above NV_INT8_MAX do not fit in an int8.
template <> inline int8 I8<int64>(int64 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
|
||||||
template <> inline int8 toI8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
template <> inline int8 I8<uint32>(uint32 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
||||||
template <> inline int8 toI8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
|
// Checked int32 -> int8 narrowing. The upper bound is the signed maximum: values above NV_INT8_MAX do not fit in an int8.
template <> inline int8 I8<int32>(int32 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
|
||||||
template <> inline int8 toI8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
template <> inline int8 I8<uint16>(uint16 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
||||||
template <> inline int8 toI8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_UINT8_MAX); return (int8)x; }
|
// Checked int16 -> int8 narrowing. The upper bound is the signed maximum: values above NV_INT8_MAX do not fit in an int8.
template <> inline int8 I8<int16>(int16 x) { nvDebugCheck(x >= NV_INT8_MIN && x <= NV_INT8_MAX); return (int8)x; }
|
||||||
template <> inline int8 toI8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
template <> inline int8 I8<uint8>(uint8 x) { nvDebugCheck(x <= NV_INT8_MAX); return (int8)x; }
|
||||||
//template <> inline int8 toI8<int8>(int8 x) { return x; }
|
//template <> inline int8 I8<int8>(int8 x) { return x; }
|
||||||
|
|
||||||
|
// float casts:
|
||||||
|
template <typename T> inline float F32(T x) { return x; }
|
||||||
|
template <> inline float F32<uint64>(uint64 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
|
||||||
|
template <> inline float F32<int64>(int64 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
|
||||||
|
template <> inline float F32<uint32>(uint32 x) { nvDebugCheck(x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
|
||||||
|
template <> inline float F32<int32>(int32 x) { nvDebugCheck(x >= -NV_INTEGER_TO_FLOAT_MAX && x <= NV_INTEGER_TO_FLOAT_MAX); return (float)x; }
|
||||||
|
// The compiler should not complain about these conversions:
|
||||||
|
//template <> inline float F32<uint16>(uint16 x) { nvDebugCheck(return (float)x; }
|
||||||
|
//template <> inline float F32<int16>(int16 x) { nvDebugCheck(return (float)x; }
|
||||||
|
//template <> inline float F32<uint8>(uint8 x) { nvDebugCheck(return (float)x; }
|
||||||
|
//template <> inline float F32<int8>(int8 x) { nvDebugCheck(return (float)x; }
|
||||||
|
|
||||||
|
|
||||||
/// Swap two values.
|
/// Swap two values.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline void swap(T & a, T & b)
|
inline void swap(T & a, T & b)
|
||||||
@ -112,35 +130,40 @@ namespace nv
|
|||||||
|
|
||||||
/// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN.
|
/// Return the maximum of the two arguments. For floating point values, it returns the second value if the first is NaN.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline const T & max(const T & a, const T & b)
|
//inline const T & max(const T & a, const T & b)
|
||||||
|
inline T max(const T & a, const T & b)
|
||||||
{
|
{
|
||||||
return (b < a) ? a : b;
|
return (b < a) ? a : b;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the maximum of the three arguments.
|
/// Return the maximum of the three arguments.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline const T & max3(const T & a, const T & b, const T & c)
|
//inline const T & max3(const T & a, const T & b, const T & c)
|
||||||
|
inline T max3(const T & a, const T & b, const T & c)
|
||||||
{
|
{
|
||||||
return max(a, max(b, c));
|
return max(a, max(b, c));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the minimum of two values.
|
/// Return the minimum of two values.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline const T & min(const T & a, const T & b)
|
//inline const T & min(const T & a, const T & b)
|
||||||
|
inline T min(const T & a, const T & b)
|
||||||
{
|
{
|
||||||
return (a < b) ? a : b;
|
return (a < b) ? a : b;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the minimum of the three arguments.
|
/// Return the minimum of the three arguments.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline const T & min3(const T & a, const T & b, const T & c)
|
//inline const T & min3(const T & a, const T & b, const T & c)
|
||||||
|
inline T min3(const T & a, const T & b, const T & c)
|
||||||
{
|
{
|
||||||
return min(a, min(b, c));
|
return min(a, min(b, c));
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Clamp between two values.
|
/// Clamp between two values.
|
||||||
template <typename T>
|
template <typename T>
|
||||||
inline const T & clamp(const T & x, const T & a, const T & b)
|
//inline const T & clamp(const T & x, const T & a, const T & b)
|
||||||
|
inline T clamp(const T & x, const T & a, const T & b)
|
||||||
{
|
{
|
||||||
return min(max(x, a), b);
|
return min(max(x, a), b);
|
||||||
}
|
}
|
||||||
@ -217,7 +240,6 @@ namespace nv
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
|
void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
|
||||||
for (uint i = new_size; i < old_size; i++) {
|
for (uint i = new_size; i < old_size; i++) {
|
||||||
nvDebugCheck(ptr != NULL && isValidPtr(ptr));
|
|
||||||
(ptr+i)->~T(); // Explicit call to the destructor
|
(ptr+i)->~T(); // Explicit call to the destructor
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -127,6 +127,12 @@
|
|||||||
# error "Unsupported compiler"
|
# error "Unsupported compiler"
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if NV_CC_MSVC
|
||||||
|
#define NV_CC_CPP11 (__cplusplus > 199711L)
|
||||||
|
#else
|
||||||
|
// @@ IC: This works in CLANG, about GCC?
|
||||||
|
// Use the standard language-version macro: __has_feature() is a Clang extension, and on compilers
// that do not provide it (e.g. older GCC releases) any `#if NV_CC_CPP11` would fail to preprocess.
#define NV_CC_CPP11 (__cplusplus >= 201103L)
|
||||||
|
#endif
|
||||||
|
|
||||||
// Endianness:
|
// Endianness:
|
||||||
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
|
#define NV_LITTLE_ENDIAN POSH_LITTLE_ENDIAN
|
||||||
@ -170,11 +176,16 @@ typedef uint32 uint;
|
|||||||
|
|
||||||
|
|
||||||
// Disable copy constructor and assignment operator.
|
// Disable copy constructor and assignment operator.
|
||||||
|
#if NV_CC_CPP11
|
||||||
|
#define NV_FORBID_COPY(C) \
|
||||||
|
C( const C & ) = delete; \
|
||||||
|
C &operator=( const C & ) = delete
|
||||||
|
#else
|
||||||
#define NV_FORBID_COPY(C) \
|
#define NV_FORBID_COPY(C) \
|
||||||
private: \
|
private: \
|
||||||
C( const C & ); \
|
C( const C & ); \
|
||||||
C &operator=( const C & )
|
C &operator=( const C & )
|
||||||
|
#endif
|
||||||
|
|
||||||
// Disable dynamic allocation on the heap.
|
// Disable dynamic allocation on the heap.
|
||||||
// See Prohibiting Heap-Based Objects in More Effective C++.
|
// See Prohibiting Heap-Based Objects in More Effective C++.
|
||||||
@ -205,8 +216,8 @@ typedef uint32 uint;
|
|||||||
#define NV_MULTI_LINE_MACRO_END } while(false)
|
#define NV_MULTI_LINE_MACRO_END } while(false)
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if __cplusplus > 199711L
|
#if NV_CC_CPP11
|
||||||
#define nvStaticCheck(x) static_assert(x, "Static assert "#x" failed")
|
#define nvStaticCheck(x) static_assert((x), "Static assert "#x" failed")
|
||||||
#else
|
#else
|
||||||
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
|
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
|
||||||
#endif
|
#endif
|
||||||
|
@ -138,9 +138,9 @@ uint BlockDXT1::evaluatePaletteNV5x(Color32 color_array[4]) const
|
|||||||
color_array[2].a = 0xFF;
|
color_array[2].a = 0xFF;
|
||||||
|
|
||||||
// Set all components to 0 to match DXT specs.
|
// Set all components to 0 to match DXT specs.
|
||||||
color_array[3].r = 0x00; // color_array[2].r;
|
color_array[3].r = 0x00;
|
||||||
color_array[3].g = 0x00; // color_array[2].g;
|
color_array[3].g = 0x00;
|
||||||
color_array[3].b = 0x00; // color_array[2].b;
|
color_array[3].b = 0x00;
|
||||||
color_array[3].a = 0x00;
|
color_array[3].a = 0x00;
|
||||||
|
|
||||||
return 3;
|
return 3;
|
||||||
@ -167,9 +167,9 @@ void BlockDXT1::evaluatePalette3(Color32 color_array[4], bool d3d9) const
|
|||||||
color_array[2].a = 0xFF;
|
color_array[2].a = 0xFF;
|
||||||
|
|
||||||
// Set all components to 0 to match DXT specs.
|
// Set all components to 0 to match DXT specs.
|
||||||
color_array[3].r = 0x00; // color_array[2].r;
|
color_array[3].r = 0x00;
|
||||||
color_array[3].g = 0x00; // color_array[2].g;
|
color_array[3].g = 0x00;
|
||||||
color_array[3].b = 0x00; // color_array[2].b;
|
color_array[3].b = 0x00;
|
||||||
color_array[3].a = 0x00;
|
color_array[3].a = 0x00;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -433,6 +433,22 @@ void AlphaBlockDXT5::decodeBlock(ColorBlock * block, bool d3d9/*= false*/) const
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void AlphaBlockDXT5::decodeBlock(AlphaBlock4x4 * block, bool d3d9/*= false*/) const
|
||||||
|
{
|
||||||
|
nvDebugCheck(block != NULL);
|
||||||
|
|
||||||
|
uint8 alpha_array[8];
|
||||||
|
evaluatePalette(alpha_array, d3d9);
|
||||||
|
|
||||||
|
uint8 index_array[16];
|
||||||
|
indices(index_array);
|
||||||
|
|
||||||
|
for(uint i = 0; i < 16; i++) {
|
||||||
|
block->alpha[i] = alpha_array[index_array[i]];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
void AlphaBlockDXT5::flip4()
|
void AlphaBlockDXT5::flip4()
|
||||||
{
|
{
|
||||||
uint64 * b = (uint64 *)this;
|
uint64 * b = (uint64 *)this;
|
||||||
|
@ -32,7 +32,8 @@
|
|||||||
namespace nv
|
namespace nv
|
||||||
{
|
{
|
||||||
struct ColorBlock;
|
struct ColorBlock;
|
||||||
struct ColorSet;
|
struct ColorSet;
|
||||||
|
struct AlphaBlock4x4;
|
||||||
class Stream;
|
class Stream;
|
||||||
|
|
||||||
|
|
||||||
@ -152,6 +153,7 @@ namespace nv
|
|||||||
void setIndex(uint index, uint value);
|
void setIndex(uint index, uint value);
|
||||||
|
|
||||||
void decodeBlock(ColorBlock * block, bool d3d9 = false) const;
|
void decodeBlock(ColorBlock * block, bool d3d9 = false) const;
|
||||||
|
void decodeBlock(AlphaBlock4x4 * block, bool d3d9 = false) const;
|
||||||
|
|
||||||
void flip4();
|
void flip4();
|
||||||
void flip2();
|
void flip2();
|
||||||
|
@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
#include "nvmath/Box.h"
|
#include "nvmath/Box.h"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
#include "nvcore/Utils.h" // swap
|
#include "nvcore/Utils.h" // swap
|
||||||
|
|
||||||
#include <string.h> // memcpy
|
#include <string.h> // memcpy
|
||||||
@ -519,11 +521,24 @@ void ColorSet::setColors(const float * data, uint img_w, uint img_h, uint img_x,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ColorSet::setColors(const Vector3 colors[16], const float weights[16])
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
void ColorSet::setColors(const Vector4 colors[16], const float weights[16])
|
||||||
|
{
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
void ColorSet::setAlphaWeights()
|
void ColorSet::setAlphaWeights()
|
||||||
{
|
{
|
||||||
for (uint i = 0; i < colorCount; i++)
|
for (uint i = 0; i < colorCount; i++)
|
||||||
{
|
{
|
||||||
weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero.
|
//weights[i] = max(colors[i].w, 0.001f); // Avoid division by zero.
|
||||||
|
weights[i] = max(colors[i].w, 0.0f);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -539,6 +554,7 @@ void ColorSet::setUniformWeights()
|
|||||||
// @@ Handle complex blocks (not 4x4).
|
// @@ Handle complex blocks (not 4x4).
|
||||||
void ColorSet::createMinimalSet(bool ignoreTransparent)
|
void ColorSet::createMinimalSet(bool ignoreTransparent)
|
||||||
{
|
{
|
||||||
|
nvDebugCheck(indexCount == 16);
|
||||||
nvDebugCheck(colorCount <= 16);
|
nvDebugCheck(colorCount <= 16);
|
||||||
|
|
||||||
Vector4 C[16];
|
Vector4 C[16];
|
||||||
@ -556,7 +572,7 @@ void ColorSet::createMinimalSet(bool ignoreTransparent)
|
|||||||
Vector4 ci = C[indices[i]];
|
Vector4 ci = C[indices[i]];
|
||||||
float wi = W[indices[i]];
|
float wi = W[indices[i]];
|
||||||
|
|
||||||
if (ignoreTransparent && ci.w == 0) {
|
if (ignoreTransparent && wi == 0) {
|
||||||
indices[i] = -1;
|
indices[i] = -1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -582,9 +598,10 @@ void ColorSet::createMinimalSet(bool ignoreTransparent)
|
|||||||
n++;
|
n++;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
nvDebugCheck(n != 0);
|
//nvDebugCheck(n != 0); // Fully transparent blocks are OK.
|
||||||
|
|
||||||
for (uint i = n; i < colorCount; i++) {
|
for (uint i = n; i < colorCount; i++) {
|
||||||
|
colors[i] = Vector4(0);
|
||||||
weights[i] = 0;
|
weights[i] = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -594,6 +611,8 @@ void ColorSet::createMinimalSet(bool ignoreTransparent)
|
|||||||
if (colorCount == 0) {
|
if (colorCount == 0) {
|
||||||
colorCount = 1;
|
colorCount = 1;
|
||||||
indices[0] = 0;
|
indices[0] = 0;
|
||||||
|
//colors[0] = Vector4(0);
|
||||||
|
weights[0] = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -661,3 +680,59 @@ bool ColorSet::hasAlpha() const
|
|||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void AlphaBlock4x4::init(uint8 a)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
alpha[i] = a;
|
||||||
|
weights[i] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AlphaBlock4x4::init(const ColorBlock & src, uint channel)
|
||||||
|
{
|
||||||
|
nvCheck(channel >= 0 && channel < 4);
|
||||||
|
|
||||||
|
// Colors are in BGRA format.
|
||||||
|
if (channel == 0) channel = 2;
|
||||||
|
else if (channel == 2) channel = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
alpha[i] = src.color(i).component[channel];
|
||||||
|
weights[i] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void AlphaBlock4x4::init(const ColorSet & src, uint channel)
|
||||||
|
{
|
||||||
|
nvCheck(channel >= 0 && channel < 4);
|
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
float f = src.color(i).component[channel];
|
||||||
|
alpha[i] = unitFloatToFixed8(f);
|
||||||
|
weights[i] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AlphaBlock4x4::initMaxRGB(const ColorSet & src, float threshold)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
float x = src.color(i).x;
|
||||||
|
float y = src.color(i).y;
|
||||||
|
float z = src.color(i).z;
|
||||||
|
alpha[i] = unitFloatToFixed8(max(max(x, y), max(z, threshold)));
|
||||||
|
weights[i] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void AlphaBlock4x4::initWeights(const ColorSet & src)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
weights[i] = src.weight(i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -12,6 +12,7 @@ namespace nv
|
|||||||
class Image;
|
class Image;
|
||||||
class FloatImage;
|
class FloatImage;
|
||||||
|
|
||||||
|
|
||||||
/// Uncompressed 4x4 color block.
|
/// Uncompressed 4x4 color block.
|
||||||
struct ColorBlock
|
struct ColorBlock
|
||||||
{
|
{
|
||||||
@ -89,6 +90,8 @@ namespace nv
|
|||||||
void allocate(uint w, uint h);
|
void allocate(uint w, uint h);
|
||||||
|
|
||||||
void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y);
|
void setColors(const float * data, uint img_w, uint img_h, uint img_x, uint img_y);
|
||||||
|
void setColors(const Vector3 colors[16], const float weights[16]);
|
||||||
|
void setColors(const Vector4 colors[16], const float weights[16]);
|
||||||
|
|
||||||
void setAlphaWeights();
|
void setAlphaWeights();
|
||||||
void setUniformWeights();
|
void setUniformWeights();
|
||||||
@ -108,6 +111,8 @@ namespace nv
|
|||||||
Vector4 color(uint i) const { nvDebugCheck(i < indexCount); return colors[indices[i]]; }
|
Vector4 color(uint i) const { nvDebugCheck(i < indexCount); return colors[indices[i]]; }
|
||||||
Vector4 & color(uint i) { nvDebugCheck(i < indexCount); return colors[indices[i]]; }
|
Vector4 & color(uint i) { nvDebugCheck(i < indexCount); return colors[indices[i]]; }
|
||||||
|
|
||||||
|
float weight(uint i) const { nvDebugCheck(i < indexCount); return weights[indices[i]]; }
|
||||||
|
|
||||||
bool isValidIndex(uint i) const { return i < indexCount && indices[i] >= 0; }
|
bool isValidIndex(uint i) const { return i < indexCount && indices[i] >= 0; }
|
||||||
|
|
||||||
uint colorCount;
|
uint colorCount;
|
||||||
@ -116,10 +121,40 @@ namespace nv
|
|||||||
|
|
||||||
// Allocate color set dynamically and add support for sets larger than 4x4.
|
// Allocate color set dynamically and add support for sets larger than 4x4.
|
||||||
Vector4 colors[16];
|
Vector4 colors[16];
|
||||||
float weights[16];
|
float weights[16]; // @@ Add mask to indicate what color components are weighted?
|
||||||
int indices[16];
|
int indices[16];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
/// Uncompressed 4x4 alpha block.
|
||||||
|
struct AlphaBlock4x4
|
||||||
|
{
|
||||||
|
void init(uint8 value);
|
||||||
|
void init(const ColorBlock & src, uint channel);
|
||||||
|
void init(const ColorSet & src, uint channel);
|
||||||
|
|
||||||
|
void initMaxRGB(const ColorSet & src, float threshold);
|
||||||
|
void initWeights(const ColorSet & src);
|
||||||
|
|
||||||
|
uint8 alpha[4*4];
|
||||||
|
float weights[16];
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
struct FloatAlphaBlock4x4
|
||||||
|
{
|
||||||
|
float alphas[4 * 4];
|
||||||
|
float weights[4 * 4];
|
||||||
|
};
|
||||||
|
|
||||||
|
struct FloatColorBlock4x4
|
||||||
|
{
|
||||||
|
Vector4 colors[4 * 4];
|
||||||
|
float weights[4 * 4];
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
#endif // NV_IMAGE_COLORBLOCK_H
|
#endif // NV_IMAGE_COLORBLOCK_H
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
// This code is in the public domain -- jim@tilander.org
|
// This code is in the public domain -- jim@tilander.org
|
||||||
|
|
||||||
#include <nvcore/nvcore.h>
|
|
||||||
|
|
||||||
#include <nvmath/Color.h>
|
|
||||||
#include <nvimage/Image.h>
|
|
||||||
|
|
||||||
#include "ColorSpace.h"
|
#include "ColorSpace.h"
|
||||||
|
|
||||||
|
#include "nvimage/Image.h"
|
||||||
|
#include "nvmath/Color.h"
|
||||||
|
|
||||||
|
|
||||||
namespace nv
|
namespace nv
|
||||||
{
|
{
|
||||||
void ColorSpace::RGBtoYCoCg_R(Image* img)
|
void ColorSpace::RGBtoYCoCg_R(Image* img)
|
||||||
|
@ -952,7 +952,8 @@ bool DirectDrawSurface::isSupported() const
|
|||||||
header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM ||
|
header.header10.dxgiFormat == DXGI_FORMAT_BC3_UNORM ||
|
||||||
header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM ||
|
header.header10.dxgiFormat == DXGI_FORMAT_BC4_UNORM ||
|
||||||
header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM ||
|
header.header10.dxgiFormat == DXGI_FORMAT_BC5_UNORM ||
|
||||||
header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16)
|
header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16 ||
|
||||||
|
header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM)
|
||||||
{
|
{
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -1390,37 +1391,37 @@ void DirectDrawSurface::readBlock(ColorBlock * rgba)
|
|||||||
*stream << block;
|
*stream << block;
|
||||||
block.decodeBlock(rgba);
|
block.decodeBlock(rgba);
|
||||||
}
|
}
|
||||||
else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16)
|
else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC6H_UF16)
|
||||||
{
|
{
|
||||||
BlockBC6 block;
|
BlockBC6 block;
|
||||||
*stream << block;
|
*stream << block;
|
||||||
ColorSet set;
|
ColorSet set;
|
||||||
block.decodeBlock(&set);
|
block.decodeBlock(&set);
|
||||||
|
|
||||||
// Clamp to [0, 1] and round to 8-bit
|
// Clamp to [0, 1] and round to 8-bit
|
||||||
for (int y = 0; y < 4; ++y)
|
for (int y = 0; y < 4; ++y)
|
||||||
{
|
{
|
||||||
for (int x = 0; x < 4; ++x)
|
for (int x = 0; x < 4; ++x)
|
||||||
{
|
{
|
||||||
Vector4 px = set.colors[y*4 + x];
|
Vector4 px = set.colors[y*4 + x];
|
||||||
rgba->color(x, y).setRGBA(
|
rgba->color(x, y).setRGBA(
|
||||||
uint8(clamp(px.x, 0.0f, 1.0f) * 255.0f + 0.5f),
|
uint8(clamp(px.x, 0.0f, 1.0f) * 255.0f + 0.5f),
|
||||||
uint8(clamp(px.y, 0.0f, 1.0f) * 255.0f + 0.5f),
|
uint8(clamp(px.y, 0.0f, 1.0f) * 255.0f + 0.5f),
|
||||||
uint8(clamp(px.z, 0.0f, 1.0f) * 255.0f + 0.5f),
|
uint8(clamp(px.z, 0.0f, 1.0f) * 255.0f + 0.5f),
|
||||||
uint8(clamp(px.w, 0.0f, 1.0f) * 255.0f + 0.5f));
|
uint8(clamp(px.w, 0.0f, 1.0f) * 255.0f + 0.5f));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM)
|
else if (header.hasDX10Header() && header.header10.dxgiFormat == DXGI_FORMAT_BC7_UNORM)
|
||||||
{
|
{
|
||||||
BlockBC7 block;
|
BlockBC7 block;
|
||||||
*stream << block;
|
*stream << block;
|
||||||
block.decodeBlock(rgba);
|
block.decodeBlock(rgba);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
nvDebugCheck(false);
|
nvDebugCheck(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// If normal flag set, convert to normal.
|
// If normal flag set, convert to normal.
|
||||||
if (header.pf.flags & DDPF_NORMAL)
|
if (header.pf.flags & DDPF_NORMAL)
|
||||||
|
@ -7,6 +7,7 @@
|
|||||||
#include "nvmath/Color.h"
|
#include "nvmath/Color.h"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
#include "nvcore/Utils.h" // max
|
#include "nvcore/Utils.h" // max
|
||||||
#include "nvcore/Ptr.h"
|
#include "nvcore/Ptr.h"
|
||||||
|
@ -40,7 +40,7 @@ const Image & Image::operator=(const Image & img)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Image::allocate(uint w, uint h, uint d)
|
void Image::allocate(uint w, uint h, uint d/*= 1*/)
|
||||||
{
|
{
|
||||||
free();
|
free();
|
||||||
m_width = w;
|
m_width = w;
|
||||||
@ -49,6 +49,45 @@ void Image::allocate(uint w, uint h, uint d)
|
|||||||
m_data = realloc<Color32>(m_data, w * h * d);
|
m_data = realloc<Color32>(m_data, w * h * d);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Resize the image to w x h x d pixels.
//
// A temporary image of the requested size is allocated and filled: every pixel
// that lies inside both the old and the new extents is copied from this image,
// every other pixel is set to Color32(0,0,0,0). The temporary then trades its
// dimensions and pixel buffer with this image, so the previous buffer ends up
// in the temporary (presumably released when the temporary is destroyed).
//
// The pixel format of this image is left untouched. The previous implementation
// swapped m_format with the freshly constructed temporary, which replaced this
// image's format with whatever a new Image starts out with.
void Image::resize(uint w, uint h, uint d/*= 1*/) {

    Image img;
    img.allocate(w, h, d);

    const Color32 background(0,0,0,0);

    // Extents of the region shared by the old and the new image. These are
    // loop invariants, so compute them once instead of on every iteration.
    const uint copyWidth = min(w, m_width);
    const uint copyHeight = min(h, m_height);
    const uint copyDepth = min(d, m_depth);

    // Visit every pixel of the new image exactly once.
    for (uint z = 0; z < d; z++) {
        const bool sliceInside = (z < copyDepth);

        for (uint y = 0; y < h; y++) {
            const bool rowInside = sliceInside && (y < copyHeight);

            for (uint x = 0; x < w; x++) {
                if (rowInside && x < copyWidth) {
                    img.pixel(x, y, z) = pixel(x, y, z);    // Inside the old image: copy.
                }
                else {
                    img.pixel(x, y, z) = background;        // Outside the old image: pad.
                }
            }
        }
    }

    // Take over the new dimensions and buffer; m_format is intentionally kept.
    swap(m_width, img.m_width);
    swap(m_height, img.m_height);
    swap(m_depth, img.m_depth);
    swap(m_data, img.m_data);
}
|
||||||
|
|
||||||
bool Image::load(const char * name)
|
bool Image::load(const char * name)
|
||||||
{
|
{
|
||||||
free();
|
free();
|
||||||
|
@ -32,6 +32,8 @@ namespace nv
|
|||||||
void allocate(uint w, uint h, uint d = 1);
|
void allocate(uint w, uint h, uint d = 1);
|
||||||
bool load(const char * name);
|
bool load(const char * name);
|
||||||
|
|
||||||
|
void resize(uint w, uint h, uint d = 1);
|
||||||
|
|
||||||
void wrap(void * data, uint w, uint h, uint d = 1);
|
void wrap(void * data, uint w, uint h, uint d = 1);
|
||||||
void unwrap();
|
void unwrap();
|
||||||
|
|
||||||
|
@ -319,9 +319,9 @@ static bool savePPM(Stream & s, const Image * img)
|
|||||||
uint h = img->height();
|
uint h = img->height();
|
||||||
|
|
||||||
TextWriter writer(&s);
|
TextWriter writer(&s);
|
||||||
writer.write("P6\n");
|
writer.format("P6\n");
|
||||||
writer.write("%d %d\n", w, h);
|
writer.format("%d %d\n", w, h);
|
||||||
writer.write("255\n");
|
writer.writeString("255\n");
|
||||||
for (uint i = 0; i < w * h; i++) {
|
for (uint i = 0; i < w * h; i++) {
|
||||||
Color32 c = img->pixel(i);
|
Color32 c = img->pixel(i);
|
||||||
s << c.r << c.g << c.b;
|
s << c.r << c.g << c.b;
|
||||||
@ -501,14 +501,16 @@ static FloatImage * loadFloatDDS(Stream & s)
|
|||||||
DDSHeader header;
|
DDSHeader header;
|
||||||
s << header;
|
s << header;
|
||||||
|
|
||||||
static const uint D3DFMT_A16B16G16R16F = 113;
|
// @@ We only support a few formats for now.
|
||||||
|
|
||||||
// @@ We only support RGBA16F for now.
|
|
||||||
if (header.pf.fourcc == D3DFMT_A16B16G16R16F) {
|
if (header.pf.fourcc == D3DFMT_A16B16G16R16F) {
|
||||||
const int size = header.width * header.height;
|
const int size = header.width * header.height;
|
||||||
uint16 * const data = new uint16[size * 4];
|
uint16 * const data = new uint16[size * 4];
|
||||||
|
|
||||||
s.serialize(data, size * 4 * sizeof(uint16));
|
//s.serialize(data, size * 4 * sizeof(uint16));
|
||||||
|
for (int i = 0; i < 4* size; i++) {
|
||||||
|
s << data[i];
|
||||||
|
}
|
||||||
|
|
||||||
FloatImage * img = new FloatImage;
|
FloatImage * img = new FloatImage;
|
||||||
img->allocate(4, header.width, header.height);
|
img->allocate(4, header.width, header.height);
|
||||||
@ -530,7 +532,84 @@ static FloatImage * loadFloatDDS(Stream & s)
|
|||||||
|
|
||||||
return img;
|
return img;
|
||||||
}
|
}
|
||||||
|
else if (header.pf.fourcc == D3DFMT_R32F) {
|
||||||
|
const int size = header.width * header.height;
|
||||||
|
float * const data = new float[size];
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
s << data[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
FloatImage * img = new FloatImage;
|
||||||
|
img->allocate(4, header.width, header.height);
|
||||||
|
|
||||||
|
float * r = img->channel(0);
|
||||||
|
|
||||||
|
float * ptr = data;
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
*r++ = *ptr++;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete [] data;
|
||||||
|
|
||||||
|
img->clear(1, 0.0f);
|
||||||
|
img->clear(2, 0.0f);
|
||||||
|
img->clear(3, 1.0f);
|
||||||
|
|
||||||
|
return img;
|
||||||
|
}
|
||||||
|
else if (header.pf.fourcc == D3DFMT_L16 || (header.pf.bitcount == 16 && header.pf.rmask == 0xFFFF && header.pf.gmask == 0 && header.pf.bmask == 0 && header.pf.amask == 0))
|
||||||
|
{
|
||||||
|
const int size = header.width * header.height;
|
||||||
|
uint16 * const data = new uint16[size];
|
||||||
|
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
s << data[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
FloatImage * img = new FloatImage;
|
||||||
|
img->allocate(4, header.width, header.height);
|
||||||
|
|
||||||
|
float * r = img->channel(0);
|
||||||
|
|
||||||
|
uint16 * ptr = data;
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
*r++ = float(*ptr++) / 65535.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete [] data;
|
||||||
|
|
||||||
|
img->clear(1, 0.0f);
|
||||||
|
img->clear(2, 0.0f);
|
||||||
|
img->clear(3, 1.0f);
|
||||||
|
|
||||||
|
return img;
|
||||||
|
}
|
||||||
|
else if (header.pf.fourcc == D3DFMT_L8 || (header.pf.bitcount == 8 && header.pf.rmask == 0xFF && header.pf.gmask == 0 && header.pf.bmask == 0 && header.pf.amask == 0))
|
||||||
|
{
|
||||||
|
const int size = header.width * header.height;
|
||||||
|
uint8 * const data = new uint8[size];
|
||||||
|
|
||||||
|
s.serialize(data, size);
|
||||||
|
|
||||||
|
FloatImage * img = new FloatImage;
|
||||||
|
img->allocate(4, header.width, header.height);
|
||||||
|
|
||||||
|
float * r = img->channel(0);
|
||||||
|
|
||||||
|
uint8 * ptr = data;
|
||||||
|
for (int i = 0; i < size; i++) {
|
||||||
|
*r++ = float(*ptr++) / 255.0f;
|
||||||
|
}
|
||||||
|
|
||||||
|
delete [] data;
|
||||||
|
|
||||||
|
img->clear(1, 0.0f);
|
||||||
|
img->clear(2, 0.0f);
|
||||||
|
img->clear(3, 1.0f);
|
||||||
|
|
||||||
|
return img;
|
||||||
|
}
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1713,26 +1792,26 @@ Image * nv::ImageIO::load(const char * fileName, Stream & s)
|
|||||||
|
|
||||||
const char * extension = Path::extension(fileName);
|
const char * extension = Path::extension(fileName);
|
||||||
|
|
||||||
if (strCaseCmp(extension, ".tga") == 0) {
|
if (strCaseDiff(extension, ".tga") == 0) {
|
||||||
return loadTGA(s);
|
return loadTGA(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strCaseCmp(extension, ".psd") == 0) {
|
if (strCaseDiff(extension, ".psd") == 0) {
|
||||||
return loadPSD(s);
|
return loadPSD(s);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*if (strCaseCmp(extension, ".ppm") == 0) {
|
/*if (strCaseDiff(extension, ".ppm") == 0) {
|
||||||
return loadPPM(s);
|
return loadPPM(s);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
#if defined(HAVE_JPEG)
|
#if defined(HAVE_JPEG)
|
||||||
if (strCaseCmp(extension, ".jpg") == 0 || strCaseCmp(extension, ".jpeg") == 0) {
|
if (strCaseDiff(extension, ".jpg") == 0 || strCaseDiff(extension, ".jpeg") == 0) {
|
||||||
return loadJPG(s);
|
return loadJPG(s);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_PNG)
|
#if defined(HAVE_PNG)
|
||||||
if (strCaseCmp(extension, ".png") == 0) {
|
if (strCaseDiff(extension, ".png") == 0) {
|
||||||
return loadPNG(s);
|
return loadPNG(s);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -1759,16 +1838,16 @@ bool nv::ImageIO::save(const char * fileName, Stream & s, const Image * img, con
|
|||||||
|
|
||||||
const char * extension = Path::extension(fileName);
|
const char * extension = Path::extension(fileName);
|
||||||
|
|
||||||
if (strCaseCmp(extension, ".tga") == 0) {
|
if (strCaseDiff(extension, ".tga") == 0) {
|
||||||
return saveTGA(s, img);
|
return saveTGA(s, img);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strCaseCmp(extension, ".ppm") == 0) {
|
if (strCaseDiff(extension, ".ppm") == 0) {
|
||||||
return savePPM(s, img);
|
return savePPM(s, img);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAVE_PNG)
|
#if defined(HAVE_PNG)
|
||||||
if (strCaseCmp(extension, ".png") == 0) {
|
if (strCaseDiff(extension, ".png") == 0) {
|
||||||
return savePNG(s, img, tags);
|
return savePNG(s, img, tags);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -1816,20 +1895,20 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
|
|||||||
|
|
||||||
const char * extension = Path::extension(fileName);
|
const char * extension = Path::extension(fileName);
|
||||||
|
|
||||||
/*if (strCaseCmp(extension, ".pfm") == 0) {
|
/*if (strCaseDiff(extension, ".pfm") == 0) {
|
||||||
return loadFloatPFM(s);
|
return loadFloatPFM(s);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
#if defined(HAVE_TIFF)
|
#if defined(HAVE_TIFF)
|
||||||
#pragma NV_MESSAGE("TODO: Load TIFF from stream.")
|
#pragma NV_MESSAGE("TODO: Load TIFF from stream.")
|
||||||
if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) {
|
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
|
||||||
return loadFloatTIFF(fileName, s);
|
return loadFloatTIFF(fileName, s);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_OPENEXR)
|
#if defined(HAVE_OPENEXR)
|
||||||
#pragma NV_MESSAGE("TODO: Load EXR from stream.")
|
#pragma NV_MESSAGE("TODO: Load EXR from stream.")
|
||||||
if (strCaseCmp(extension, ".exr") == 0) {
|
if (strCaseDiff(extension, ".exr") == 0) {
|
||||||
return loadFloatEXR(fileName, s);
|
return loadFloatEXR(fileName, s);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
@ -1841,7 +1920,7 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
|
|||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (strCaseCmp(extension, ".dds") == 0) {
|
if (strCaseDiff(extension, ".dds") == 0) {
|
||||||
const uint spos = s.tell(); // Save stream position.
|
const uint spos = s.tell(); // Save stream position.
|
||||||
FloatImage * floatImage = loadFloatDDS(s);
|
FloatImage * floatImage = loadFloatDDS(s);
|
||||||
if (floatImage != NULL) return floatImage;
|
if (floatImage != NULL) return floatImage;
|
||||||
@ -1868,11 +1947,11 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
|
|||||||
|
|
||||||
const char * extension = Path::extension(fileName);
|
const char * extension = Path::extension(fileName);
|
||||||
|
|
||||||
if (strCaseCmp(extension, ".dds") == 0) {
|
if (strCaseDiff(extension, ".dds") == 0) {
|
||||||
return saveFloatDDS(s, fimage, baseComponent, componentCount);
|
return saveFloatDDS(s, fimage, baseComponent, componentCount);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*if (strCaseCmp(extension, ".pfm") == 0) {
|
/*if (strCaseDiff(extension, ".pfm") == 0) {
|
||||||
return saveFloatPFM(s, fimage, baseComponent, componentCount);
|
return saveFloatPFM(s, fimage, baseComponent, componentCount);
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
@ -1922,13 +2001,13 @@ bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, ui
|
|||||||
const char * extension = Path::extension(fileName);
|
const char * extension = Path::extension(fileName);
|
||||||
|
|
||||||
#if defined(HAVE_OPENEXR)
|
#if defined(HAVE_OPENEXR)
|
||||||
if (strCaseCmp(extension, ".exr") == 0) {
|
if (strCaseDiff(extension, ".exr") == 0) {
|
||||||
return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
|
return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if defined(HAVE_TIFF)
|
#if defined(HAVE_TIFF)
|
||||||
if (strCaseCmp(extension, ".tif") == 0 || strCaseCmp(extension, ".tiff") == 0) {
|
if (strCaseDiff(extension, ".tif") == 0 || strCaseDiff(extension, ".tiff") == 0) {
|
||||||
return saveFloatTIFF(fileName, fimage, baseComponent, componentCount);
|
return saveFloatTIFF(fileName, fimage, baseComponent, componentCount);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -39,7 +39,7 @@ namespace nv
|
|||||||
// Forwards to setCenterExtents() with the same extent 'dist' on every axis.
// The two variants shown here differ only in how that Vector3 is spelled:
// Vector3(dist, dist, dist) versus Vector3(dist).
// NOTE(review): presumably the single-argument Vector3 constructor replicates the value — confirm.
// Build a cube centered on center and with edge = 2*dist
|
// Build a cube centered on center and with edge = 2*dist
|
||||||
inline void Box::cube(const Vector3 & center, float dist)
|
inline void Box::cube(const Vector3 & center, float dist)
|
||||||
{
|
{
|
||||||
setCenterExtents(center, Vector3(dist, dist, dist));
|
setCenterExtents(center, Vector3(dist));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build a box, given center and extents.
|
// Build a box, given center and extents.
|
||||||
|
@ -89,6 +89,7 @@ namespace nv
|
|||||||
uint8 b: 8;
|
uint8 b: 8;
|
||||||
#endif
|
#endif
|
||||||
};
|
};
|
||||||
|
uint8 component[4];
|
||||||
uint32 u;
|
uint32 u;
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
@ -6,6 +6,7 @@
|
|||||||
|
|
||||||
#include "Color.h"
|
#include "Color.h"
|
||||||
#include "Vector.inl"
|
#include "Vector.inl"
|
||||||
|
#include "ftoi.h"
|
||||||
|
|
||||||
|
|
||||||
namespace nv
|
namespace nv
|
||||||
@ -123,30 +124,30 @@ namespace nv
|
|||||||
// Convert a Vector4 to a Color32 with the mapping x -> r, y -> g, z -> b, w -> a.
// Each component is passed through saturate(), multiplied by 255 and converted with a
// rounding helper plus an 8-bit cast: toU8(nv::iround(...)) in one variant shown here,
// U8(ftoi_round(...)) in the other.
// NOTE(review): presumably saturate() clamps to [0, 1] — confirm against its definition.
inline Color32 toColor32(const Vector4 & v)
|
inline Color32 toColor32(const Vector4 & v)
|
||||||
{
|
{
|
||||||
Color32 color;
|
Color32 color;
|
||||||
color.r = toU8(nv::iround(saturate(v.x) * 255));
|
color.r = U8(ftoi_round(saturate(v.x) * 255));
|
||||||
color.g = toU8(nv::iround(saturate(v.y) * 255));
|
color.g = U8(ftoi_round(saturate(v.y) * 255));
|
||||||
color.b = toU8(nv::iround(saturate(v.z) * 255));
|
color.b = U8(ftoi_round(saturate(v.z) * 255));
|
||||||
color.a = toU8(nv::iround(saturate(v.w) * 255));
|
color.a = U8(ftoi_round(saturate(v.w) * 255));
|
||||||
return color;
|
return color;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert a Vector4 whose components are ordered (b, g, r, a) to a Color32:
// x -> b, y -> g, z -> r, w -> a.
// Each component is passed through saturate(), multiplied by 255 and converted with a
// rounding helper plus an 8-bit cast: toU8(nv::iround(...)) in one variant shown here,
// U8(ftoi_round(...)) in the other.
inline Color32 toColor32_from_bgra(const Vector4 & v)
|
inline Color32 toColor32_from_bgra(const Vector4 & v)
|
||||||
{
|
{
|
||||||
Color32 color;
|
Color32 color;
|
||||||
color.b = toU8(nv::iround(saturate(v.x) * 255));
|
color.b = U8(ftoi_round(saturate(v.x) * 255));
|
||||||
color.g = toU8(nv::iround(saturate(v.y) * 255));
|
color.g = U8(ftoi_round(saturate(v.y) * 255));
|
||||||
color.r = toU8(nv::iround(saturate(v.z) * 255));
|
color.r = U8(ftoi_round(saturate(v.z) * 255));
|
||||||
color.a = toU8(nv::iround(saturate(v.w) * 255));
|
color.a = U8(ftoi_round(saturate(v.w) * 255));
|
||||||
return color;
|
return color;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert a Vector4 whose components are ordered (a, r, g, b) to a Color32:
// x -> a, y -> r, z -> g, w -> b.
// Each component is passed through saturate(), multiplied by 255 and converted with a
// rounding helper plus an 8-bit cast: toU8(nv::iround(...)) in one variant shown here,
// U8(ftoi_round(...)) in the other.
inline Color32 toColor32_from_argb(const Vector4 & v)
|
inline Color32 toColor32_from_argb(const Vector4 & v)
|
||||||
{
|
{
|
||||||
Color32 color;
|
Color32 color;
|
||||||
color.a = toU8(nv::iround(saturate(v.x) * 255));
|
color.a = U8(ftoi_round(saturate(v.x) * 255));
|
||||||
color.r = toU8(nv::iround(saturate(v.y) * 255));
|
color.r = U8(ftoi_round(saturate(v.y) * 255));
|
||||||
color.g = toU8(nv::iround(saturate(v.z) * 255));
|
color.g = U8(ftoi_round(saturate(v.z) * 255));
|
||||||
color.b = toU8(nv::iround(saturate(v.w) * 255));
|
color.b = U8(ftoi_round(saturate(v.w) * 255));
|
||||||
return color;
|
return color;
|
||||||
}
|
||||||
|
|
||||||
|
@ -4,10 +4,11 @@
|
|||||||
#include "Vector.inl"
|
#include "Vector.inl"
|
||||||
#include "Plane.inl"
|
#include "Plane.inl"
|
||||||
|
|
||||||
|
#include "nvcore/Array.inl"
|
||||||
#include "nvcore/Utils.h" // max, swap
|
#include "nvcore/Utils.h" // max, swap
|
||||||
|
|
||||||
#include <float.h> // FLT_MAX
|
#include <float.h> // FLT_MAX
|
||||||
#include <vector>
|
//#include <vector>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
|
|
||||||
using namespace nv;
|
using namespace nv;
|
||||||
@ -329,7 +330,7 @@ void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R);
|
|||||||
Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
|
Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict points)
|
||||||
{
|
{
|
||||||
// Store the points in an n x n matrix
|
// Store the points in an n x n matrix
|
||||||
std::vector<float> Q(n*n, 0.0f);
|
Array<float> Q; Q.resize(n*n, 0.0f);
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
Q[i*n+0] = points[i].x;
|
Q[i*n+0] = points[i].x;
|
||||||
@ -338,8 +339,8 @@ Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Alloc space for the SVD outputs
|
// Alloc space for the SVD outputs
|
||||||
std::vector<float> diag(n, 0.0f);
|
Array<float> diag; diag.resize(n, 0.0f);
|
||||||
std::vector<float> R(n*n, 0.0f);
|
Array<float> R; R.resize(n*n, 0.0f);
|
||||||
|
|
||||||
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
|
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
|
||||||
|
|
||||||
@ -350,7 +351,7 @@ Vector3 nv::Fit::computePrincipalComponent_SVD(int n, const Vector3 *__restrict
|
|||||||
Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
|
Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict points)
|
||||||
{
|
{
|
||||||
// Store the points in an n x n matrix
|
// Store the points in an n x n matrix
|
||||||
std::vector<float> Q(n*n, 0.0f);
|
Array<float> Q; Q.resize(n*n, 0.0f);
|
||||||
for (int i = 0; i < n; ++i)
|
for (int i = 0; i < n; ++i)
|
||||||
{
|
{
|
||||||
Q[i*n+0] = points[i].x;
|
Q[i*n+0] = points[i].x;
|
||||||
@ -360,8 +361,8 @@ Vector4 nv::Fit::computePrincipalComponent_SVD(int n, const Vector4 *__restrict
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Alloc space for the SVD outputs
|
// Alloc space for the SVD outputs
|
||||||
std::vector<float> diag(n, 0.0f);
|
Array<float> diag; diag.resize(n, 0.0f);
|
||||||
std::vector<float> R(n*n, 0.0f);
|
Array<float> R; R.resize(n*n, 0.0f);
|
||||||
|
|
||||||
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
|
ArvoSVD(n, n, &Q[0], &diag[0], &R[0]);
|
||||||
|
|
||||||
@ -940,7 +941,7 @@ void ArvoSVD(int rows, int cols, float * Q, float * diag, float * R)
|
|||||||
float g = 0.0f;
|
float g = 0.0f;
|
||||||
float scale = 0.0f;
|
float scale = 0.0f;
|
||||||
|
|
||||||
std::vector<float> temp(cols, 0.0f);
|
Array<float> temp; temp.resize(cols, 0.0f);
|
||||||
|
|
||||||
for( i = 0; i < cols; i++ )
|
for( i = 0; i < cols; i++ )
|
||||||
{
|
{
|
||||||
|
@ -580,56 +580,56 @@ namespace nv {
|
|||||||
void nv::half_init_tables()
|
void nv::half_init_tables()
|
||||||
{
|
{
|
||||||
// Init mantissa table.
|
// Init mantissa table.
|
||||||
mantissa_table[0] = 0;
|
mantissa_table[0] = 0;
|
||||||
|
|
||||||
// denormals
|
// denormals
|
||||||
for (int i = 1; i < 1024; i++) {
|
for (int i = 1; i < 1024; i++) {
|
||||||
uint m = i << 13;
|
uint m = i << 13;
|
||||||
uint e = 0;
|
uint e = 0;
|
||||||
|
|
||||||
while ((m & 0x00800000) == 0) {
|
while ((m & 0x00800000) == 0) {
|
||||||
e -= 0x00800000;
|
e -= 0x00800000;
|
||||||
m <<= 1;
|
m <<= 1;
|
||||||
}
|
}
|
||||||
m &= ~0x00800000;
|
m &= ~0x00800000;
|
||||||
e += 0x38800000;
|
e += 0x38800000;
|
||||||
mantissa_table[i] = m | e;
|
mantissa_table[i] = m | e;
|
||||||
}
|
}
|
||||||
|
|
||||||
// normals
|
// normals
|
||||||
for (int i = 1024; i < 2048; i++) {
|
for (int i = 1024; i < 2048; i++) {
|
||||||
mantissa_table[i] = (i - 1024) << 13;
|
mantissa_table[i] = (i - 1024) << 13;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// Init exponent table.
|
// Init exponent table.
|
||||||
exponent_table[0] = 0;
|
exponent_table[0] = 0;
|
||||||
|
|
||||||
for (int i = 1; i < 31; i++) {
|
for (int i = 1; i < 31; i++) {
|
||||||
exponent_table[i] = 0x38000000 + (i << 23);
|
exponent_table[i] = 0x38000000 + (i << 23);
|
||||||
}
|
}
|
||||||
|
|
||||||
exponent_table[31] = 0x7f800000;
|
exponent_table[31] = 0x7f800000;
|
||||||
exponent_table[32] = 0x80000000;
|
exponent_table[32] = 0x80000000;
|
||||||
|
|
||||||
for (int i = 33; i < 63; i++) {
|
for (int i = 33; i < 63; i++) {
|
||||||
exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
|
exponent_table[i] = 0xb8000000 + ((i - 32) << 23);
|
||||||
}
|
}
|
||||||
|
|
||||||
exponent_table[63] = 0xff800000;
|
exponent_table[63] = 0xff800000;
|
||||||
|
|
||||||
|
|
||||||
// Init offset table.
|
// Init offset table.
|
||||||
offset_table[0] = 0;
|
offset_table[0] = 0;
|
||||||
|
|
||||||
for (int i = 1; i < 32; i++) {
|
for (int i = 1; i < 32; i++) {
|
||||||
offset_table[i] = 1024;
|
offset_table[i] = 1024;
|
||||||
}
|
}
|
||||||
|
|
||||||
offset_table[32] = 0;
|
offset_table[32] = 0;
|
||||||
|
|
||||||
for (int i = 33; i < 64; i++) {
|
for (int i = 33; i < 64; i++) {
|
||||||
offset_table[i] = 1024;
|
offset_table[i] = 1024;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -660,27 +660,27 @@ uint32 nv::fast_half_to_float(uint16 v)
|
|||||||
// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
|
// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
|
||||||
|
|
||||||
|
|
||||||
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
|
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
|
||||||
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
|
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
|
||||||
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
|
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
|
||||||
static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
|
static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
|
||||||
|
|
||||||
__asm
|
__asm
|
||||||
{
|
{
|
||||||
movaps xmm1, xmm0 // Input in xmm0
|
movaps xmm1, xmm0 // Input in xmm0
|
||||||
movaps xmm2, xmm0
|
movaps xmm2, xmm0
|
||||||
|
|
||||||
andps xmm0, half_sign
|
andps xmm0, half_sign
|
||||||
andps xmm1, half_exponent
|
andps xmm1, half_exponent
|
||||||
andps xmm2, half_mantissa
|
andps xmm2, half_mantissa
|
||||||
paddd xmm1, half_bias_offset
|
paddd xmm1, half_bias_offset
|
||||||
|
|
||||||
pslld xmm0, 16
|
pslld xmm0, 16
|
||||||
pslld xmm1, 13
|
pslld xmm1, 13
|
||||||
pslld xmm2, 13
|
pslld xmm2, 13
|
||||||
|
|
||||||
orps xmm1, xmm2
|
orps xmm1, xmm2
|
||||||
orps xmm0, xmm1 // Result in xmm0
|
orps xmm0, xmm1 // Result in xmm0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -7,6 +7,10 @@
|
|||||||
|
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
|
|
||||||
|
#if !NV_CC_MSVC && !NV_OS_ORBIS
|
||||||
|
#include <alloca.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
using namespace nv;
|
using namespace nv;
|
||||||
|
|
||||||
|
|
||||||
@ -20,8 +24,7 @@ static bool ludcmp(float **a, int n, int *indx, float *d)
|
|||||||
{
|
{
|
||||||
const float TINY = 1.0e-20f;
|
const float TINY = 1.0e-20f;
|
||||||
|
|
||||||
Array<float> vv; // vv stores the implicit scaling of each row.
|
float * vv = (float*)alloca(sizeof(float) * n); // vv stores the implicit scaling of each row.
|
||||||
vv.resize(n);
|
|
||||||
|
|
||||||
*d = 1.0; // No row interchanges yet.
|
*d = 1.0; // No row interchanges yet.
|
||||||
for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
|
for (int i = 0; i < n; i++) { // Loop over rows to get the implicit scaling information.
|
||||||
@ -149,6 +152,21 @@ bool nv::solveLU(const Matrix & A, const Vector4 & b, Vector4 * x)
|
|||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// @@ Not tested.
|
||||||
|
Matrix nv::inverseLU(const Matrix & A)
|
||||||
|
{
|
||||||
|
Vector4 Ai[4];
|
||||||
|
|
||||||
|
solveLU(A, Vector4(1, 0, 0, 0), &Ai[0]);
|
||||||
|
solveLU(A, Vector4(0, 1, 0, 0), &Ai[1]);
|
||||||
|
solveLU(A, Vector4(0, 0, 1, 0), &Ai[2]);
|
||||||
|
solveLU(A, Vector4(0, 0, 0, 1), &Ai[3]);
|
||||||
|
|
||||||
|
return Matrix(Ai[0], Ai[1], Ai[2], Ai[3]);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
bool nv::solveLU(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
||||||
{
|
{
|
||||||
nvDebugCheck(x != NULL);
|
nvDebugCheck(x != NULL);
|
||||||
@ -184,7 +202,7 @@ bool nv::solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x)
|
|||||||
{
|
{
|
||||||
nvDebugCheck(x != NULL);
|
nvDebugCheck(x != NULL);
|
||||||
|
|
||||||
*x = transform(inverse(A), b);
|
*x = transform(inverseCramer(A), b);
|
||||||
|
|
||||||
return true; // @@ Return false if determinant(A) == 0 !
|
return true; // @@ Return false if determinant(A) == 0 !
|
||||||
}
|
}
|
||||||
@ -198,7 +216,7 @@ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
Matrix3 Ai = inverse(A);
|
Matrix3 Ai = inverseCramer(A);
|
||||||
|
|
||||||
*x = transform(Ai, b);
|
*x = transform(Ai, b);
|
||||||
|
|
||||||
@ -207,6 +225,119 @@ bool nv::solveCramer(const Matrix3 & A, const Vector3 & b, Vector3 * x)
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Inverse using gaussian elimination. From Jon's code.
|
||||||
|
Matrix nv::inverse(const Matrix & m) {
|
||||||
|
|
||||||
|
Matrix A = m;
|
||||||
|
Matrix B(identity);
|
||||||
|
|
||||||
|
int i, j, k;
|
||||||
|
float max, t, det, pivot;
|
||||||
|
|
||||||
|
det = 1.0;
|
||||||
|
for (i=0; i<4; i++) { /* eliminate in column i, below diag */
|
||||||
|
max = -1.;
|
||||||
|
for (k=i; k<4; k++) /* find pivot for column i */
|
||||||
|
if (fabs(A(k, i)) > max) {
|
||||||
|
max = fabs(A(k, i));
|
||||||
|
j = k;
|
||||||
|
}
|
||||||
|
if (max<=0.) return B; /* if no nonzero pivot, PUNT */
|
||||||
|
if (j!=i) { /* swap rows i and j */
|
||||||
|
for (k=i; k<4; k++)
|
||||||
|
swap(A(i, k), A(j, k));
|
||||||
|
for (k=0; k<4; k++)
|
||||||
|
swap(B(i, k), B(j, k));
|
||||||
|
det = -det;
|
||||||
|
}
|
||||||
|
pivot = A(i, i);
|
||||||
|
det *= pivot;
|
||||||
|
for (k=i+1; k<4; k++) /* only do elems to right of pivot */
|
||||||
|
A(i, k) /= pivot;
|
||||||
|
for (k=0; k<4; k++)
|
||||||
|
B(i, k) /= pivot;
|
||||||
|
/* we know that A(i, i) will be set to 1, so don't bother to do it */
|
||||||
|
|
||||||
|
for (j=i+1; j<4; j++) { /* eliminate in rows below i */
|
||||||
|
t = A(j, i); /* we're gonna zero this guy */
|
||||||
|
for (k=i+1; k<4; k++) /* subtract scaled row i from row j */
|
||||||
|
A(j, k) -= A(i, k)*t; /* (ignore k<=i, we know they're 0) */
|
||||||
|
for (k=0; k<4; k++)
|
||||||
|
B(j, k) -= B(i, k)*t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/*---------- backward elimination ----------*/
|
||||||
|
|
||||||
|
for (i=4-1; i>0; i--) { /* eliminate in column i, above diag */
|
||||||
|
for (j=0; j<i; j++) { /* eliminate in rows above i */
|
||||||
|
t = A(j, i); /* we're gonna zero this guy */
|
||||||
|
for (k=0; k<4; k++) /* subtract scaled row i from row j */
|
||||||
|
B(j, k) -= B(i, k)*t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return B;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Inverse of a 3x3 matrix using Gauss-Jordan elimination with partial (row)
// pivoting: a working copy of m is reduced while the same row operations are
// applied to B, which starts out as the identity and is returned.
//
// If some column has no non-zero pivot candidate the function gives up and
// returns B in whatever state it has reached (the identity when this happens
// in the first column); no error is reported to the caller.
Matrix3 nv::inverse(const Matrix3 & m) {

    const int N = 3;        // Matrix dimension.

    Matrix3 A = m;          // Working copy that gets reduced.
    Matrix3 B(identity);    // Receives the same row operations as A.

    /*---------- forward elimination ----------*/

    for (int i = 0; i < N; i++) {               /* eliminate in column i, below diag */

        // Find the pivot for column i: the entry with the largest magnitude
        // on or below the diagonal.
        int pivotRow = i;
        float largest = -1.0f;
        for (int k = i; k < N; k++) {
            const float magnitude = fabs(A(k, i));
            if (magnitude > largest) {
                largest = magnitude;
                pivotRow = k;
            }
        }
        if (largest <= 0.0f) return B;          /* if no nonzero pivot, PUNT */

        if (pivotRow != i) {                    /* swap rows i and pivotRow */
            // Columns left of i are never read again for these rows, so A is
            // only swapped from column i onwards; B needs the whole row.
            for (int k = i; k < N; k++) {
                swap(A(i, k), A(pivotRow, k));
            }
            for (int k = 0; k < N; k++) {
                swap(B(i, k), B(pivotRow, k));
            }
        }

        // Scale row i so that the pivot becomes 1. A(i, i) itself is not
        // written because it is not read again.
        const float pivot = A(i, i);
        for (int k = i + 1; k < N; k++) {       /* only do elems to right of pivot */
            A(i, k) /= pivot;
        }
        for (int k = 0; k < N; k++) {
            B(i, k) /= pivot;
        }

        for (int j = i + 1; j < N; j++) {       /* eliminate in rows below i */
            const float t = A(j, i);            /* we're gonna zero this guy */
            for (int k = i + 1; k < N; k++) {   /* subtract scaled row i from row j */
                A(j, k) -= A(i, k) * t;         /* (ignore k<=i, we know they're 0) */
            }
            for (int k = 0; k < N; k++) {
                B(j, k) -= B(i, k) * t;
            }
        }
    }

    /*---------- backward elimination ----------*/

    for (int i = N - 1; i > 0; i--) {           /* eliminate in column i, above diag */
        for (int j = 0; j < i; j++) {           /* eliminate in rows above i */
            const float t = A(j, i);            /* we're gonna zero this guy */
            for (int k = 0; k < N; k++) {       /* subtract scaled row i from row j */
                B(j, k) -= B(i, k) * t;
            }
        }
    }

    return B;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
|
|
||||||
|
@ -83,6 +83,9 @@ namespace nv
|
|||||||
void rotate(float theta, float v0, float v1, float v2);
|
void rotate(float theta, float v0, float v1, float v2);
|
||||||
float determinant() const;
|
float determinant() const;
|
||||||
|
|
||||||
|
void operator+=(const Matrix & m);
|
||||||
|
void operator-=(const Matrix & m);
|
||||||
|
|
||||||
void apply(Matrix::Arg m);
|
void apply(Matrix::Arg m);
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -90,11 +93,18 @@ namespace nv
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Solve equation system using LU decomposition and back-substitution.
|
// Solve equation system using LU decomposition and back-substitution.
|
||||||
extern bool solveLU(const Matrix & m, const Vector4 & b, Vector4 * x);
|
extern bool solveLU(const Matrix & A, const Vector4 & b, Vector4 * x);
|
||||||
|
|
||||||
// Solve equation system using Cramer's inverse.
|
// Solve equation system using Cramer's inverse.
|
||||||
extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
|
extern bool solveCramer(const Matrix & A, const Vector4 & b, Vector4 * x);
|
||||||
|
|
||||||
|
// Compute inverse using LU decomposition.
|
||||||
|
extern Matrix inverseLU(const Matrix & m);
|
||||||
|
|
||||||
|
// Compute inverse using Gaussian elimination and partial pivoting.
|
||||||
|
extern Matrix inverse(const Matrix & m);
|
||||||
|
extern Matrix3 inverse(const Matrix3 & m);
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
#endif // NV_MATH_MATRIX_H
|
#endif // NV_MATH_MATRIX_H
|
||||||
|
@ -195,7 +195,7 @@ namespace nv
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Inverse using Cramer's rule.
|
// Inverse using Cramer's rule.
|
||||||
inline Matrix3 inverse(const Matrix3 & m)
|
inline Matrix3 inverseCramer(const Matrix3 & m)
|
||||||
{
|
{
|
||||||
const float det = m.determinant();
|
const float det = m.determinant();
|
||||||
if (equal(det, 0.0f, 0.0f)) {
|
if (equal(det, 0.0f, 0.0f)) {
|
||||||
@ -477,6 +477,25 @@ namespace nv
|
|||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get inverse frustum matrix.
|
||||||
|
inline Matrix frustumInverse(float xmin, float xmax, float ymin, float ymax, float zNear, float zFar)
|
||||||
|
{
|
||||||
|
Matrix m(0.0f);
|
||||||
|
|
||||||
|
float one_doubleznear = 1.0f / (2.0f * zNear);
|
||||||
|
float one_doubleznearzfar = 1.0f / (2.0f * zNear * zFar);
|
||||||
|
|
||||||
|
m(0,0) = (xmax - xmin) * one_doubleznear;
|
||||||
|
m(0,3) = (xmax + xmin) * one_doubleznear;
|
||||||
|
m(1,1) = (ymax - ymin) * one_doubleznear;
|
||||||
|
m(1,3) = (ymax + ymin) * one_doubleznear;
|
||||||
|
m(2,3) = -1;
|
||||||
|
m(3,2) = -(zFar - zNear) * one_doubleznearzfar;
|
||||||
|
m(3,3) = (zFar + zNear) * one_doubleznearzfar;
|
||||||
|
|
||||||
|
return m;
|
||||||
|
}
|
||||||
|
|
||||||
// Get infinite frustum matrix.
|
// Get infinite frustum matrix.
|
||||||
inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
|
inline Matrix frustum(float xmin, float xmax, float ymin, float ymax, float zNear)
|
||||||
{
|
{
|
||||||
@ -510,6 +529,18 @@ namespace nv
|
|||||||
return frustum(xmin, xmax, ymin, ymax, zNear, zFar);
|
return frustum(xmin, xmax, ymin, ymax, zNear, zFar);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Get inverse perspective matrix.
|
||||||
|
inline Matrix perspectiveInverse(float fovy, float aspect, float zNear, float zFar)
|
||||||
|
{
|
||||||
|
float xmax = zNear * tan(fovy / 2);
|
||||||
|
float xmin = -xmax;
|
||||||
|
|
||||||
|
float ymax = xmax / aspect;
|
||||||
|
float ymin = -ymax;
|
||||||
|
|
||||||
|
return frustumInverse(xmin, xmax, ymin, ymax, zNear, zFar);
|
||||||
|
}
|
||||||
|
|
||||||
// Get infinite perspective matrix.
|
// Get infinite perspective matrix.
|
||||||
inline Matrix perspective(float fovy, float aspect, float zNear)
|
inline Matrix perspective(float fovy, float aspect, float zNear)
|
||||||
{
|
{
|
||||||
@ -544,7 +575,7 @@ namespace nv
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Inverse using Cramer's rule.
|
// Inverse using Cramer's rule.
|
||||||
inline Matrix inverse(Matrix::Arg m)
|
inline Matrix inverseCramer(Matrix::Arg m)
|
||||||
{
|
{
|
||||||
Matrix r;
|
Matrix r;
|
||||||
r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
|
r.data( 0) = m.data(6)*m.data(11)*m.data(13) - m.data(7)*m.data(10)*m.data(13) + m.data(7)*m.data(9)*m.data(14) - m.data(5)*m.data(11)*m.data(14) - m.data(6)*m.data(9)*m.data(15) + m.data(5)*m.data(10)*m.data(15);
|
||||||
@ -622,6 +653,35 @@ namespace nv
|
|||||||
return m;
|
return m;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline void Matrix::operator+=(const Matrix & m)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < 16; i++) {
|
||||||
|
m_data[i] += m.m_data[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void Matrix::operator-=(const Matrix & m)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < 16; i++) {
|
||||||
|
m_data[i] -= m.m_data[i];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Component-wise sum of two matrices, built from a copy of a and operator+=.
inline Matrix operator+(const Matrix & a, const Matrix & b)
{
    Matrix sum(a);
    sum += b;
    return sum;
}
|
||||||
|
|
||||||
|
// Component-wise difference of two matrices, built from a copy of a and operator-=.
inline Matrix operator-(const Matrix & a, const Matrix & b)
{
    Matrix difference(a);
    difference -= b;
    return difference;
}
|
||||||
|
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,13 +12,13 @@
|
|||||||
# if NV_CPU_X86 || NV_CPU_X86_64
|
# if NV_CPU_X86 || NV_CPU_X86_64
|
||||||
# define NV_USE_SSE 2
|
# define NV_USE_SSE 2
|
||||||
# endif
|
# endif
|
||||||
//# if defined(__SSE2__)
|
# if defined(__SSE2__)
|
||||||
//# define NV_USE_SSE 2
|
# define NV_USE_SSE 2
|
||||||
//# elif defined(__SSE__)
|
# elif defined(__SSE__)
|
||||||
//# define NV_USE_SSE 1
|
# define NV_USE_SSE 1
|
||||||
//# else
|
# else
|
||||||
//# define NV_USE_SSE 0
|
# define NV_USE_SSE 0
|
||||||
//# endif
|
# endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Internally set NV_USE_SIMD when either altivec or sse is available.
|
// Internally set NV_USE_SIMD when either altivec or sse is available.
|
||||||
|
@ -144,6 +144,6 @@ namespace nv
|
|||||||
// Instead we simply have explicit casts:
|
// Instead we simply have explicit casts:
|
||||||
template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
|
template <typename T> T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); }
|
||||||
template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
|
template <typename T> T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); }
|
||||||
template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.z); }
|
template <typename T> T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.w); }
|
||||||
|
|
||||||
#endif // NV_MATH_VECTOR_H
|
#endif // NV_MATH_VECTOR_H
|
||||||
|
@ -440,14 +440,17 @@ namespace nv
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Note, this is the area scaled by 2!
|
// Note, this is the area scaled by 2!
|
||||||
|
// 2D cross product of the two vectors. The result is returned as-is, without
// the 0.5 factor, so it is twice the signed area they span.
inline float triangleArea(Vector2::Arg v0, Vector2::Arg v1)
{
    const float cross = v0.x * v1.y - v0.y * v1.x;
    return cross; // not multiplied by 0.5f
}
|
||||||
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
|
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
|
||||||
{
|
{
|
||||||
Vector2 v0 = a - c;
|
return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
|
||||||
Vector2 v1 = b - c;
|
//return triangleArea(a-c, b-c);
|
||||||
|
|
||||||
return (v0.x * v1.y - v0.y * v1.x);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
inline uint hash(const Vector2 & v, uint h)
|
inline uint hash(const Vector2 & v, uint h)
|
||||||
{
|
{
|
||||||
|
256
src/nvmath/ftoi.h
Executable file
256
src/nvmath/ftoi.h
Executable file
@ -0,0 +1,256 @@
|
|||||||
|
// This code is in the public domain -- castano@gmail.com
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
#ifndef NV_MATH_FTOI_H
|
||||||
|
#define NV_MATH_FTOI_H
|
||||||
|
|
||||||
|
#include "nvmath/nvmath.h"
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
|
||||||
|
namespace nv
|
||||||
|
{
|
||||||
|
// Optimized float to int conversions. See:
|
||||||
|
// http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
|
||||||
|
// http://www.stereopsis.com/sree/fpu2006.html
|
||||||
|
// http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
|
||||||
|
// http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
|
||||||
|
|
||||||
|
|
||||||
|
union DoubleAnd64 {
|
||||||
|
uint64 i;
|
||||||
|
double d;
|
||||||
|
};
|
||||||
|
|
||||||
|
static const double floatutil_xs_doublemagic = (6755399441055744.0); // 2^52 * 1.5
|
||||||
|
static const double floatutil_xs_doublemagicdelta = (1.5e-8); // almost .5f = .5f + 1e^(number of exp bit)
|
||||||
|
static const double floatutil_xs_doublemagicroundeps = (0.5f - floatutil_xs_doublemagicdelta); // almost .5f = .5f - 1e^(number of exp bit)
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_round_xs(double val, double magic) {
|
||||||
|
#if 1
|
||||||
|
DoubleAnd64 dunion;
|
||||||
|
dunion.d = val + magic;
|
||||||
|
return (int32) dunion.i; // just cast to grab the bottom bits
|
||||||
|
#else
|
||||||
|
val += magic;
|
||||||
|
return ((int*)&val)[0]; // @@ Assumes little endian.
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_round_xs(float val) {
|
||||||
|
return ftoi_round_xs(val, floatutil_xs_doublemagic);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_floor_xs(float val) {
|
||||||
|
return ftoi_round_xs(val - floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_ceil_xs(float val) {
|
||||||
|
return ftoi_round_xs(val + floatutil_xs_doublemagicroundeps, floatutil_xs_doublemagic);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_trunc_xs(float val) {
|
||||||
|
return (val<0) ? ftoi_ceil_xs(val) : ftoi_floor_xs(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
#if NV_CPU_X86 || NV_CPU_X86_64
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_round_sse(float f) {
|
||||||
|
return _mm_cvt_ss2si(_mm_set_ss(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_trunc_sse(float f) {
|
||||||
|
return _mm_cvtt_ss2si(_mm_set_ss(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#if NV_USE_SSE
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_round(float val) {
|
||||||
|
return ftoi_round_sse(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_trunc(float f) {
|
||||||
|
return ftoi_trunc_sse(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
// We can probably do better than this. See for example:
|
||||||
|
// http://dss.stephanierct.com/DevBlog/?p=8
|
||||||
|
NV_FORCEINLINE int ftoi_floor(float val) {
|
||||||
|
return ftoi_round(floorf(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_ceil(float val) {
|
||||||
|
return ftoi_round(ceilf(val));
|
||||||
|
}
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
// In theory this should work with any double floating point math implementation, but it appears that MSVC produces incorrect code
|
||||||
|
// when SSE2 is targeted and fast math is enabled (/arch:SSE2 & /fp:fast). These problems go away with /fp:precise, which is the default mode.
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_round(float val) {
|
||||||
|
return ftoi_round_xs(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_floor(float val) {
|
||||||
|
return ftoi_floor_xs(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_ceil(float val) {
|
||||||
|
return ftoi_ceil_xs(val);
|
||||||
|
}
|
||||||
|
|
||||||
|
NV_FORCEINLINE int ftoi_trunc(float f) {
|
||||||
|
return ftoi_trunc_xs(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
inline void test_ftoi() {
|
||||||
|
|
||||||
|
// Round to nearest integer.
|
||||||
|
nvCheck(ftoi_round(0.1f) == 0);
|
||||||
|
nvCheck(ftoi_round(0.6f) == 1);
|
||||||
|
nvCheck(ftoi_round(-0.2f) == 0);
|
||||||
|
nvCheck(ftoi_round(-0.7f) == -1);
|
||||||
|
nvCheck(ftoi_round(10.1f) == 10);
|
||||||
|
nvCheck(ftoi_round(10.6f) == 11);
|
||||||
|
nvCheck(ftoi_round(-90.1f) == -90);
|
||||||
|
nvCheck(ftoi_round(-90.6f) == -91);
|
||||||
|
|
||||||
|
nvCheck(ftoi_round(0) == 0);
|
||||||
|
nvCheck(ftoi_round(1) == 1);
|
||||||
|
nvCheck(ftoi_round(-1) == -1);
|
||||||
|
|
||||||
|
nvCheck(ftoi_round(0.5f) == 0); // How are midpoints rounded? Bankers rounding.
|
||||||
|
nvCheck(ftoi_round(1.5f) == 2);
|
||||||
|
nvCheck(ftoi_round(2.5f) == 2);
|
||||||
|
nvCheck(ftoi_round(3.5f) == 4);
|
||||||
|
nvCheck(ftoi_round(4.5f) == 4);
|
||||||
|
nvCheck(ftoi_round(-0.5f) == 0);
|
||||||
|
nvCheck(ftoi_round(-1.5f) == -2);
|
||||||
|
|
||||||
|
|
||||||
|
// Truncation (round down if > 0, round up if < 0).
|
||||||
|
nvCheck(ftoi_trunc(0.1f) == 0);
|
||||||
|
nvCheck(ftoi_trunc(0.6f) == 0);
|
||||||
|
nvCheck(ftoi_trunc(-0.2f) == 0);
|
||||||
|
nvCheck(ftoi_trunc(-0.7f) == 0); // @@ When using /arch:SSE2 in Win32, msvc produce wrong code for this one. It is skipping the addition.
|
||||||
|
nvCheck(ftoi_trunc(1.99f) == 1);
|
||||||
|
nvCheck(ftoi_trunc(-1.2f) == -1);
|
||||||
|
|
||||||
|
// Floor (round down).
|
||||||
|
nvCheck(ftoi_floor(0.1f) == 0);
|
||||||
|
nvCheck(ftoi_floor(0.6f) == 0);
|
||||||
|
nvCheck(ftoi_floor(-0.2f) == -1);
|
||||||
|
nvCheck(ftoi_floor(-0.7f) == -1);
|
||||||
|
nvCheck(ftoi_floor(1.99f) == 1);
|
||||||
|
nvCheck(ftoi_floor(-1.2f) == -2);
|
||||||
|
|
||||||
|
nvCheck(ftoi_floor(0) == 0);
|
||||||
|
nvCheck(ftoi_floor(1) == 1);
|
||||||
|
nvCheck(ftoi_floor(-1) == -1);
|
||||||
|
nvCheck(ftoi_floor(2) == 2);
|
||||||
|
nvCheck(ftoi_floor(-2) == -2);
|
||||||
|
|
||||||
|
// Ceil (round up).
|
||||||
|
nvCheck(ftoi_ceil(0.1f) == 1);
|
||||||
|
nvCheck(ftoi_ceil(0.6f) == 1);
|
||||||
|
nvCheck(ftoi_ceil(-0.2f) == 0);
|
||||||
|
nvCheck(ftoi_ceil(-0.7f) == 0);
|
||||||
|
nvCheck(ftoi_ceil(1.99f) == 2);
|
||||||
|
nvCheck(ftoi_ceil(-1.2f) == -1);
|
||||||
|
|
||||||
|
nvCheck(ftoi_ceil(0) == 0);
|
||||||
|
nvCheck(ftoi_ceil(1) == 1);
|
||||||
|
nvCheck(ftoi_ceil(-1) == -1);
|
||||||
|
nvCheck(ftoi_ceil(2) == 2);
|
||||||
|
nvCheck(ftoi_ceil(-2) == -2);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Safe versions using standard casts.
|
||||||
|
|
||||||
|
inline int iround(float f)
|
||||||
|
{
|
||||||
|
return int(floorf(f + 0.5f));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int iround(double f)
|
||||||
|
{
|
||||||
|
return int(::floor(f + 0.5));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int ifloor(float f)
|
||||||
|
{
|
||||||
|
return int(floorf(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline int iceil(float f)
|
||||||
|
{
|
||||||
|
return int(ceilf(f));
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
|
||||||
|
// Some notes from cbloom: http://cbloomrants.blogspot.com/2011/07/07-26-11-pixel-int-to-float-options.html
|
||||||
|
|
||||||
|
// Quantize a float in the [0,1] range, using exact end points or uniform bins.
|
||||||
|
inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
|
||||||
|
nvDebugCheck(bits <= 16);
|
||||||
|
|
||||||
|
float range = float(1 << bits);
|
||||||
|
if (exactEndPoints) {
|
||||||
|
return floorf(x * (range-1) + 0.5f) / (range-1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return (floorf(x * range) + 0.5f) / range;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// This is the most common rounding mode:
|
||||||
|
//
|
||||||
|
// 0 1 2 3
|
||||||
|
// |___|_______|_______|___|
|
||||||
|
// 0 1
|
||||||
|
//
|
||||||
|
// You get that if you take the unit floating point number multiply by 'N-1' and round to nearest. That is, `i = round(f * (N-1))`.
|
||||||
|
// You reconstruct the original float dividing by 'N-1': `f = i / (N-1)`
|
||||||
|
|
||||||
|
|
||||||
|
// 0 1 2 3
|
||||||
|
// |_____|_____|_____|_____|
|
||||||
|
// 0 1
|
||||||
|
|
||||||
|
/*enum BinningMode {
|
||||||
|
RoundMode_ExactEndPoints,
|
||||||
|
RoundMode_UniformBins,
|
||||||
|
};*/
|
||||||
|
|
||||||
|
template <int N>
|
||||||
|
inline uint unitFloatToFixed(float f) {
|
||||||
|
return ftoi_round(f * ((1<<N)-1));
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint8 unitFloatToFixed8(float f) {
|
||||||
|
return (uint8)unitFloatToFixed<8>(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
inline uint16 unitFloatToFixed16(float f) {
|
||||||
|
return (uint16)unitFloatToFixed<16>(f);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
} // nv
|
||||||
|
|
||||||
|
#endif // NV_MATH_FTOI_H
|
@ -14,6 +14,13 @@
|
|||||||
#include <float.h> // finite, isnan
|
#include <float.h> // finite, isnan
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if NV_CPU_X86 || NV_CPU_X86_64
|
||||||
|
//#include <intrin.h>
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Function linkage
|
// Function linkage
|
||||||
#if NVMATH_SHARED
|
#if NVMATH_SHARED
|
||||||
#ifdef NVMATH_EXPORTS
|
#ifdef NVMATH_EXPORTS
|
||||||
@ -28,6 +35,37 @@
|
|||||||
#define NVMATH_CLASS
|
#define NVMATH_CLASS
|
||||||
#endif // NVMATH_SHARED
|
#endif // NVMATH_SHARED
|
||||||
|
|
||||||
|
// Set some reasonable defaults. Both macros can be predefined by the build
// to override the detection below.
#ifndef NV_USE_ALTIVEC
#   define NV_USE_ALTIVEC NV_CPU_PPC
//# define NV_USE_ALTIVEC defined(__VEC__)
#endif

// NV_USE_SSE is the SSE level to target: 0 = none, 1 = SSE, 2 = SSE2.
#ifndef NV_USE_SSE
#   if NV_CPU_X86_64
        // x64 always supports at least SSE2
#       define NV_USE_SSE 2
#   elif NV_CC_MSVC && defined(_M_IX86_FP)
        // Also on x86 with the /arch:SSE flag in MSVC.
#       define NV_USE_SSE _M_IX86_FP // 1=SSE, 2=SSE2
#   elif defined(__SSE2__)
        // Test SSE2 first: compilers that define __SSE2__ also define __SSE__,
        // so checking __SSE__ first would never report level 2.
#       define NV_USE_SSE 2
#   elif defined(__SSE__)
#       define NV_USE_SSE 1
#   else
        // Otherwise we assume no SSE.
#       define NV_USE_SSE 0
#   endif
#endif


// Altivec and SSE code paths are mutually exclusive; reject configurations that enable both.
#if NV_USE_ALTIVEC && NV_USE_SSE
#   error "Cannot enable both altivec and sse!"
#endif
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#ifndef PI
|
#ifndef PI
|
||||||
#define PI float(3.1415926535897932384626433833)
|
#define PI float(3.1415926535897932384626433833)
|
||||||
#endif
|
#endif
|
||||||
@ -179,26 +217,6 @@ namespace nv
|
|||||||
inline float cube(float f) { return f * f * f; }
|
inline float cube(float f) { return f * f * f; }
|
||||||
inline int cube(int i) { return i * i * i; }
|
inline int cube(int i) { return i * i * i; }
|
||||||
|
|
||||||
// @@ Float to int conversions to be optimized at some point. See:
|
|
||||||
// http://cbloomrants.blogspot.com/2009/01/01-17-09-float-to-int.html
|
|
||||||
// http://www.stereopsis.com/sree/fpu2006.html
|
|
||||||
// http://assemblyrequired.crashworks.org/2009/01/12/why-you-should-never-cast-floats-to-ints/
|
|
||||||
// http://chrishecker.com/Miscellaneous_Technical_Articles#Floating_Point
|
|
||||||
inline int iround(float f)
|
|
||||||
{
|
|
||||||
return int(floorf(f + 0.5f));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int ifloor(float f)
|
|
||||||
{
|
|
||||||
return int(floorf(f));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline int iceil(float f)
|
|
||||||
{
|
|
||||||
return int(ceilf(f));
|
|
||||||
}
|
|
||||||
|
|
||||||
inline float frac(float f)
|
inline float frac(float f)
|
||||||
{
|
{
|
||||||
return f - floor(f);
|
return f - floor(f);
|
||||||
@ -242,21 +260,6 @@ namespace nv
|
|||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule.
|
|
||||||
|
|
||||||
// Quantize a float in the [0,1] range, using exact end points or uniform bins.
|
|
||||||
inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) {
|
|
||||||
nvDebugCheck(bits <= 16);
|
|
||||||
|
|
||||||
float range = float(1 << bits);
|
|
||||||
if (exactEndPoints) {
|
|
||||||
return floorf(x * (range-1) + 0.5f) / (range-1);
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
return (floorf(x * range) + 0.5f) / range;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
union Float754 {
|
union Float754 {
|
||||||
unsigned int raw;
|
unsigned int raw;
|
||||||
float value;
|
float value;
|
||||||
|
@ -50,7 +50,7 @@ AutoPtr<ThreadPool> s_pool;
|
|||||||
|
|
||||||
|
|
||||||
/*static*/ void ThreadPool::workerFunc(void * arg) {
|
/*static*/ void ThreadPool::workerFunc(void * arg) {
|
||||||
uint i = toU32((uintptr_t)arg); // This is OK, because workerCount should always be much smaller than 2^32
|
uint i = U32((uintptr_t)arg); // This is OK, because workerCount should always be much smaller than 2^32
|
||||||
|
|
||||||
while(true)
|
while(true)
|
||||||
{
|
{
|
||||||
|
@ -165,6 +165,10 @@ void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u
|
|||||||
// Use a single thread to compress small textures.
|
// Use a single thread to compress small textures.
|
||||||
if (context.bh < 4) dispatcher = &sequential;
|
if (context.bh < 4) dispatcher = &sequential;
|
||||||
|
|
||||||
|
#if _DEBUG
|
||||||
|
dispatcher = &sequential;
|
||||||
|
#endif
|
||||||
|
|
||||||
const uint count = context.bw * context.bh;
|
const uint count = context.bw * context.bh;
|
||||||
const uint size = context.bs * count;
|
const uint size = context.bs * count;
|
||||||
context.mem = new uint8[size];
|
context.mem = new uint8[size];
|
||||||
@ -231,6 +235,10 @@ void ColorSetCompressor::compress(AlphaMode alphaMode, uint w, uint h, uint d, c
|
|||||||
// Use a single thread to compress small textures.
|
// Use a single thread to compress small textures.
|
||||||
if (context.bh < 4) dispatcher = &sequential;
|
if (context.bh < 4) dispatcher = &sequential;
|
||||||
|
|
||||||
|
#if _DEBUG
|
||||||
|
dispatcher = &sequential;
|
||||||
|
#endif
|
||||||
|
|
||||||
const uint count = context.bw * context.bh;
|
const uint count = context.bw * context.bh;
|
||||||
const uint size = context.bs * count;
|
const uint size = context.bs * count;
|
||||||
context.mem = new uint8[size];
|
context.mem = new uint8[size];
|
||||||
|
@ -27,6 +27,7 @@
|
|||||||
#include "ClusterFit.h"
|
#include "ClusterFit.h"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
#include "nvimage/ColorBlock.h"
|
#include "nvimage/ColorBlock.h"
|
||||||
|
|
||||||
#include <float.h> // FLT_MAX
|
#include <float.h> // FLT_MAX
|
||||||
@ -37,7 +38,8 @@ ClusterFit::ClusterFit()
|
|||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
void ClusterFit::setColourSet(const ColorSet * set)
|
// @@ Deprecate. Do not use color set directly.
|
||||||
|
void ClusterFit::setColorSet(const ColorSet * set)
|
||||||
{
|
{
|
||||||
// initialise the best error
|
// initialise the best error
|
||||||
#if NVTT_USE_SIMD
|
#if NVTT_USE_SIMD
|
||||||
@ -58,6 +60,7 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
|||||||
}
|
}
|
||||||
|
|
||||||
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
|
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(m_count, values, set->weights, metric);
|
||||||
|
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(m_count, values, set->weights, metric);
|
||||||
|
|
||||||
// build the list of values
|
// build the list of values
|
||||||
int order[16];
|
int order[16];
|
||||||
@ -107,7 +110,72 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ClusterFit::setMetric(Vector4::Arg w)
|
void ClusterFit::setColorSet(const Vector3 * colors, const float * weights, int count)
|
||||||
|
{
|
||||||
|
// initialise the best error
|
||||||
|
#if NVTT_USE_SIMD
|
||||||
|
m_besterror = SimdVector( FLT_MAX );
|
||||||
|
Vector3 metric = m_metric.toVector3();
|
||||||
|
#else
|
||||||
|
m_besterror = FLT_MAX;
|
||||||
|
Vector3 metric = m_metric;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
m_count = count;
|
||||||
|
|
||||||
|
Vector3 principal = Fit::computePrincipalComponent_PowerMethod(count, colors, weights, metric);
|
||||||
|
//Vector3 principal = Fit::computePrincipalComponent_EigenSolver(count, colors, weights, metric);
|
||||||
|
|
||||||
|
// build the list of values
|
||||||
|
int order[16];
|
||||||
|
float dps[16];
|
||||||
|
for (uint i = 0; i < m_count; ++i)
|
||||||
|
{
|
||||||
|
dps[i] = dot(colors[i], principal);
|
||||||
|
order[i] = i;
|
||||||
|
}
|
||||||
|
|
||||||
|
// stable sort
|
||||||
|
for (uint i = 0; i < m_count; ++i)
|
||||||
|
{
|
||||||
|
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
|
||||||
|
{
|
||||||
|
swap(dps[j], dps[j - 1]);
|
||||||
|
swap(order[j], order[j - 1]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// weight all the points
|
||||||
|
#if NVTT_USE_SIMD
|
||||||
|
m_xxsum = SimdVector( 0.0f );
|
||||||
|
m_xsum = SimdVector( 0.0f );
|
||||||
|
#else
|
||||||
|
m_xxsum = Vector3(0.0f);
|
||||||
|
m_xsum = Vector3(0.0f);
|
||||||
|
m_wsum = 0.0f;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
for (uint i = 0; i < m_count; ++i)
|
||||||
|
{
|
||||||
|
int p = order[i];
|
||||||
|
#if NVTT_USE_SIMD
|
||||||
|
NV_ALIGN_16 Vector4 tmp(colors[p], 1);
|
||||||
|
m_weighted[i] = SimdVector(tmp.component) * SimdVector(weights[p]);
|
||||||
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||||
|
m_xsum += m_weighted[i];
|
||||||
|
#else
|
||||||
|
m_weighted[i] = colors[p] * weights[p];
|
||||||
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||||
|
m_xsum += m_weighted[i];
|
||||||
|
m_weights[i] = weights[p];
|
||||||
|
m_wsum += m_weights[i];
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
void ClusterFit::setColorWeights(Vector4::Arg w)
|
||||||
{
|
{
|
||||||
#if NVTT_USE_SIMD
|
#if NVTT_USE_SIMD
|
||||||
NV_ALIGN_16 Vector4 tmp(w.xyz(), 1);
|
NV_ALIGN_16 Vector4 tmp(w.xyz(), 1);
|
||||||
@ -292,12 +360,21 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
|
|||||||
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
|
SimdVector e3 = negativeMultiplySubtract( b, betax_sum, e2 );
|
||||||
SimdVector e4 = multiplyAdd( two, e3, e1 );
|
SimdVector e4 = multiplyAdd( two, e3, e1 );
|
||||||
|
|
||||||
|
#if 1
|
||||||
// apply the metric to the error term
|
// apply the metric to the error term
|
||||||
SimdVector e5 = e4 * m_metricSqr;
|
SimdVector e5 = e4 * m_metricSqr;
|
||||||
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
|
SimdVector error = e5.splatX() + e5.splatY() + e5.splatZ();
|
||||||
|
#else
|
||||||
|
// @@ Is there a horizontal max SIMD instruction?
|
||||||
|
SimdVector error = e4.splatX() + e4.splatY() + e4.splatZ();
|
||||||
|
error *= two;
|
||||||
|
error += max(max(e4.splatX(), e4.splatY()), e4.splatZ());
|
||||||
|
error -= min(min(e4.splatX(), e4.splatY()), e4.splatZ());
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
// keep the solution if it wins
|
// keep the solution if it wins
|
||||||
if( compareAnyLessThan( error, besterror ) )
|
if (compareAnyLessThan(error, besterror))
|
||||||
{
|
{
|
||||||
besterror = error;
|
besterror = error;
|
||||||
beststart = a;
|
beststart = a;
|
||||||
@ -317,7 +394,7 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
|
|||||||
}
|
}
|
||||||
|
|
||||||
// save the block if necessary
|
// save the block if necessary
|
||||||
if( compareAnyLessThan( besterror, m_besterror ) )
|
if (compareAnyLessThan(besterror, m_besterror))
|
||||||
{
|
{
|
||||||
*start = beststart.toVector3();
|
*start = beststart.toVector3();
|
||||||
*end = bestend.toVector3();
|
*end = bestend.toVector3();
|
||||||
@ -333,6 +410,29 @@ bool ClusterFit::compress4( Vector3 * start, Vector3 * end )
|
|||||||
|
|
||||||
#else
|
#else
|
||||||
|
|
||||||
|
inline Vector3 round565(const Vector3 & v) {
|
||||||
|
uint r = ftoi_floor(v.x * 31.0f);
|
||||||
|
float r0 = float(((r+0) << 3) | ((r+0) >> 2));
|
||||||
|
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
|
||||||
|
if (fabs(v.x - r1) < fabs(v.x - r0)) r = min(r+1, 31U);
|
||||||
|
r = (r << 3) | (r >> 2);
|
||||||
|
|
||||||
|
uint g = ftoi_floor(v.y * 63.0f);
|
||||||
|
float g0 = float(((g+0) << 2) | ((g+0) >> 4));
|
||||||
|
float g1 = float(((g+1) << 2) | ((g+1) >> 4));
|
||||||
|
if (fabs(v.y - g1) < fabs(v.y - g0)) g = min(g+1, 63U);
|
||||||
|
g = (g << 2) | (g >> 4);
|
||||||
|
|
||||||
|
uint b = ftoi_floor(v.z * 31.0f);
|
||||||
|
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
|
||||||
|
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
|
||||||
|
if (fabs(v.z - b1) < fabs(v.z - b0)) b = min(b+1, 31U);
|
||||||
|
|
||||||
|
b = (b << 3) | (b >> 2);
|
||||||
|
|
||||||
|
return Vector3(float(r)/255, float(g)/255, float(b)/255);
|
||||||
|
}
|
||||||
|
|
||||||
bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
||||||
{
|
{
|
||||||
const uint count = m_count;
|
const uint count = m_count;
|
||||||
@ -374,8 +474,29 @@ bool ClusterFit::compress3(Vector3 * start, Vector3 * end)
|
|||||||
// clamp to the grid
|
// clamp to the grid
|
||||||
a = clamp(a, 0, 1);
|
a = clamp(a, 0, 1);
|
||||||
b = clamp(b, 0, 1);
|
b = clamp(b, 0, 1);
|
||||||
a = floor(grid * a + 0.5f) * gridrcp;
|
//a = floor(grid * a + 0.5f) * gridrcp;
|
||||||
b = floor(grid * b + 0.5f) * gridrcp;
|
//b = floor(grid * b + 0.5f) * gridrcp;
|
||||||
|
|
||||||
|
//int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f;
|
||||||
|
//int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f;
|
||||||
|
//int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f;
|
||||||
|
//int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f;
|
||||||
|
//int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f;
|
||||||
|
//int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f;
|
||||||
|
|
||||||
|
/*a = floor(a * grid + 0.5f);
|
||||||
|
a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f;
|
||||||
|
a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f;
|
||||||
|
a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f;
|
||||||
|
|
||||||
|
b = floor(b * grid + 0.5f);
|
||||||
|
b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f;
|
||||||
|
b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f;
|
||||||
|
b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;*/
|
||||||
|
|
||||||
|
a = round565(a);
|
||||||
|
b = round565(b);
|
||||||
|
|
||||||
|
|
||||||
// compute the error
|
// compute the error
|
||||||
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
||||||
@ -461,8 +582,30 @@ bool ClusterFit::compress4(Vector3 * start, Vector3 * end)
|
|||||||
// clamp to the grid
|
// clamp to the grid
|
||||||
a = clamp(a, 0, 1);
|
a = clamp(a, 0, 1);
|
||||||
b = clamp(b, 0, 1);
|
b = clamp(b, 0, 1);
|
||||||
a = floor(a * grid + 0.5f) * gridrcp;
|
//a = floor(a * grid + 0.5f) * gridrcp;
|
||||||
b = floor(b * grid + 0.5f) * gridrcp;
|
//b = floor(b * grid + 0.5f) * gridrcp;
|
||||||
|
|
||||||
|
//int ar = ftoi_round(31 * a.x); ar = (ar << 3) | (ar >> 2); a.x = float(ar) / 255.0f;
|
||||||
|
//int ag = ftoi_round(63 * a.y); ar = (ag << 2) | (ag >> 4); a.y = float(ag) / 255.0f;
|
||||||
|
//int ab = ftoi_round(31 * a.z); ar = (ab << 3) | (ab >> 2); a.z = float(ab) / 255.0f;
|
||||||
|
//int br = ftoi_round(31 * b.x); br = (br << 3) | (br >> 2); b.x = float(br) / 255.0f;
|
||||||
|
//int bg = ftoi_round(63 * b.y); br = (bg << 2) | (bg >> 4); b.y = float(bg) / 255.0f;
|
||||||
|
//int bb = ftoi_round(31 * b.z); br = (bb << 3) | (bb >> 2); b.z = float(bb) / 255.0f;
|
||||||
|
|
||||||
|
/*
|
||||||
|
a = floor(a * grid + 0.5f);
|
||||||
|
a.x = (a.x * 8 + floorf(a.x / 4)) / 255.0f;
|
||||||
|
a.y = (a.y * 4 + floorf(a.y / 16)) / 255.0f;
|
||||||
|
a.z = (a.z * 8 + floorf(a.z / 4)) / 255.0f;
|
||||||
|
|
||||||
|
b = floor(b * grid + 0.5f);
|
||||||
|
b.x = (b.x * 8 + floorf(b.x / 4)) / 255.0f;
|
||||||
|
b.y = (b.y * 4 + floorf(b.y / 16)) / 255.0f;
|
||||||
|
b.z = (b.z * 8 + floorf(b.z / 4)) / 255.0f;
|
||||||
|
*/
|
||||||
|
|
||||||
|
a = round565(a);
|
||||||
|
b = round565(b);
|
||||||
|
|
||||||
// compute the error
|
// compute the error
|
||||||
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
Vector3 e1 = a*a*alpha2_sum + b*b*beta2_sum + 2.0f*( a*b*alphabeta_sum - a*alphax_sum - b*betax_sum );
|
||||||
|
@ -31,8 +31,8 @@
|
|||||||
#include "nvmath/Vector.h"
|
#include "nvmath/Vector.h"
|
||||||
|
|
||||||
// Use SIMD version if altivec or SSE are available.
|
// Use SIMD version if altivec or SSE are available.
|
||||||
#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
|
//#define NVTT_USE_SIMD (NV_USE_ALTIVEC || NV_USE_SSE)
|
||||||
//#define NVTT_USE_SIMD 0
|
#define NVTT_USE_SIMD 0
|
||||||
|
|
||||||
namespace nv {
|
namespace nv {
|
||||||
|
|
||||||
@ -43,9 +43,10 @@ namespace nv {
|
|||||||
public:
|
public:
|
||||||
ClusterFit();
|
ClusterFit();
|
||||||
|
|
||||||
void setColourSet(const ColorSet * set);
|
void setColorSet(const ColorSet * set);
|
||||||
|
void setColorSet(const Vector3 * colors, const float * weights, int count);
|
||||||
|
|
||||||
void setMetric(const Vector4 & w);
|
void setColorWeights(const Vector4 & w);
|
||||||
float bestError() const;
|
float bestError() const;
|
||||||
|
|
||||||
bool compress3(Vector3 * start, Vector3 * end);
|
bool compress3(Vector3 * start, Vector3 * end);
|
||||||
|
@ -246,11 +246,14 @@ unsigned int CompressionOptions::d3d9Format() const
|
|||||||
FOURCC_ATI2, // Format_BC5
|
FOURCC_ATI2, // Format_BC5
|
||||||
FOURCC_DXT1, // Format_DXT1n
|
FOURCC_DXT1, // Format_DXT1n
|
||||||
0, // Format_CTX1
|
0, // Format_CTX1
|
||||||
0, // Format_BC6
|
MAKEFOURCC('B', 'C', '6', 'H'), // Format_BC6
|
||||||
0, // Format_BC7
|
MAKEFOURCC('B', 'C', '7', 'L'), // Format_BC7
|
||||||
0, // Format_RGBE
|
FOURCC_ATI2, // Format_BC5_Luma
|
||||||
|
FOURCC_DXT5, // Format_BC3_RGBM
|
||||||
};
|
};
|
||||||
|
|
||||||
|
NV_COMPILER_CHECK(NV_ARRAY_SIZE(d3d9_formats) == Format_Count);
|
||||||
|
|
||||||
return d3d9_formats[m.format];
|
return d3d9_formats[m.format];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -31,49 +31,90 @@
|
|||||||
#include "nvimage/ColorBlock.h"
|
#include "nvimage/ColorBlock.h"
|
||||||
#include "nvimage/BlockDXT.h"
|
#include "nvimage/BlockDXT.h"
|
||||||
|
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
#include <new> // placement new
|
#include <new> // placement new
|
||||||
|
|
||||||
using namespace nv;
|
using namespace nv;
|
||||||
using namespace nvtt;
|
using namespace nvtt;
|
||||||
|
|
||||||
|
|
||||||
void FastCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void FastCompressorBC4::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
BlockATI1 * block = new(output) BlockATI1;
|
BlockATI1 * block = new(output) BlockATI1;
|
||||||
|
|
||||||
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
|
AlphaBlock4x4 tmp;
|
||||||
QuickCompress::compressDXT5A(rgba, &block->alpha);
|
tmp.init(src, 0); // Copy red to alpha
|
||||||
|
QuickCompress::compressDXT5A(tmp, &block->alpha);
|
||||||
}
|
}
|
||||||
|
|
||||||
void FastCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void FastCompressorBC5::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
BlockATI2 * block = new(output) BlockATI2;
|
BlockATI2 * block = new(output) BlockATI2;
|
||||||
|
|
||||||
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
|
AlphaBlock4x4 tmp;
|
||||||
QuickCompress::compressDXT5A(rgba, &block->x);
|
|
||||||
|
tmp.init(src, 0); // Copy red to alpha
|
||||||
|
QuickCompress::compressDXT5A(tmp, &block->x);
|
||||||
|
|
||||||
rgba.swizzle(0, 1, 2, 1); // Copy green to alpha
|
tmp.init(src, 1); // Copy green to alpha
|
||||||
QuickCompress::compressDXT5A(rgba, &block->y);
|
QuickCompress::compressDXT5A(tmp, &block->y);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void ProductionCompressorBC4::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void ProductionCompressorBC4::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
BlockATI1 * block = new(output) BlockATI1;
|
BlockATI1 * block = new(output) BlockATI1;
|
||||||
|
|
||||||
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
|
AlphaBlock4x4 tmp;
|
||||||
OptimalCompress::compressDXT5A(rgba, &block->alpha);
|
tmp.init(src, 0); // Copy red to alpha
|
||||||
|
OptimalCompress::compressDXT5A(tmp, &block->alpha);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ProductionCompressorBC5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void ProductionCompressorBC5::compressBlock(ColorBlock & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
BlockATI2 * block = new(output) BlockATI2;
|
BlockATI2 * block = new(output) BlockATI2;
|
||||||
|
|
||||||
|
AlphaBlock4x4 tmp;
|
||||||
|
|
||||||
|
tmp.init(src, 0); // Copy red to alpha
|
||||||
|
OptimalCompress::compressDXT5A(tmp, &block->x);
|
||||||
|
|
||||||
rgba.swizzle(0, 1, 2, 0); // Copy red to alpha
|
tmp.init(src, 1); // Copy green to alpha
|
||||||
OptimalCompress::compressDXT5A(rgba, &block->x);
|
OptimalCompress::compressDXT5A(tmp, &block->y);
|
||||||
|
|
||||||
rgba.swizzle(0, 1, 2, 1); // Copy green to alpha
|
|
||||||
OptimalCompress::compressDXT5A(rgba, &block->y);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Writes a two-channel (BlockATI2) block: block->x holds the compressed x
// channel of the color set, and block->y holds the compressed residual between
// that encoding and the source values. alphaMode and compressionOptions are
// not consulted by this implementation.
void ProductionCompressorBC5_Luma::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockATI2 * block = new(output) BlockATI2;

    // First pass: compress channel 0 of the set into block->x.
    AlphaBlock4x4 tmp;
    tmp.init(set, /*channel=*/0);
    OptimalCompress::compressDXT5A(tmp, &block->x);

    // Decode what was just written so the per-texel error can be measured.
    AlphaBlock4x4 decoded;
    block->x.decodeBlock(&decoded);

    // Maximum residual that we can represent. @@ Tweak this.
    const float R = 1.0f / 256.0f;

    // Second pass: reuse tmp, replacing its alpha values with packed residuals.
    for (int texel = 0; texel < 16; ++texel) {
        const float source = set.color(texel).x;                           // expected in [0,1]
        const float reconstructed = float(decoded.alpha[texel]) / 255.0f;  // [0,1]

        // Residual is in [-1,1], but usually within [-R,R].
        float residual = reconstructed - source;

        // Normalize by R so that the representable range maps to [-1,1].
        residual /= R;

        // Pack into [0,1]; values outside the range are clamped below.
        residual = residual * 0.5f + 0.5f;

        tmp.alpha[texel] = nv::ftoi_round(nv::saturate(residual) * 255.0f);
    }

    OptimalCompress::compressDXT5A(tmp, &block->y);
}
|
||||||
|
@ -58,6 +58,13 @@ namespace nv
|
|||||||
virtual uint blockSize() const { return 16; }
|
virtual uint blockSize() const { return 16; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct ProductionCompressorBC5_Luma : public ColorSetCompressor
|
||||||
|
{
|
||||||
|
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||||
|
virtual uint blockSize() const { return 16; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
|
|
||||||
|
@ -24,7 +24,6 @@
|
|||||||
|
|
||||||
#include "CompressorDX11.h"
|
#include "CompressorDX11.h"
|
||||||
|
|
||||||
#include <cstring>
|
|
||||||
#include "nvtt.h"
|
#include "nvtt.h"
|
||||||
#include "CompressionOptions.h"
|
#include "CompressionOptions.h"
|
||||||
#include "nvimage/ColorBlock.h"
|
#include "nvimage/ColorBlock.h"
|
||||||
@ -34,16 +33,16 @@
|
|||||||
#include "bc6h/zoh.h"
|
#include "bc6h/zoh.h"
|
||||||
#include "bc7/avpcl.h"
|
#include "bc7/avpcl.h"
|
||||||
|
|
||||||
|
#include <string.h> // memset
|
||||||
|
|
||||||
using namespace nv;
|
using namespace nv;
|
||||||
using namespace nvtt;
|
using namespace nvtt;
|
||||||
|
|
||||||
|
|
||||||
void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
// !!!UNDONE: support channel weights
|
// !!!UNDONE: support channel weights
|
||||||
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
|
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
|
||||||
|
|
||||||
NV_UNUSED(alphaMode); // ZOH does not support alpha.
|
|
||||||
|
|
||||||
if (compressionOptions.pixelType == PixelType_UnsignedFloat ||
|
if (compressionOptions.pixelType == PixelType_UnsignedFloat ||
|
||||||
compressionOptions.pixelType == PixelType_UnsignedNorm ||
|
compressionOptions.pixelType == PixelType_UnsignedNorm ||
|
||||||
@ -56,44 +55,60 @@ void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const Co
|
|||||||
ZOH::Utils::FORMAT = ZOH::SIGNED_F16;
|
ZOH::Utils::FORMAT = ZOH::SIGNED_F16;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert NVTT's tile struct to ZOH's, and convert float to half.
|
// Convert NVTT's tile struct to ZOH's, and convert float to half.
|
||||||
ZOH::Tile zohTile(tile.w, tile.h);
|
ZOH::Tile zohTile(tile.w, tile.h);
|
||||||
memset(zohTile.data, 0, sizeof(zohTile.data));
|
memset(zohTile.data, 0, sizeof(zohTile.data));
|
||||||
memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map));
|
memset(zohTile.importance_map, 0, sizeof(zohTile.importance_map));
|
||||||
for (uint y = 0; y < tile.h; ++y)
|
for (uint y = 0; y < tile.h; ++y)
|
||||||
{
|
{
|
||||||
for (uint x = 0; x < tile.w; ++x)
|
for (uint x = 0; x < tile.w; ++x)
|
||||||
{
|
{
|
||||||
Vector3 color = tile.color(x, y).xyz();
|
Vector4 color = tile.color(x, y);
|
||||||
uint16 rHalf = to_half(color.x);
|
uint16 rHalf = to_half(color.x);
|
||||||
uint16 gHalf = to_half(color.y);
|
uint16 gHalf = to_half(color.y);
|
||||||
uint16 bHalf = to_half(color.z);
|
uint16 bHalf = to_half(color.z);
|
||||||
zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf);
|
zohTile.data[y][x].x = ZOH::Tile::half2float(rHalf);
|
||||||
zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf);
|
zohTile.data[y][x].y = ZOH::Tile::half2float(gHalf);
|
||||||
zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf);
|
zohTile.data[y][x].z = ZOH::Tile::half2float(bHalf);
|
||||||
zohTile.importance_map[y][x] = 1.0f;
|
|
||||||
}
|
if (alphaMode == AlphaMode_Transparency) {
|
||||||
}
|
zohTile.importance_map[y][x] = color.w;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
zohTile.importance_map[y][x] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
ZOH::compress(zohTile, (char *)output);
|
ZOH::compress(zohTile, (char *)output);
|
||||||
}
|
}
|
||||||
|
|
||||||
void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
// !!!UNDONE: support channel weights
|
// !!!UNDONE: support channel weights
|
||||||
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
|
// !!!UNDONE: set flags once, not per block (this is especially sketchy since block compression is multithreaded...)
|
||||||
|
|
||||||
AVPCL::mode_rgb = false;
|
AVPCL::mode_rgb = false;
|
||||||
AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied);
|
AVPCL::flag_premult = (alphaMode == AlphaMode_Premultiplied);
|
||||||
AVPCL::flag_nonuniform = false;
|
AVPCL::flag_nonuniform = false;
|
||||||
AVPCL::flag_nonuniform_ati = false;
|
AVPCL::flag_nonuniform_ati = false;
|
||||||
|
|
||||||
|
// Convert NVTT's tile struct to AVPCL's.
|
||||||
|
AVPCL::Tile avpclTile(tile.w, tile.h);
|
||||||
|
memset(avpclTile.data, 0, sizeof(avpclTile.data));
|
||||||
|
for (uint y = 0; y < tile.h; ++y) {
|
||||||
|
for (uint x = 0; x < tile.w; ++x) {
|
||||||
|
Vector4 color = tile.color(x, y);
|
||||||
|
avpclTile.data[y][x] = color * 255.0f;
|
||||||
|
|
||||||
|
/*if (alphaMode == AlphaMode_Transparency) {
|
||||||
|
avpclTile.importance_map[y][x] = color.w;
|
||||||
|
}
|
||||||
|
else*/ {
|
||||||
|
avpclTile.importance_map[y][x] = 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Convert NVTT's tile struct to AVPCL's.
|
AVPCL::compress(avpclTile, (char *)output);
|
||||||
AVPCL::Tile avpclTile(tile.w, tile.h);
|
|
||||||
memset(avpclTile.data, 0, sizeof(avpclTile.data));
|
|
||||||
for (uint y = 0; y < tile.h; ++y)
|
|
||||||
for (uint x = 0; x < tile.w; ++x)
|
|
||||||
avpclTile.data[y][x] = tile.color(x, y) * 255.0f;
|
|
||||||
|
|
||||||
AVPCL::compress(avpclTile, (char *)output);
|
|
||||||
}
|
}
|
||||||
|
@ -112,7 +112,8 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
|
|||||||
QuickCompress::compressDXT5(rgba, block);
|
QuickCompress::compressDXT5(rgba, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 0
|
|
||||||
|
#if 1
|
||||||
void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
set.setUniformWeights();
|
set.setUniformWeights();
|
||||||
@ -125,11 +126,14 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
|
|||||||
Color32 c = toColor32(set.colors[0]);
|
Color32 c = toColor32(set.colors[0]);
|
||||||
OptimalCompress::compressDXT1(c, block);
|
OptimalCompress::compressDXT1(c, block);
|
||||||
}
|
}
|
||||||
|
/*else if (set.colorCount == 2) {
|
||||||
|
QuickCompress::compressDXT1(..., block);
|
||||||
|
}*/
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ClusterFit fit;
|
ClusterFit fit;
|
||||||
fit.setMetric(compressionOptions.colorWeight);
|
fit.setColorWeights(compressionOptions.colorWeight);
|
||||||
fit.setColourSet(&set);
|
fit.setColorSet(&set);
|
||||||
|
|
||||||
Vector3 start, end;
|
Vector3 start, end;
|
||||||
fit.compress4(&start, &end);
|
fit.compress4(&start, &end);
|
||||||
@ -142,6 +146,37 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#elif 1
|
||||||
|
|
||||||
|
|
||||||
|
extern void compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output);
|
||||||
|
|
||||||
|
|
||||||
|
// ColorBlock variant of the DXT1 compressor: constructs a BlockDXT1 in the
// output buffer and fills it either with the optimal single-color encoding or
// with the result of an exhaustive bounding-box search.
// alphaMode and compressionOptions are unused by the active code path.
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT1 * block = new(output) BlockDXT1;

    if (!rgba.isSingleColor())
    {
        // Do an exhaustive search inside the bounding box.
        compress_dxt1_bounding_box_exhaustive(rgba, block);
        return;
    }

    // Every texel has the same color.
    OptimalCompress::compressDXT1(rgba.color(0), block);
    //compress_dxt1_single_color_optimal(rgba.color(0), block);

    // Disabled alternative for the multi-color case (squish cluster fit):
    /*else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), 0);
        fit.SetColourSet(&colours, nvsquish::kDxt1);
        fit.Compress(output);
    }*/
}
|
||||||
#else
|
#else
|
||||||
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void CompressorDXT1::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
@ -304,6 +339,309 @@ void CompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Estimates the RGBM multiplier of one texel from its source color (R, G, B)
// and the decoded, normalized DXT color (r, g, b) as the average of the
// per-channel ratios. Channels whose decoded value is zero are left out of the
// average: dividing by them would yield infinity, or NaN when the source
// channel is zero as well. When no channel is usable the caller-supplied
// fallback is returned. With three non-zero channels the result is exactly
// (R / r + G / g + B / b) / 3.
static float rgbm_multiplier_from_decoded(float R, float G, float B, float r, float g, float b, float fallback)
{
    float sum = 0.0f;
    int count = 0;

    if (r > 0.0f) { sum += R / r; count++; }
    if (g > 0.0f) { sum += G / g; count++; }
    if (b > 0.0f) { sum += B / b; count++; }

    if (count == 0) return fallback;

    return sum / float(count);
}

// Compresses a color set into a BlockDXT5 using an RGBM encoding: the color
// sub-block stores rgb / M and the alpha sub-block stores the multiplier M,
// remapped from [threshold, 1] to [0, 1].
//
// Steps performed by the active code path:
//  1. Set the source weights (alpha weights for AlphaMode_Transparency, uniform otherwise).
//  2. Divide every color by M = max(R, G, B, threshold) and compress the result as DXT1 colors.
//  3. Decode the color sub-block and re-estimate M per texel against the decoded colors.
//     This should compensate for some of the DXT errors.
//  4. Compress the M values into the alpha sub-block (quick or optimal depending on quality).
//
// The #if 0 sections are disabled experiments kept for reference.
void CompressorBC3_RGBM::compressBlock(ColorSet & src, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
{
    BlockDXT5 * block = new(output)BlockDXT5;

    if (alphaMode == AlphaMode_Transparency) {
        src.setAlphaWeights();
    }
    else {
        src.setUniformWeights();
    }

    // Decompress the color block and find the M values that reproduce the input most closely. This should compensate for some of the DXT errors.

    // Compress the resulting M values optimally.

    // Repeat this several times until compression error does not improve?

    //Vector3 rgb_block[16];
    //float m_block[16];


    // Init RGB/M block.
    const float threshold = 0.15f; // @@ Use compression options.
#if 0
    nvsquish::WeightedClusterFit fit;

    ColorBlock rgba;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float M = max(max(R, G), max(B, threshold));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = c.w;

        rgba.color(i) = toColor32(Vector4(r, g, b, a));
    }

    if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &block->color);
    }
    else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        int flags = 0;
        if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
        fit.SetColourSet(&colours, 0);
        fit.Compress(&block->color);
    }
#endif
#if 1
    ColorSet rgb;
    rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.

    if (src.colorCount != 16) {
        nvDebugBreak();
    }

    for (uint i = 0; i < src.colorCount; i++) {
        const Vector4 & c = src.color(i);

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        // M is never below threshold, so the divisions below are safe.
        float M = max(max(R, G), max(B, threshold));
        float r = R / M;
        float g = G / M;
        float b = B / M;
        float a = c.w;

        rgb.colors[i] = Vector4(r, g, b, a);
        rgb.indices[i] = i;
        rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
    }

    rgb.createMinimalSet(/*ignoreTransparent=*/true);

    if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
    }
    else {
        ClusterFit fit;
        fit.setColorWeights(compressionOptions.colorWeight);
        fit.setColorSet(&rgb);

        Vector3 start, end;
        fit.compress4(&start, &end);

        QuickCompress::outputBlock4(rgb, start, end, &block->color);
    }
#endif

    // Decompress RGB/M block.
    nv::ColorBlock RGB;
    block->color.decodeBlock(&RGB);

#if 1
    AlphaBlock4x4 M;
    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float r = RGB.color(i).r / 255.0f;
        float g = RGB.color(i).g / 255.0f;
        float b = RGB.color(i).b / 255.0f;

        // Average of the per-channel ratios. Decoded channels equal to zero are
        // skipped (they would produce inf/NaN); if all three are zero, fall back
        // to the multiplier that was used to build the RGB block.
        float m = rgbm_multiplier_from_decoded(R, G, B, r, g, b, max(max(R, G), max(B, threshold)));
        //float m = max((R / r + G / g + B / b) / 3.0f, threshold);
        //float m = max(max(R / r, G / g), max(B / b, threshold));
        //float m = max(max(R, G), max(B, threshold));

        // Remap [threshold, 1] to [0, 1] before quantizing to 8 bits.
        m = (m - threshold) / (1 - threshold);

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = src.weights[i];
    }

    // Compress M.
    if (compressionOptions.quality == Quality_Fastest) {
        QuickCompress::compressDXT5A(M, &block->alpha);
    }
    else {
        OptimalCompress::compressDXT5A(M, &block->alpha);
    }
#else
    OptimalCompress::compressDXT5A_RGBM(src, RGB, &block->alpha);
#endif

#if 0
    // Decompress M.
    block->alpha.decodeBlock(&M);

    rgb.allocate(src.w, src.h); // @@ Handle smaller blocks.

    for (uint i = 0; i < src.colorCount; i++) {
        const Vector4 & c = src.color(i);

        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        //float m = max(max(R, G), max(B, threshold));
        float m = float(M.alpha[i]) / 255.0f * (1 - threshold) + threshold;
        float r = R / m;
        float g = G / m;
        float b = B / m;
        float a = c.w;

        rgb.colors[i] = Vector4(r, g, b, a);
        rgb.indices[i] = i;
        rgb.weights[i] = max(c.w, 0.001f);// src.weights[i]; // IC: For some reason 0 weights are causing problems, even if we eliminate the corresponding colors from the set.
    }

    rgb.createMinimalSet(/*ignoreTransparent=*/true);

    if (rgb.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(toColor32(rgb.color(0)), &block->color);
    }
    else {
        ClusterFit fit;
        fit.setMetric(compressionOptions.colorWeight);
        fit.setColourSet(&rgb);

        Vector3 start, end;
        fit.compress4(&start, &end);

        QuickCompress::outputBlock4(rgb, start, end, &block->color);
    }
#endif

#if 0
    block->color.decodeBlock(&RGB);

    //AlphaBlock4x4 M;
    //M.initWeights(src);

    for (int i = 0; i < 16; i++) {
        const Vector4 & c = src.color(i);
        float R = saturate(c.x);
        float G = saturate(c.y);
        float B = saturate(c.z);

        float r = RGB.color(i).r / 255.0f;
        float g = RGB.color(i).g / 255.0f;
        float b = RGB.color(i).b / 255.0f;

        float m = (R / r + G / g + B / b) / 3.0f;
        //float m = max((R / r + G / g + B / b) / 3.0f, threshold);
        //float m = max(max(R / r, G / g), max(B / b, threshold));
        //float m = max(max(R, G), max(B, threshold));

        m = (m - threshold) / (1 - threshold);

        M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
        M.weights[i] = src.weights[i];
    }

    // Compress M.
    if (compressionOptions.quality == Quality_Fastest) {
        QuickCompress::compressDXT5A(M, &block->alpha);
    }
    else {
        OptimalCompress::compressDXT5A(M, &block->alpha);
    }
#endif



#if 0
    src.fromRGBM(M, threshold);

    src.createMinimalSet(/*ignoreTransparent=*/true);

    if (src.isSingleColor(/*ignoreAlpha=*/true)) {
        OptimalCompress::compressDXT1(src.color(0), &block->color);
    }
    else {
        // @@ Use our improved compressor.
        ClusterFit fit;
        fit.setMetric(compressionOptions.colorWeight);
        fit.setColourSet(&src);

        Vector3 start, end;
        fit.compress4(&start, &end);

        if (fit.compress3(&start, &end)) {
            QuickCompress::outputBlock3(src, start, end, block->color);
        }
        else {
            QuickCompress::outputBlock4(src, start, end, block->color);
        }
    }
#endif // 0

    // @@ Decompress color and compute M that best approximates src with these colors? Then compress M again?



    // RGBM encoding.
    // Maximize precision.
    // - Number of possible grey levels:
    //   - Naive:  2^3 = 8
    //   - Better: 2^3 + 2^2 = 12
    // - How to choose threshold?
    //   - Ideal = Adaptive per block, don't know where to store.
    //   - Adaptive per lightmap. How to compute optimal?
    //   - Fixed: 0.25 in our case. Lightmaps scaled to a fixed [0, 1] range.

    // - Optimal compressor: Interpolation artifacts.

    // - Color transform.
    //   - Measure error in post-tone-mapping color space.
    //   - Assume a simple tone mapping operator. We know minimum and maximum exposure, but don't know exact exposure in game.
    //   - Guess based on average lightmap color? Use fixed exposure, in scaled lightmap space.

    // - Enhanced DXT compressor.
    //   - Typical RGBM encoding as follows:
    //     rgb -> M = max(rgb), RGB=rgb/M -> RGBM
    //   - If we add a compression step (M' = M) and M' < M, then rgb may be greater than 1.
    //     - We could ensure that M' >= M during compression.
    //     - We could clamp RGB anyway.
    //     - We could add a fixed scale value to take into account compression errors and avoid clamping.





    // Compress color.
    /*if (rgba.isSingleColor())
    {
        OptimalCompress::compressDXT1(rgba.color(0), &block->color);
    }
    else
    {
        nvsquish::WeightedClusterFit fit;
        fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);

        int flags = 0;
        if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;

        nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
        fit.SetColourSet(&colours, 0);
        fit.Compress(&block->color);
    }*/
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
#if defined(HAVE_ATITC)
|
#if defined(HAVE_ATITC)
|
||||||
|
|
||||||
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
void AtiCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode alphaMode, uint w, uint h, uint d, void * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
|
||||||
|
@ -64,7 +64,7 @@ namespace nv
|
|||||||
|
|
||||||
|
|
||||||
// Normal CPU compressors.
|
// Normal CPU compressors.
|
||||||
#if 0
|
#if 1
|
||||||
struct CompressorDXT1 : public ColorSetCompressor
|
struct CompressorDXT1 : public ColorSetCompressor
|
||||||
{
|
{
|
||||||
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||||
@ -108,6 +108,12 @@ namespace nv
|
|||||||
virtual uint blockSize() const { return 16; }
|
virtual uint blockSize() const { return 16; }
|
||||||
};
|
};
|
||||||
|
|
||||||
|
struct CompressorBC3_RGBM : public ColorSetCompressor
|
||||||
|
{
|
||||||
|
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||||
|
virtual uint blockSize() const { return 16; }
|
||||||
|
};
|
||||||
|
|
||||||
|
|
||||||
// External compressors.
|
// External compressors.
|
||||||
#if defined(HAVE_ATITC)
|
#if defined(HAVE_ATITC)
|
||||||
|
461
src/nvtt/CompressorDXT1.cpp
Normal file
461
src/nvtt/CompressorDXT1.cpp
Normal file
@ -0,0 +1,461 @@
|
|||||||
|
|
||||||
|
#include "CompressorDXT1.h"
|
||||||
|
#include "SingleColorLookup.h"
|
||||||
|
#include "ClusterFit.h"
|
||||||
|
#include "QuickCompressDXT.h" // Deprecate.
|
||||||
|
|
||||||
|
#include "nvimage/ColorBlock.h"
|
||||||
|
#include "nvimage/BlockDXT.h"
|
||||||
|
|
||||||
|
#include "nvmath/Color.inl"
|
||||||
|
#include "nvmath/Vector.inl"
|
||||||
|
#include "nvmath/Fitting.h"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
|
#include "nvcore/Utils.h" // swap
|
||||||
|
|
||||||
|
#include <string.h> // memset
|
||||||
|
|
||||||
|
|
||||||
|
using namespace nv;
|
||||||
|
|
||||||
|
|
||||||
|
inline static void color_block_to_vector_block(const ColorBlock & rgba, Vector3 block[16])
|
||||||
|
{
|
||||||
|
for (int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
const Color32 c = rgba.color(i);
|
||||||
|
block[i] = Vector3(c.r, c.g, c.b);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Expand a 5:6:5 color to 8 bits per channel by replicating the high bits into
// the low bits, and return it as a vector (x = red, y = green, z = blue).
inline Vector3 r5g6b5_to_vector3(int r, int g, int b)
{
    const int red8   = (r << 3) | (r >> 2);
    const int green8 = (g << 2) | (g >> 4);
    const int blue8  = (b << 3) | (b >> 2);

    Vector3 expanded;
    expanded.x = float(red8);
    expanded.y = float(green8);
    expanded.z = float(blue8);
    return expanded;
}
|
||||||
|
|
||||||
|
// Convert the RGB channels of an 8 bit color to a vector by dividing each
// channel by 255. Alpha is ignored.
inline Vector3 color_to_vector3(Color32 c)
{
    const float inv255 = 1.0f / 255.0f;

    const float red   = c.r * inv255;
    const float green = c.g * inv255;
    const float blue  = c.b * inv255;

    return Vector3(red, green, blue);
}
|
||||||
|
|
||||||
|
// Convert a vector to an opaque 8 bit color.
// Each component is passed through saturate(), scaled by 255 and rounded with
// ftoi_round(); alpha is always set to 255.
inline Color32 vector3_to_color(Vector3 v)
{
    Color32 color;
    color.r = U8(ftoi_round(saturate(v.x) * 255));
    color.g = U8(ftoi_round(saturate(v.y) * 255));
    color.b = U8(ftoi_round(saturate(v.z) * 255));
    color.a = 255;

    // The original fell off the end of the function without returning the
    // color, which is undefined behavior for a non-void function.
    return color;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Find first valid color.
|
||||||
|
static bool find_valid_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 * valid_color)
|
||||||
|
{
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
if (weights[i] > 0.0f) {
|
||||||
|
*valid_color = colors[i];
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// No valid colors.
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Returns true when every color with a positive weight is equal to 'color'.
// Colors without a positive weight are ignored, so an empty or fully
// unweighted set also yields true.
static bool is_single_color_rgb(const Vector3 * colors, const float * weights, int count, Vector3 color)
{
    for (int i = 0; i < count; i++) {
        const bool counted = weights[i] > 0.0f;
        if (counted && colors[i] != color) {
            return false;
        }
    }

    return true;
}
|
||||||
|
|
||||||
|
// Find similar colors and combine them together.
|
||||||
|
static int reduce_colors(const Vector3 * input_colors, const float * input_weights, Vector3 * colors, float * weights)
|
||||||
|
{
|
||||||
|
int n = 0;
|
||||||
|
for (int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
Vector3 ci = input_colors[i];
|
||||||
|
float wi = input_weights[i];
|
||||||
|
|
||||||
|
if (wi > 0) {
|
||||||
|
// Find matching color.
|
||||||
|
int j;
|
||||||
|
for (j = 0; j < n; j++) {
|
||||||
|
if (equal(colors[j].x, ci.x) && equal(colors[j].y, ci.y) && equal(colors[j].z, ci.z)) {
|
||||||
|
weights[j] += wi;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// No match found. Add new color.
|
||||||
|
if (j == n) {
|
||||||
|
colors[n] = ci;
|
||||||
|
weights[n] = wi;
|
||||||
|
n++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
nvDebugCheck(n <= 16);
|
||||||
|
|
||||||
|
return n;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Different ways of estimating the error.
|
||||||
|
static float evaluate_mse(const Vector3 & p, const Vector3 & c) {
|
||||||
|
return square(p.x-c.x) + square(p.y-c.y) + square(p.z-c.z);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*static float evaluate_mse(const Vector3 & p, const Vector3 & c, const Vector3 & w) {
|
||||||
|
return w.x * square(p.x-c.x) + w.y * square(p.y-c.y) + w.z * square(p.z-c.z);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
static int evaluate_mse_rgb(const Color32 & p, const Color32 & c) {
|
||||||
|
return square(int(p.r)-c.r) + square(int(p.g)-c.g) + square(int(p.b)-c.b);
|
||||||
|
}
|
||||||
|
|
||||||
|
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c) {
|
||||||
|
float e0 = evaluate_mse(palette[0], c);
|
||||||
|
float e1 = evaluate_mse(palette[1], c);
|
||||||
|
float e2 = evaluate_mse(palette[2], c);
|
||||||
|
float e3 = evaluate_mse(palette[3], c);
|
||||||
|
return min(min(e0, e1), min(e2, e3));
|
||||||
|
}
|
||||||
|
|
||||||
|
static int evaluate_mse(const Color32 palette[4], const Color32 & c) {
|
||||||
|
int e0 = evaluate_mse_rgb(palette[0], c);
|
||||||
|
int e1 = evaluate_mse_rgb(palette[1], c);
|
||||||
|
int e2 = evaluate_mse_rgb(palette[2], c);
|
||||||
|
int e3 = evaluate_mse_rgb(palette[3], c);
|
||||||
|
return min(min(e0, e1), min(e2, e3));
|
||||||
|
}
|
||||||
|
|
||||||
|
static float evaluate_mse(const Vector3 palette[4], const Vector3 & c, int index) {
|
||||||
|
return evaluate_mse(palette[index], c);
|
||||||
|
}
|
||||||
|
|
||||||
|
static int evaluate_mse(const Color32 palette[4], const Color32 & c, int index) {
|
||||||
|
return evaluate_mse_rgb(palette[index], c);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static float evaluate_mse(const BlockDXT1 * output, Vector3 colors[16]) {
|
||||||
|
Color32 palette[4];
|
||||||
|
output->evaluatePalette(palette, /*d3d9=*/false);
|
||||||
|
|
||||||
|
// convert palette to float.
|
||||||
|
Vector3 vector_palette[4];
|
||||||
|
for (int i = 0; i < 4; i++) {
|
||||||
|
vector_palette[i] = color_to_vector3(palette[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// evaluate error for each index.
|
||||||
|
float error = 0.0f;
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
int index = (output->indices >> (2*i)) & 3; // @@ Is this the right order?
|
||||||
|
error += evaluate_mse(vector_palette, colors[i], index);
|
||||||
|
}
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int evaluate_mse(const BlockDXT1 * output, Color32 color, int index) {
|
||||||
|
Color32 palette[4];
|
||||||
|
output->evaluatePalette(palette, /*d3d9=*/false);
|
||||||
|
|
||||||
|
return evaluate_mse(palette, color, index);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*void output_block3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block)
|
||||||
|
{
|
||||||
|
Vector3 minColor = start * 255.0f;
|
||||||
|
Vector3 maxColor = end * 255.0f;
|
||||||
|
uint16 color0 = roundAndExpand(&minColor);
|
||||||
|
uint16 color1 = roundAndExpand(&maxColor);
|
||||||
|
|
||||||
|
if (color0 > color1) {
|
||||||
|
swap(maxColor, minColor);
|
||||||
|
swap(color0, color1);
|
||||||
|
}
|
||||||
|
|
||||||
|
block->col0 = Color16(color0);
|
||||||
|
block->col1 = Color16(color1);
|
||||||
|
block->indices = compute_indices3(colors, weights, count, maxColor / 255.0f, minColor / 255.0f);
|
||||||
|
|
||||||
|
//optimizeEndPoints3(set, block);
|
||||||
|
}*/
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Single color compressor, based on:
|
||||||
|
// https://mollyrocket.com/forums/viewtopic.php?t=392
|
||||||
|
float nv::compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
output->col0.r = OMatch5[c.r][0];
|
||||||
|
output->col0.g = OMatch6[c.g][0];
|
||||||
|
output->col0.b = OMatch5[c.b][0];
|
||||||
|
output->col1.r = OMatch5[c.r][1];
|
||||||
|
output->col1.g = OMatch6[c.g][1];
|
||||||
|
output->col1.b = OMatch5[c.b][1];
|
||||||
|
output->indices = 0xaaaaaaaa;
|
||||||
|
|
||||||
|
if (output->col0.u < output->col1.u)
|
||||||
|
{
|
||||||
|
swap(output->col0.u, output->col1.u);
|
||||||
|
output->indices ^= 0x55555555;
|
||||||
|
}
|
||||||
|
|
||||||
|
return (float) evaluate_mse(output, c, output->indices & 3);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float nv::compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
return compress_dxt1_single_color_optimal(vector3_to_color(color), output);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Low quality baseline compressor.
|
||||||
|
float nv::compress_dxt1_least_squares_fit(const Vector3 * input_colors, const Vector3 * colors, const float * weights, int count, BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
// @@ Iterative best end point fit.
|
||||||
|
|
||||||
|
return FLT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Expand a 5:6:5 color to an opaque 8:8:8:8 color by replicating the high bits
// of every channel into its low bits.
static Color32 bitexpand_color16_to_color32(Color16 c16) {
    Color32 expanded;
    expanded.r = (c16.r << 3) | (c16.r >> 2);
    expanded.g = (c16.g << 2) | (c16.g >> 4);
    expanded.b = (c16.b << 3) | (c16.b >> 2);
    expanded.a = 255;

    // A packed variant that operates on the whole word at once was left disabled
    // in the original:
    //c32.u = ((c16.u << 3) & 0xf8) | ((c16.u << 5) & 0xfc00) | ((c16.u << 8) & 0xf80000);
    //c32.u |= (c32.u >> 5) & 0x070007;
    //c32.u |= (c32.u >> 6) & 0x000300;

    return expanded;
}
|
||||||
|
|
||||||
|
// Same expansion as the Color16 overload, for channels given as separate
// 5 bit (r, b) and 6 bit (g) integers.
static Color32 bitexpand_color16_to_color32(int r, int g, int b) {
    Color32 expanded;
    expanded.r = (r << 3) | (r >> 2);
    expanded.g = (g << 2) | (g >> 4);
    expanded.b = (b << 3) | (b >> 2);
    expanded.a = 255;
    return expanded;
}
|
||||||
|
|
||||||
|
// Reduce an 8 bit per channel color to 5:6:5 by dropping the low bits of every
// channel. Alpha is discarded.
static Color16 truncate_color32_to_color16(Color32 c32) {
    Color16 truncated;
    truncated.r = (c32.r >> 3);
    truncated.g = (c32.g >> 2);
    truncated.b = (c32.b >> 3);
    return truncated;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
static float evaluate_palette4(Color32 palette[4]) {
|
||||||
|
palette[2].r = (2 * palette[0].r + palette[1].r) / 3;
|
||||||
|
palette[2].g = (2 * palette[0].g + palette[1].g) / 3;
|
||||||
|
palette[2].b = (2 * palette[0].b + palette[1].b) / 3;
|
||||||
|
palette[3].r = (2 * palette[1].r + palette[0].r) / 3;
|
||||||
|
palette[3].g = (2 * palette[1].g + palette[0].g) / 3;
|
||||||
|
palette[3].b = (2 * palette[1].b + palette[0].b) / 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
static float evaluate_palette3(Color32 palette[4]) {
|
||||||
|
palette[2].r = (palette[0].r + palette[1].r) / 2;
|
||||||
|
palette[2].g = (palette[0].g + palette[1].g) / 2;
|
||||||
|
palette[2].b = (palette[0].b + palette[1].b) / 2;
|
||||||
|
palette[3].r = 0;
|
||||||
|
palette[3].g = 0;
|
||||||
|
palette[3].b = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Weighted error of a color set against a palette.
// Every color contributes its error against the closest palette entry,
// multiplied by the square of its weight.
static float evaluate_palette_error(Color32 palette[4], const Color32 * colors, const float * weights, int count) {
    float sum = 0.0f;

    int i = 0;
    while (i < count) {
        const float squared_weight = weights[i] * weights[i];
        sum += squared_weight * evaluate_mse(palette, colors[i]);
        i++;
    }

    return sum;
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
float nv::compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, int max_volume, BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
// Compute bounding box.
|
||||||
|
Vector3 min_color(1.0f);
|
||||||
|
Vector3 max_color(0.0f);
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
min_color = min(min_color, colors[i]);
|
||||||
|
max_color = max(max_color, colors[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to 5:6:5
|
||||||
|
int min_r = ftoi_floor(31 * min_color.x);
|
||||||
|
int min_g = ftoi_floor(63 * min_color.y);
|
||||||
|
int min_b = ftoi_floor(31 * min_color.z);
|
||||||
|
int max_r = ftoi_ceil(31 * max_color.x);
|
||||||
|
int max_g = ftoi_ceil(63 * max_color.y);
|
||||||
|
int max_b = ftoi_ceil(31 * max_color.z);
|
||||||
|
|
||||||
|
// Expand the box.
|
||||||
|
int range_r = max_r - min_r;
|
||||||
|
int range_g = max_g - min_g;
|
||||||
|
int range_b = max_b - min_b;
|
||||||
|
|
||||||
|
min_r = max(0, min_r - (range_r + 1) / 1 - 1);
|
||||||
|
min_g = max(0, min_g - (range_g + 1) / 1 - 1);
|
||||||
|
min_b = max(0, min_b - (range_b + 1) / 1 - 1);
|
||||||
|
|
||||||
|
max_r = min(31, max_r + (range_r + 1) / 2 + 1);
|
||||||
|
max_g = min(63, max_g + (range_g + 1) / 2 + 1);
|
||||||
|
max_b = min(31, max_b + (range_b + 1) / 2 + 1);
|
||||||
|
|
||||||
|
// Estimate size of search space.
|
||||||
|
int volume = (max_r-min_r+1) * (max_g-min_g+1) * (max_b-min_b+1);
|
||||||
|
|
||||||
|
// if size under search_limit, then proceed. Note that search_limit is sqrt of number of evaluations.
|
||||||
|
if (volume > max_volume) {
|
||||||
|
return FLT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
Color32 colors32[16];
|
||||||
|
for (int i = 0; i < count; i++) {
|
||||||
|
colors32[i] = toColor32(Vector4(colors[i], 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
float best_error = FLT_MAX;
|
||||||
|
Color32 best0, best1;
|
||||||
|
|
||||||
|
for(int r0 = min_r; r0 <= max_r; r0++)
|
||||||
|
for(int r1 = max_r; r1 >= r0; r1--)
|
||||||
|
for(int g0 = min_g; g0 <= max_g; g0++)
|
||||||
|
for(int g1 = max_g; g1 >= g0; g1--)
|
||||||
|
for(int b0 = min_b; b0 <= max_b; b0++)
|
||||||
|
for(int b1 = max_b; b1 >= b0; b1--)
|
||||||
|
{
|
||||||
|
Color32 palette[4];
|
||||||
|
palette[0] = bitexpand_color16_to_color32(r1, g1, b1);
|
||||||
|
palette[1] = bitexpand_color16_to_color32(r0, g0, b0);
|
||||||
|
|
||||||
|
// Evaluate error in 4 color mode.
|
||||||
|
evaluate_palette4(palette);
|
||||||
|
|
||||||
|
float error = evaluate_palette_error(palette, colors32, weights, count);
|
||||||
|
|
||||||
|
if (error < best_error) {
|
||||||
|
best_error = error;
|
||||||
|
best0 = palette[0];
|
||||||
|
best1 = palette[1];
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
// Evaluate error in 3 color mode.
|
||||||
|
evaluate_palette3(palette);
|
||||||
|
|
||||||
|
float error = evaluate_palette_error(palette, colors, weights, count);
|
||||||
|
|
||||||
|
if (error < best_error) {
|
||||||
|
best_error = error;
|
||||||
|
best0 = palette[1];
|
||||||
|
best1 = palette[0];
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
output->col0 = truncate_color32_to_color16(best0);
|
||||||
|
output->col1 = truncate_color32_to_color16(best1);
|
||||||
|
|
||||||
|
if (output->col0.u <= output->col1.u) {
|
||||||
|
//output->indices = computeIndices3(colors, best0, best1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//output->indices = computeIndices4(colors, best0, best1);
|
||||||
|
}
|
||||||
|
|
||||||
|
return FLT_MAX;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
float nv::compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
ClusterFit fit;
|
||||||
|
//fit.setColorWeights(compressionOptions.colorWeight);
|
||||||
|
fit.setColorWeights(Vector4(1)); // @@ Set color weights.
|
||||||
|
fit.setColorSet(colors, weights, count);
|
||||||
|
|
||||||
|
// start & end are in [0, 1] range.
|
||||||
|
Vector3 start, end;
|
||||||
|
fit.compress4(&start, &end);
|
||||||
|
|
||||||
|
if (fit.compress3(&start, &end)) {
|
||||||
|
//output_block3(input_colors, start, end, block);
|
||||||
|
// @@ Output block.
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
//output_block4(input_colors, start, end, block);
|
||||||
|
// @@ Output block.
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
float nv::compress_dxt1(const Vector3 input_colors[16], const float input_weights[16], BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
Vector3 colors[16];
|
||||||
|
float weights[16];
|
||||||
|
int count = reduce_colors(input_colors, input_weights, colors, weights);
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
// Output trivial block.
|
||||||
|
output->col0.u = 0;
|
||||||
|
output->col1.u = 0;
|
||||||
|
output->indices = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count == 1) {
|
||||||
|
return compress_dxt1_single_color_optimal(colors[0], output);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If high quality:
|
||||||
|
//error = compress_dxt1_bounding_box_exhaustive(colors, weigths, count, 3200, error, output);
|
||||||
|
//if (error < FLT_MAX) return error;
|
||||||
|
|
||||||
|
// This is pretty fast and in some cases can produces better quality than cluster fit.
|
||||||
|
// error = compress_dxt1_least_squares_fit(colors, weigths, error, output);
|
||||||
|
|
||||||
|
//
|
||||||
|
float error = compress_dxt1_cluster_fit(input_colors, colors, weights, count, output);
|
||||||
|
|
||||||
|
return error;
|
||||||
|
}
|
||||||
|
|
38
src/nvtt/CompressorDXT1.h
Normal file
38
src/nvtt/CompressorDXT1.h
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
|
||||||
|
namespace nv {

    class Color32;
    struct ColorBlock;
    struct BlockDXT1;
    class Vector3;

    // All these functions return the error of the compressed block.
    // Compressors that do not produce a block return FLT_MAX.

    // Planned ColorBlock based interface (not implemented):
    // Optimal compressors:
    /*float compress_dxt1_single_color_optimal(const Color32 & rgb, BlockDXT1 * output);
    float compress_dxt1_single_color_optimal(const ColorBlock & input, BlockDXT1 * output);
    float compress_dxt1_optimal(const ColorBlock & input, BlockDXT1 * output);

    // Brute force with restricted search space:
    float compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output);
    float compress_dxt1_best_fit_line_exhaustive(const ColorBlock & input, BlockDXT1 * output);

    // Fast least squares fitting compressors:
    float compress_dxt1_least_squares_fit(const ColorBlock & input, BlockDXT1 * output);
    float compress_dxt1_least_squares_fit_iterative(const ColorBlock & input, BlockDXT1 * output);
    */

    // Single color compressors.
    float compress_dxt1_single_color_optimal(Color32 c, BlockDXT1 * output);
    float compress_dxt1_single_color_optimal(const Vector3 & color, BlockDXT1 * output);

    // Compressors operating on a reduced color set: 'colors' and 'weights' hold
    // 'count' entries, 'input_colors' holds the 16 source colors.
    float compress_dxt1_least_squares_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output);
    // 'max_volume' limits the size of the search space; the parameter name matches the definition.
    float compress_dxt1_bounding_box_exhaustive(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, int max_volume, BlockDXT1 * output);
    float compress_dxt1_cluster_fit(const Vector3 input_colors[16], const Vector3 * colors, const float * weights, int count, BlockDXT1 * output);

    // Entry point: compress 16 weighted colors.
    float compress_dxt1(const Vector3 colors[16], const float weights[16], BlockDXT1 * output);

}
|
@ -32,6 +32,7 @@
|
|||||||
|
|
||||||
#include "nvmath/Color.h"
|
#include "nvmath/Color.h"
|
||||||
#include "nvmath/Half.h"
|
#include "nvmath/Half.h"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
#include "nvcore/Debug.h"
|
#include "nvcore/Debug.h"
|
||||||
|
|
||||||
@ -360,7 +361,19 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
|
|||||||
ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f));
|
ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f));
|
||||||
ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f));
|
ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f));
|
||||||
}
|
}
|
||||||
|
else if (compressionOptions.pixelType == nvtt::PixelType_SignedNorm) {
|
||||||
|
// @@
|
||||||
|
}
|
||||||
|
else if (compressionOptions.pixelType == nvtt::PixelType_UnsignedInt) {
|
||||||
|
ir = iround(clamp(r, 0.0f, 65535.0f));
|
||||||
|
ig = iround(clamp(g, 0.0f, 65535.0f));
|
||||||
|
ib = iround(clamp(b, 0.0f, 65535.0f));
|
||||||
|
ia = iround(clamp(a, 0.0f, 65535.0f));
|
||||||
|
}
|
||||||
|
else if (compressionOptions.pixelType == nvtt::PixelType_SignedInt) {
|
||||||
|
// @@
|
||||||
|
}
|
||||||
|
|
||||||
uint p = 0;
|
uint p = 0;
|
||||||
p |= PixelFormat::convert(ir, 16, rsize) << rshift;
|
p |= PixelFormat::convert(ir, 16, rsize) << rshift;
|
||||||
p |= PixelFormat::convert(ig, 16, gsize) << gshift;
|
p |= PixelFormat::convert(ig, 16, gsize) << gshift;
|
||||||
|
@ -268,9 +268,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
|||||||
if (!img.isNormalMap()) {
|
if (!img.isNormalMap()) {
|
||||||
img.toLinear(inputOptions.inputGamma);
|
img.toLinear(inputOptions.inputGamma);
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
img.expandNormals();
|
|
||||||
}
|
|
||||||
|
|
||||||
// Resize input.
|
// Resize input.
|
||||||
img.resize(w, h, d, ResizeFilter_Box);
|
img.resize(w, h, d, ResizeFilter_Box);
|
||||||
@ -279,9 +276,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
|||||||
if (!img.isNormalMap()) {
|
if (!img.isNormalMap()) {
|
||||||
tmp.toGamma(inputOptions.outputGamma);
|
tmp.toGamma(inputOptions.outputGamma);
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
tmp.packNormals();
|
|
||||||
}
|
|
||||||
|
|
||||||
quantize(tmp, compressionOptions);
|
quantize(tmp, compressionOptions);
|
||||||
compress(tmp, f, 0, compressionOptions, outputOptions);
|
compress(tmp, f, 0, compressionOptions, outputOptions);
|
||||||
@ -310,9 +304,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
|||||||
if (!img.isNormalMap()) {
|
if (!img.isNormalMap()) {
|
||||||
img.toLinear(inputOptions.inputGamma);
|
img.toLinear(inputOptions.inputGamma);
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
img.expandNormals();
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
|
if (inputOptions.mipmapFilter == MipmapFilter_Kaiser) {
|
||||||
@ -332,7 +323,6 @@ bool Compressor::Private::compress(const InputOptions::Private & inputOptions, c
|
|||||||
img.normalizeNormalMap();
|
img.normalizeNormalMap();
|
||||||
}
|
}
|
||||||
tmp = img;
|
tmp = img;
|
||||||
tmp.packNormals();
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
tmp = img;
|
tmp = img;
|
||||||
@ -485,34 +475,38 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
|
|||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
|
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC1_UNORM);
|
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC1_UNORM_SRGB : DXGI_FORMAT_BC1_UNORM);
|
||||||
if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true);
|
if (compressionOptions.format == Format_DXT1a) header.setHasAlphaFlag(true);
|
||||||
if (isNormalMap) header.setNormalFlag(true);
|
if (isNormalMap) header.setNormalFlag(true);
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_DXT3) {
|
else if (compressionOptions.format == Format_DXT3) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC2_UNORM);
|
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC2_UNORM_SRGB : DXGI_FORMAT_BC2_UNORM);
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_DXT5) {
|
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC3_UNORM);
|
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC3_UNORM_SRGB : DXGI_FORMAT_BC3_UNORM);
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_DXT5n) {
|
else if (compressionOptions.format == Format_DXT5n) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC3_UNORM);
|
header.setDX10Format(DXGI_FORMAT_BC3_UNORM);
|
||||||
if (isNormalMap) header.setNormalFlag(true);
|
if (isNormalMap) header.setNormalFlag(true);
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC4) {
|
else if (compressionOptions.format == Format_BC4) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC4_UNORM);
|
header.setDX10Format(DXGI_FORMAT_BC4_UNORM); // DXGI_FORMAT_BC4_SNORM ?
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC5) {
|
else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC5_UNORM);
|
header.setDX10Format(DXGI_FORMAT_BC5_UNORM); // DXGI_FORMAT_BC5_SNORM ?
|
||||||
if (isNormalMap) header.setNormalFlag(true);
|
if (isNormalMap) header.setNormalFlag(true);
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC6) {
|
else if (compressionOptions.format == Format_BC6) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC6H_UF16);
|
if (compressionOptions.pixelType == PixelType_Float) header.setDX10Format(DXGI_FORMAT_BC6H_SF16);
|
||||||
|
/*if (compressionOptions.pixelType == PixelType_UnsignedFloat)*/ header.setDX10Format(DXGI_FORMAT_BC6H_UF16); // By default we assume unsigned.
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC7) {
|
else if (compressionOptions.format == Format_BC7) {
|
||||||
header.setDX10Format(DXGI_FORMAT_BC7_UNORM);
|
header.setDX10Format(outputOptions.srgb ? DXGI_FORMAT_BC7_UNORM_SRGB : DXGI_FORMAT_BC7_UNORM);
|
||||||
if (isNormalMap) header.setNormalFlag(true);
|
if (isNormalMap) header.setNormalFlag(true);
|
||||||
}
|
}
|
||||||
|
else if (compressionOptions.format == Format_CTX1) {
|
||||||
|
supported = false;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
supported = false;
|
supported = false;
|
||||||
}
|
}
|
||||||
@ -597,7 +591,7 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
|
|||||||
else if (compressionOptions.format == Format_DXT3) {
|
else if (compressionOptions.format == Format_DXT3) {
|
||||||
header.setFourCC('D', 'X', 'T', '3');
|
header.setFourCC('D', 'X', 'T', '3');
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_DXT5) {
|
else if (compressionOptions.format == Format_DXT5 || compressionOptions.format == Format_BC3_RGBM) {
|
||||||
header.setFourCC('D', 'X', 'T', '5');
|
header.setFourCC('D', 'X', 'T', '5');
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_DXT5n) {
|
else if (compressionOptions.format == Format_DXT5n) {
|
||||||
@ -611,19 +605,21 @@ bool Compressor::Private::outputHeader(nvtt::TextureType textureType, int w, int
|
|||||||
else if (compressionOptions.format == Format_BC4) {
|
else if (compressionOptions.format == Format_BC4) {
|
||||||
header.setFourCC('A', 'T', 'I', '1');
|
header.setFourCC('A', 'T', 'I', '1');
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC5) {
|
else if (compressionOptions.format == Format_BC5 || compressionOptions.format == Format_BC5_Luma) {
|
||||||
header.setFourCC('A', 'T', 'I', '2');
|
header.setFourCC('A', 'T', 'I', '2');
|
||||||
if (isNormalMap) {
|
if (isNormalMap) {
|
||||||
header.setNormalFlag(true);
|
header.setNormalFlag(true);
|
||||||
header.setSwizzleCode('A', '2', 'X', 'Y');
|
header.setSwizzleCode('A', '2', 'X', 'Y');
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC6) { // @@ This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
|
else if (compressionOptions.format == Format_BC6) {
|
||||||
header.setFourCC('Z', 'O', 'H', ' ');
|
header.setFourCC('Z', 'O', 'H', ' '); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
|
||||||
|
supported = false;
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_BC7) {
|
else if (compressionOptions.format == Format_BC7) {
|
||||||
header.setFourCC('Z', 'O', 'L', 'A');
|
header.setFourCC('Z', 'O', 'L', 'A'); // This is not supported by D3DX. Always use DX10 header with BC6-7 formats.
|
||||||
if (isNormalMap) header.setNormalFlag(true);
|
if (isNormalMap) header.setNormalFlag(true);
|
||||||
|
supported = false;
|
||||||
}
|
}
|
||||||
else if (compressionOptions.format == Format_CTX1) {
|
else if (compressionOptions.format == Format_CTX1) {
|
||||||
header.setFourCC('C', 'T', 'X', '1');
|
header.setFourCC('C', 'T', 'X', '1');
|
||||||
@ -777,6 +773,14 @@ CompressorInterface * Compressor::Private::chooseCpuCompressor(const Compression
|
|||||||
{
|
{
|
||||||
return new CompressorBC7;
|
return new CompressorBC7;
|
||||||
}
|
}
|
||||||
|
else if (compressionOptions.format == Format_BC5_Luma)
|
||||||
|
{
|
||||||
|
return new ProductionCompressorBC5_Luma;
|
||||||
|
}
|
||||||
|
else if (compressionOptions.format == Format_BC3_RGBM)
|
||||||
|
{
|
||||||
|
return new CompressorBC3_RGBM;
|
||||||
|
}
|
||||||
|
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
@ -320,7 +320,7 @@ bool CubeSurface::load(const char * fileName, int mipmap)
|
|||||||
if (mipmap < 0) {
|
if (mipmap < 0) {
|
||||||
mipmap = dds.mipmapCount() - 1 - mipmap;
|
mipmap = dds.mipmapCount() - 1 - mipmap;
|
||||||
}
|
}
|
||||||
if (mipmap < 0 || mipmap > toI32(dds.mipmapCount())) return false;
|
if (mipmap < 0 || mipmap > I32(dds.mipmapCount())) return false;
|
||||||
|
|
||||||
|
|
||||||
nvtt::InputFormat inputFormat = nvtt::InputFormat_RGBA_16F;
|
nvtt::InputFormat inputFormat = nvtt::InputFormat_RGBA_16F;
|
||||||
@ -328,12 +328,14 @@ bool CubeSurface::load(const char * fileName, int mipmap)
|
|||||||
if (dds.header.hasDX10Header()) {
|
if (dds.header.hasDX10Header()) {
|
||||||
if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R16G16B16A16_FLOAT) inputFormat = nvtt::InputFormat_RGBA_16F;
|
if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R16G16B16A16_FLOAT) inputFormat = nvtt::InputFormat_RGBA_16F;
|
||||||
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32G32B32A32_FLOAT) inputFormat = nvtt::InputFormat_RGBA_32F;
|
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32G32B32A32_FLOAT) inputFormat = nvtt::InputFormat_RGBA_32F;
|
||||||
|
else if (dds.header.header10.dxgiFormat == DXGI_FORMAT_R32_FLOAT) inputFormat = nvtt::InputFormat_R_32F;
|
||||||
else return false;
|
else return false;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if ((dds.header.pf.flags & DDPF_FOURCC) != 0) {
|
if ((dds.header.pf.flags & DDPF_FOURCC) != 0) {
|
||||||
if (dds.header.pf.fourcc == D3DFMT_A16B16G16R16F) inputFormat = nvtt::InputFormat_RGBA_16F;
|
if (dds.header.pf.fourcc == D3DFMT_A16B16G16R16F) inputFormat = nvtt::InputFormat_RGBA_16F;
|
||||||
else if (dds.header.pf.fourcc == D3DFMT_A32B32G32R32F) inputFormat = nvtt::InputFormat_RGBA_32F;
|
else if (dds.header.pf.fourcc == D3DFMT_A32B32G32R32F) inputFormat = nvtt::InputFormat_RGBA_32F;
|
||||||
|
else if (dds.header.pf.fourcc == D3DFMT_R32F) inputFormat = nvtt::InputFormat_R_32F;
|
||||||
else return false;
|
else return false;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -594,7 +596,7 @@ Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, floa
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int L = toI32(edgeLength-1);
|
const int L = I32(edgeLength-1);
|
||||||
int x0 = 0, x1 = L;
|
int x0 = 0, x1 = L;
|
||||||
int y0 = 0, y1 = L;
|
int y0 = 0, y1 = L;
|
||||||
|
|
||||||
@ -715,7 +717,7 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir,
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
const int L = toI32(edgeLength-1);
|
const int L = I32(edgeLength-1);
|
||||||
int x0 = 0, x1 = L;
|
int x0 = 0, x1 = L;
|
||||||
int y0 = 0, y1 = L;
|
int y0 = 0, y1 = L;
|
||||||
|
|
||||||
|
@ -202,18 +202,22 @@ bool InputOptions::setMipmapData(const void * data, int width, int height, int d
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
int imageSize = width * height * depth * 4;
|
int imageSize = width * height * depth;
|
||||||
if (m.inputFormat == InputFormat_BGRA_8UB)
|
if (m.inputFormat == InputFormat_BGRA_8UB)
|
||||||
{
|
{
|
||||||
imageSize *= sizeof(uint8);
|
imageSize *= 4 * sizeof(uint8);
|
||||||
}
|
}
|
||||||
else if (m.inputFormat == InputFormat_RGBA_16F)
|
else if (m.inputFormat == InputFormat_RGBA_16F)
|
||||||
{
|
{
|
||||||
imageSize *= sizeof(uint16);
|
imageSize *= 4 * sizeof(uint16);
|
||||||
}
|
}
|
||||||
else if (m.inputFormat == InputFormat_RGBA_32F)
|
else if (m.inputFormat == InputFormat_RGBA_32F)
|
||||||
{
|
{
|
||||||
imageSize *= sizeof(float);
|
imageSize *= 4 * sizeof(float);
|
||||||
|
}
|
||||||
|
else if (m.inputFormat == InputFormat_R_32F)
|
||||||
|
{
|
||||||
|
imageSize *= 1 * sizeof(float);
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
@ -32,7 +32,8 @@
|
|||||||
|
|
||||||
#include <nvcore/Utils.h> // swap
|
#include <nvcore/Utils.h> // swap
|
||||||
|
|
||||||
#include <limits.h>
|
#include <limits.h> // INT_MAX
|
||||||
|
#include <float.h> // FLT_MAX
|
||||||
|
|
||||||
using namespace nv;
|
using namespace nv;
|
||||||
using namespace OptimalCompress;
|
using namespace OptimalCompress;
|
||||||
@ -185,16 +186,16 @@ namespace
|
|||||||
return totalError;
|
return totalError;
|
||||||
}*/
|
}*/
|
||||||
|
|
||||||
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block, int bestError = INT_MAX)
|
static float computeAlphaError(const AlphaBlock4x4 & src, const AlphaBlockDXT5 * dst, float bestError = FLT_MAX)
|
||||||
{
|
{
|
||||||
uint8 alphas[8];
|
uint8 alphas[8];
|
||||||
block->evaluatePalette(alphas, false); // @@ Use target decoder.
|
dst->evaluatePalette(alphas, false); // @@ Use target decoder.
|
||||||
|
|
||||||
int totalError = 0;
|
float totalError = 0;
|
||||||
|
|
||||||
for (uint i = 0; i < 16; i++)
|
for (uint i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
uint8 alpha = rgba.color(i).a;
|
uint8 alpha = src.alpha[i];
|
||||||
|
|
||||||
int minDist = INT_MAX;
|
int minDist = INT_MAX;
|
||||||
for (uint p = 0; p < 8; p++)
|
for (uint p = 0; p < 8; p++)
|
||||||
@ -203,7 +204,7 @@ namespace
|
|||||||
minDist = min(dist, minDist);
|
minDist = min(dist, minDist);
|
||||||
}
|
}
|
||||||
|
|
||||||
totalError += minDist;
|
totalError += minDist * src.weights[i];
|
||||||
|
|
||||||
if (totalError > bestError)
|
if (totalError > bestError)
|
||||||
{
|
{
|
||||||
@ -215,14 +216,14 @@ namespace
|
|||||||
return totalError;
|
return totalError;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
|
static void computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst)
|
||||||
{
|
{
|
||||||
uint8 alphas[8];
|
uint8 alphas[8];
|
||||||
block->evaluatePalette(alphas, false); // @@ Use target decoder.
|
dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder.
|
||||||
|
|
||||||
for (uint i = 0; i < 16; i++)
|
for (uint i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
uint8 alpha = rgba.color(i).a;
|
uint8 alpha = src.alpha[i];
|
||||||
|
|
||||||
int minDist = INT_MAX;
|
int minDist = INT_MAX;
|
||||||
int bestIndex = 8;
|
int bestIndex = 8;
|
||||||
@ -238,7 +239,7 @@ namespace
|
|||||||
}
|
}
|
||||||
nvDebugCheck(bestIndex < 8);
|
nvDebugCheck(bestIndex < 8);
|
||||||
|
|
||||||
block->setIndex(i, bestIndex);
|
dst->setIndex(i, bestIndex);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -252,19 +253,19 @@ namespace
|
|||||||
// https://mollyrocket.com/forums/viewtopic.php?t=392
|
// https://mollyrocket.com/forums/viewtopic.php?t=392
|
||||||
// Encodes a block filled with the single color c.
// Both endpoints are taken from the OMatch5 / OMatch6 lookup tables (column 0
// for col0, column 1 for col1) and every 2-bit texel index starts out as 2
// (0xaaaaaaaa is the bit pattern 10 repeated sixteen times).
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
    // First endpoint.
    dxtBlock->col0.r = OMatch5[c.r][0];
    dxtBlock->col0.g = OMatch6[c.g][0];
    dxtBlock->col0.b = OMatch5[c.b][0];

    // Second endpoint.
    dxtBlock->col1.r = OMatch5[c.r][1];
    dxtBlock->col1.g = OMatch6[c.g][1];
    dxtBlock->col1.b = OMatch5[c.b][1];

    dxtBlock->indices = 0xaaaaaaaa;

    // Nothing else to do unless the endpoints have to be reordered.
    if (!(dxtBlock->col0.u < dxtBlock->col1.u))
    {
        return;
    }

    // Keep col0 as the larger packed value; flipping the low bit of every
    // index turns each 2 into a 3.
    swap(dxtBlock->col0.u, dxtBlock->col1.u);
    dxtBlock->indices ^= 0x55555555;
}
|
||||||
|
|
||||||
void OptimalCompress::compressDXT1a(Color32 c, uint alphaMask, BlockDXT1 * dxtBlock)
|
void OptimalCompress::compressDXT1a(Color32 c, uint alphaMask, BlockDXT1 * dxtBlock)
|
||||||
@ -481,46 +482,68 @@ void OptimalCompress::compressDXT1_Luma(const ColorBlock & rgba, BlockDXT1 * blo
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
|
void OptimalCompress::compressDXT3A(const AlphaBlock4x4 & src, AlphaBlockDXT3 * dst)
|
||||||
{
|
{
|
||||||
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
|
dst->alpha0 = quantize4(src.alpha[0]);
|
||||||
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
|
dst->alpha1 = quantize4(src.alpha[1]);
|
||||||
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
|
dst->alpha2 = quantize4(src.alpha[2]);
|
||||||
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
|
dst->alpha3 = quantize4(src.alpha[3]);
|
||||||
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
|
dst->alpha4 = quantize4(src.alpha[4]);
|
||||||
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
|
dst->alpha5 = quantize4(src.alpha[5]);
|
||||||
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
|
dst->alpha6 = quantize4(src.alpha[6]);
|
||||||
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
|
dst->alpha7 = quantize4(src.alpha[7]);
|
||||||
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
|
dst->alpha8 = quantize4(src.alpha[8]);
|
||||||
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
|
dst->alpha9 = quantize4(src.alpha[9]);
|
||||||
dxtBlock->alphaA = quantize4(rgba.color(10).a);
|
dst->alphaA = quantize4(src.alpha[10]);
|
||||||
dxtBlock->alphaB = quantize4(rgba.color(11).a);
|
dst->alphaB = quantize4(src.alpha[11]);
|
||||||
dxtBlock->alphaC = quantize4(rgba.color(12).a);
|
dst->alphaC = quantize4(src.alpha[12]);
|
||||||
dxtBlock->alphaD = quantize4(rgba.color(13).a);
|
dst->alphaD = quantize4(src.alpha[13]);
|
||||||
dxtBlock->alphaE = quantize4(rgba.color(14).a);
|
dst->alphaE = quantize4(src.alpha[14]);
|
||||||
dxtBlock->alphaF = quantize4(rgba.color(15).a);
|
dst->alphaF = quantize4(src.alpha[15]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void OptimalCompress::compressDXT3A(const ColorBlock & src, AlphaBlockDXT3 * dst)
|
||||||
|
{
|
||||||
|
AlphaBlock4x4 tmp;
|
||||||
|
tmp.init(src, 3);
|
||||||
|
compressDXT3A(tmp, dst);
|
||||||
|
}
|
||||||
|
|
||||||
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
|
void OptimalCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst)
|
||||||
{
|
{
|
||||||
uint8 mina = 255;
|
uint8 mina = 255;
|
||||||
uint8 maxa = 0;
|
uint8 maxa = 0;
|
||||||
|
|
||||||
|
uint8 mina_no01 = 255;
|
||||||
|
uint8 maxa_no01 = 0;
|
||||||
|
|
||||||
// Get min/max alpha.
|
// Get min/max alpha.
|
||||||
for (uint i = 0; i < 16; i++)
|
for (uint i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
uint8 alpha = rgba.color(i).a;
|
uint8 alpha = src.alpha[i];
|
||||||
mina = min(mina, alpha);
|
mina = min(mina, alpha);
|
||||||
maxa = max(maxa, alpha);
|
maxa = max(maxa, alpha);
|
||||||
|
|
||||||
|
if (alpha != 0 && alpha != 255) {
|
||||||
|
mina_no01 = min(mina_no01, alpha);
|
||||||
|
maxa_no01 = max(maxa_no01, alpha);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dxtBlock->alpha0 = maxa;
|
if (maxa - mina < 8) {
|
||||||
dxtBlock->alpha1 = mina;
|
dst->alpha0 = maxa;
|
||||||
|
dst->alpha1 = mina;
|
||||||
|
|
||||||
if (maxa - mina > 8)
|
nvDebugCheck(computeAlphaError(src, dst) == 0);
|
||||||
{
|
}
|
||||||
int besterror = computeAlphaError(rgba, dxtBlock);
|
else if (maxa_no01 - mina_no01 < 6) {
|
||||||
|
dst->alpha0 = mina_no01;
|
||||||
|
dst->alpha1 = maxa_no01;
|
||||||
|
|
||||||
|
nvDebugCheck(computeAlphaError(src, dst) == 0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
float besterror = computeAlphaError(src, dst);
|
||||||
int besta0 = maxa;
|
int besta0 = maxa;
|
||||||
int besta1 = mina;
|
int besta1 = mina;
|
||||||
|
|
||||||
@ -535,9 +558,9 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
|
|||||||
{
|
{
|
||||||
nvDebugCheck(a0 - a1 > 8);
|
nvDebugCheck(a0 - a1 > 8);
|
||||||
|
|
||||||
dxtBlock->alpha0 = a0;
|
dst->alpha0 = a0;
|
||||||
dxtBlock->alpha1 = a1;
|
dst->alpha1 = a1;
|
||||||
int error = computeAlphaError(rgba, dxtBlock, besterror);
|
float error = computeAlphaError(src, dst, besterror);
|
||||||
|
|
||||||
if (error < besterror)
|
if (error < besterror)
|
||||||
{
|
{
|
||||||
@ -548,10 +571,241 @@ void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dx
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
dxtBlock->alpha0 = besta0;
|
// Try using the 6 step encoding.
|
||||||
dxtBlock->alpha1 = besta1;
|
/*if (mina == 0 || maxa == 255)*/ {
|
||||||
|
|
||||||
|
// Expand search space a bit.
|
||||||
|
const int alphaExpand = 6;
|
||||||
|
mina_no01 = (mina_no01 <= alphaExpand) ? 0 : mina_no01 - alphaExpand;
|
||||||
|
maxa_no01 = (maxa_no01 >= 255 - alphaExpand) ? 255 : maxa_no01 + alphaExpand;
|
||||||
|
|
||||||
|
for (int a0 = mina_no01 + 9; a0 < maxa_no01; a0++)
|
||||||
|
{
|
||||||
|
for (int a1 = mina_no01; a1 < a0 - 8; a1++)
|
||||||
|
{
|
||||||
|
nvDebugCheck(a0 - a1 > 8);
|
||||||
|
|
||||||
|
dst->alpha0 = a1;
|
||||||
|
dst->alpha1 = a0;
|
||||||
|
float error = computeAlphaError(src, dst, besterror);
|
||||||
|
|
||||||
|
if (error < besterror)
|
||||||
|
{
|
||||||
|
besterror = error;
|
||||||
|
besta0 = a1;
|
||||||
|
besta1 = a0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dst->alpha0 = besta0;
|
||||||
|
dst->alpha1 = besta1;
|
||||||
}
|
}
|
||||||
|
|
||||||
computeAlphaIndices(rgba, dxtBlock);
|
computeAlphaIndices(src, dst);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void OptimalCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst)
|
||||||
|
{
|
||||||
|
AlphaBlock4x4 tmp;
|
||||||
|
tmp.init(src, 3);
|
||||||
|
compressDXT5A(tmp, dst);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
#include "nvmath/Vector.inl"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
// Lower bound of the decoded RGBM multiplier. The RGBM helpers in this file map
// a palette value a in [0, 255] to M = a / 255 * (1 - threshold) + threshold,
// so M ranges over [threshold, 1].
const float threshold = 0.15f;
|
||||||
|
|
||||||
|
static float computeAlphaError_RGBM(const ColorSet & src, const ColorBlock & RGB, const AlphaBlockDXT5 * dst, float bestError = FLT_MAX)
|
||||||
|
{
|
||||||
|
uint8 alphas[8];
|
||||||
|
dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder.
|
||||||
|
|
||||||
|
float totalError = 0;
|
||||||
|
|
||||||
|
for (uint i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
float R = src.color(i).x;
|
||||||
|
float G = src.color(i).y;
|
||||||
|
float B = src.color(i).z;
|
||||||
|
|
||||||
|
float r = float(RGB.color(i).r) / 255.0f;
|
||||||
|
float g = float(RGB.color(i).g) / 255.0f;
|
||||||
|
float b = float(RGB.color(i).b) / 255.0f;
|
||||||
|
|
||||||
|
float minDist = FLT_MAX;
|
||||||
|
for (uint p = 0; p < 8; p++)
|
||||||
|
{
|
||||||
|
// Compute M.
|
||||||
|
float M = float(alphas[p]) / 255.0f * (1 - threshold) + threshold;
|
||||||
|
|
||||||
|
// Decode color.
|
||||||
|
float fr = r * M;
|
||||||
|
float fg = g * M;
|
||||||
|
float fb = b * M;
|
||||||
|
|
||||||
|
// Measure error.
|
||||||
|
float error = square(R - fr) + square(G - fg) + square(B - fb);
|
||||||
|
|
||||||
|
minDist = min(error, minDist);
|
||||||
|
}
|
||||||
|
|
||||||
|
totalError += minDist * src.weights[i];
|
||||||
|
|
||||||
|
if (totalError > bestError)
|
||||||
|
{
|
||||||
|
// early out
|
||||||
|
return totalError;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return totalError;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void computeAlphaIndices_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst)
|
||||||
|
{
|
||||||
|
uint8 alphas[8];
|
||||||
|
dst->evaluatePalette(alphas, /*d3d9=*/false); // @@ Use target decoder.
|
||||||
|
|
||||||
|
for (uint i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
float R = src.color(i).x;
|
||||||
|
float G = src.color(i).y;
|
||||||
|
float B = src.color(i).z;
|
||||||
|
|
||||||
|
float r = float(RGB.color(i).r) / 255.0f;
|
||||||
|
float g = float(RGB.color(i).g) / 255.0f;
|
||||||
|
float b = float(RGB.color(i).b) / 255.0f;
|
||||||
|
|
||||||
|
float minDist = FLT_MAX;
|
||||||
|
int bestIndex = 8;
|
||||||
|
for (uint p = 0; p < 8; p++)
|
||||||
|
{
|
||||||
|
// Compute M.
|
||||||
|
float M = float(alphas[p]) / 255.0f * (1 - threshold) + threshold;
|
||||||
|
|
||||||
|
// Decode color.
|
||||||
|
float fr = r * M;
|
||||||
|
float fg = g * M;
|
||||||
|
float fb = b * M;
|
||||||
|
|
||||||
|
// Measure error.
|
||||||
|
float error = square(R - fr) + square(G - fg) + square(B - fb);
|
||||||
|
|
||||||
|
if (error < minDist)
|
||||||
|
{
|
||||||
|
minDist = error;
|
||||||
|
bestIndex = p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
nvDebugCheck(bestIndex < 8);
|
||||||
|
|
||||||
|
dst->setIndex(i, bestIndex);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void OptimalCompress::compressDXT5A_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst)
|
||||||
|
{
|
||||||
|
uint8 mina = 255;
|
||||||
|
uint8 maxa = 0;
|
||||||
|
|
||||||
|
uint8 mina_no01 = 255;
|
||||||
|
uint8 maxa_no01 = 0;
|
||||||
|
|
||||||
|
// Get min/max alpha.
|
||||||
|
/*for (uint i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
uint8 alpha = src.alpha[i];
|
||||||
|
mina = min(mina, alpha);
|
||||||
|
maxa = max(maxa, alpha);
|
||||||
|
|
||||||
|
if (alpha != 0 && alpha != 255) {
|
||||||
|
mina_no01 = min(mina_no01, alpha);
|
||||||
|
maxa_no01 = max(maxa_no01, alpha);
|
||||||
|
}
|
||||||
|
}*/
|
||||||
|
mina = 0;
|
||||||
|
maxa = 255;
|
||||||
|
mina_no01 = 0;
|
||||||
|
maxa_no01 = 255;
|
||||||
|
|
||||||
|
/*if (maxa - mina < 8) {
|
||||||
|
dst->alpha0 = maxa;
|
||||||
|
dst->alpha1 = mina;
|
||||||
|
|
||||||
|
nvDebugCheck(computeAlphaError(src, dst) == 0);
|
||||||
|
}
|
||||||
|
else if (maxa_no01 - mina_no01 < 6) {
|
||||||
|
dst->alpha0 = mina_no01;
|
||||||
|
dst->alpha1 = maxa_no01;
|
||||||
|
|
||||||
|
nvDebugCheck(computeAlphaError(src, dst) == 0);
|
||||||
|
}
|
||||||
|
else*/
|
||||||
|
{
|
||||||
|
float besterror = computeAlphaError_RGBM(src, RGB, dst);
|
||||||
|
int besta0 = maxa;
|
||||||
|
int besta1 = mina;
|
||||||
|
|
||||||
|
// Expand search space a bit.
|
||||||
|
const int alphaExpand = 8;
|
||||||
|
mina = (mina <= alphaExpand) ? 0 : mina - alphaExpand;
|
||||||
|
maxa = (maxa >= 255 - alphaExpand) ? 255 : maxa + alphaExpand;
|
||||||
|
|
||||||
|
for (int a0 = mina + 9; a0 < maxa; a0++)
|
||||||
|
{
|
||||||
|
for (int a1 = mina; a1 < a0 - 8; a1++)
|
||||||
|
{
|
||||||
|
nvDebugCheck(a0 - a1 > 8);
|
||||||
|
|
||||||
|
dst->alpha0 = a0;
|
||||||
|
dst->alpha1 = a1;
|
||||||
|
float error = computeAlphaError_RGBM(src, RGB, dst, besterror);
|
||||||
|
|
||||||
|
if (error < besterror)
|
||||||
|
{
|
||||||
|
besterror = error;
|
||||||
|
besta0 = a0;
|
||||||
|
besta1 = a1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try using the 6 step encoding.
|
||||||
|
/*if (mina == 0 || maxa == 255)*/ {
|
||||||
|
|
||||||
|
// Expand search space a bit.
|
||||||
|
const int alphaExpand = 6;
|
||||||
|
mina_no01 = (mina_no01 <= alphaExpand) ? 0 : mina_no01 - alphaExpand;
|
||||||
|
maxa_no01 = (maxa_no01 >= 255 - alphaExpand) ? 255 : maxa_no01 + alphaExpand;
|
||||||
|
|
||||||
|
for (int a0 = mina_no01 + 9; a0 < maxa_no01; a0++)
|
||||||
|
{
|
||||||
|
for (int a1 = mina_no01; a1 < a0 - 8; a1++)
|
||||||
|
{
|
||||||
|
nvDebugCheck(a0 - a1 > 8);
|
||||||
|
|
||||||
|
dst->alpha0 = a1;
|
||||||
|
dst->alpha1 = a0;
|
||||||
|
float error = computeAlphaError_RGBM(src, RGB, dst, besterror);
|
||||||
|
|
||||||
|
if (error < besterror)
|
||||||
|
{
|
||||||
|
besterror = error;
|
||||||
|
besta0 = a1;
|
||||||
|
besta1 = a0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dst->alpha0 = besta0;
|
||||||
|
dst->alpha1 = besta1;
|
||||||
|
}
|
||||||
|
|
||||||
|
computeAlphaIndices_RGBM(src, RGB, dst);
|
||||||
|
}
|
||||||
|
@ -25,31 +25,38 @@
|
|||||||
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
|
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
|
||||||
#define NV_TT_OPTIMALCOMPRESSDXT_H
|
#define NV_TT_OPTIMALCOMPRESSDXT_H
|
||||||
|
|
||||||
#include <nvimage/nvimage.h>
|
//#include "nvimage/nvimage.h"
|
||||||
|
|
||||||
#include <nvmath/Color.h>
|
#include "nvmath/Color.h"
|
||||||
|
|
||||||
namespace nv
|
namespace nv
|
||||||
{
|
{
|
||||||
|
struct ColorSet;
|
||||||
struct ColorBlock;
|
struct ColorBlock;
|
||||||
struct BlockDXT1;
|
struct BlockDXT1;
|
||||||
struct BlockDXT3;
|
struct BlockDXT3;
|
||||||
struct BlockDXT5;
|
struct BlockDXT5;
|
||||||
struct AlphaBlockDXT3;
|
struct AlphaBlockDXT3;
|
||||||
struct AlphaBlockDXT5;
|
struct AlphaBlockDXT5;
|
||||||
|
struct AlphaBlock4x4;
|
||||||
|
|
||||||
namespace OptimalCompress
|
namespace OptimalCompress
|
||||||
{
|
{
|
||||||
|
// Single color compressors:
|
||||||
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
|
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
|
||||||
void compressDXT1a(Color32 rgba, uint alphaMask, BlockDXT1 * dxtBlock);
|
void compressDXT1a(Color32 rgba, uint alphaMask, BlockDXT1 * dxtBlock);
|
||||||
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
|
void compressDXT1G(uint8 g, BlockDXT1 * dxtBlock);
|
||||||
|
|
||||||
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
|
void compressDXT3A(const AlphaBlock4x4 & src, AlphaBlockDXT3 * dst);
|
||||||
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
|
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst);
|
||||||
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
|
|
||||||
|
|
||||||
void compressDXT1_Luma(const ColorBlock & rgba, BlockDXT1 * block);
|
void compressDXT1G(const ColorBlock & src, BlockDXT1 * dst);
|
||||||
|
void compressDXT3A(const ColorBlock & src, AlphaBlockDXT3 * dst);
|
||||||
|
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst);
|
||||||
|
|
||||||
|
void compressDXT1_Luma(const ColorBlock & src, BlockDXT1 * dst);
|
||||||
|
|
||||||
|
void compressDXT5A_RGBM(const ColorSet & src, const ColorBlock & RGB, AlphaBlockDXT5 * dst);
|
||||||
}
|
}
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
|
||||||
|
@ -28,13 +28,13 @@
|
|||||||
#include "nvimage/ColorBlock.h"
|
#include "nvimage/ColorBlock.h"
|
||||||
#include "nvimage/BlockDXT.h"
|
#include "nvimage/BlockDXT.h"
|
||||||
|
|
||||||
#include "nvmath/Color.h"
|
#include "nvmath/Color.inl"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
|
|
||||||
#include "nvcore/Utils.h" // swap
|
#include "nvcore/Utils.h" // swap
|
||||||
|
|
||||||
|
#include <string.h> // memset
|
||||||
|
|
||||||
using namespace nv;
|
using namespace nv;
|
||||||
using namespace QuickCompress;
|
using namespace QuickCompress;
|
||||||
@ -115,13 +115,28 @@ inline static void insetBBox(Vector3 * restrict maxColor, Vector3 * restrict min
|
|||||||
*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
|
*minColor = clamp(*minColor + inset, 0.0f, 255.0f);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
// Takes a normalized color in [0, 255] range and returns
|
// Takes a normalized color in [0, 255] range and returns
|
||||||
inline static uint16 roundAndExpand(Vector3 * restrict v)
|
inline static uint16 roundAndExpand(Vector3 * restrict v)
|
||||||
{
|
{
|
||||||
uint r = uint(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
|
uint r = ftoi_floor(clamp(v->x * (31.0f / 255.0f), 0.0f, 31.0f));
|
||||||
uint g = uint(clamp(v->y * (63.0f / 255.0f), 0.0f, 63.0f) + 0.5f);
|
uint g = ftoi_floor(clamp(v->y * (63.0f / 255.0f), 0.0f, 63.0f));
|
||||||
uint b = uint(clamp(v->z * (31.0f / 255.0f), 0.0f, 31.0f) + 0.5f);
|
uint b = ftoi_floor(clamp(v->z * (31.0f / 255.0f), 0.0f, 31.0f));
|
||||||
|
|
||||||
|
float r0 = float(((r+0) << 3) | ((r+0) >> 2));
|
||||||
|
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
|
||||||
|
if (fabs(v->x - r1) < fabs(v->x - r0)) r = min(r+1, 31U);
|
||||||
|
|
||||||
|
float g0 = float(((g+0) << 2) | ((g+0) >> 4));
|
||||||
|
float g1 = float(((g+1) << 2) | ((g+1) >> 4));
|
||||||
|
if (fabs(v->y - g1) < fabs(v->y - g0)) g = min(g+1, 63U);
|
||||||
|
|
||||||
|
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
|
||||||
|
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
|
||||||
|
if (fabs(v->z - b1) < fabs(v->z - b0)) b = min(b+1, 31U);
|
||||||
|
|
||||||
|
|
||||||
uint16 w = (r << 11) | (g << 5) | b;
|
uint16 w = (r << 11) | (g << 5) | b;
|
||||||
|
|
||||||
r = (r << 3) | (r >> 2);
|
r = (r << 3) | (r >> 2);
|
||||||
@ -132,16 +147,57 @@ inline static uint16 roundAndExpand(Vector3 * restrict v)
|
|||||||
return w;
|
return w;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Takes a normalized color in [0, 255] range and returns
|
||||||
|
inline static uint16 roundAndExpand01(Vector3 * restrict v)
|
||||||
|
{
|
||||||
|
uint r = ftoi_floor(clamp(v->x * 31.0f, 0.0f, 31.0f));
|
||||||
|
uint g = ftoi_floor(clamp(v->y * 63.0f, 0.0f, 63.0f));
|
||||||
|
uint b = ftoi_floor(clamp(v->z * 31.0f, 0.0f, 31.0f));
|
||||||
|
|
||||||
|
float r0 = float(((r+0) << 3) | ((r+0) >> 2));
|
||||||
|
float r1 = float(((r+1) << 3) | ((r+1) >> 2));
|
||||||
|
if (fabs(v->x - r1) < fabs(v->x - r0)) r = min(r+1, 31U);
|
||||||
|
|
||||||
|
float g0 = float(((g+0) << 2) | ((g+0) >> 4));
|
||||||
|
float g1 = float(((g+1) << 2) | ((g+1) >> 4));
|
||||||
|
if (fabs(v->y - g1) < fabs(v->y - g0)) g = min(g+1, 63U);
|
||||||
|
|
||||||
|
float b0 = float(((b+0) << 3) | ((b+0) >> 2));
|
||||||
|
float b1 = float(((b+1) << 3) | ((b+1) >> 2));
|
||||||
|
if (fabs(v->z - b1) < fabs(v->z - b0)) b = min(b+1, 31U);
|
||||||
|
|
||||||
|
|
||||||
|
uint16 w = (r << 11) | (g << 5) | b;
|
||||||
|
|
||||||
|
r = (r << 3) | (r >> 2);
|
||||||
|
g = (g << 2) | (g >> 4);
|
||||||
|
b = (b << 3) | (b >> 2);
|
||||||
|
*v = Vector3(float(r) / 255.0f, float(g) / 255.0f, float(b) / 255.0f);
|
||||||
|
|
||||||
|
return w;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// Squared Euclidean distance between two colors.
inline static float colorDistance(Vector3::Arg c0, Vector3::Arg c1)
{
    const Vector3 delta = c0 - c1;
    return dot(delta, delta);
}
|
||||||
|
|
||||||
|
// Rounding step for palette colors. Rounding is currently switched off, so the
// input is returned unchanged.
// NOTE(review): in the visible part of this file it is only referenced from
// commented-out palette code.
Vector3 round255(const Vector3 & v)
{
    // Disabled variants, kept for reference:
    //   Vector3(ftoi_round(255 * v.x), ftoi_round(255 * v.y), ftoi_round(255 * v.z)) * (1.0f / 255)
    //   Vector3(floorf(v.x + 0.5f), floorf(v.y + 0.5f), floorf(v.z + 0.5f))
    return v;
}
|
||||||
|
|
||||||
|
|
||||||
inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
|
inline static uint computeIndices4(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
|
||||||
{
|
{
|
||||||
Vector3 palette[4];
|
Vector3 palette[4];
|
||||||
palette[0] = maxColor;
|
palette[0] = maxColor;
|
||||||
palette[1] = minColor;
|
palette[1] = minColor;
|
||||||
|
//palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f);
|
||||||
|
//palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f);
|
||||||
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
|
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
|
||||||
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
|
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
|
||||||
|
|
||||||
@ -178,32 +234,58 @@ inline static uint computeIndices4(const ColorSet & set, Vector3::Arg maxColor,
|
|||||||
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
|
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
|
||||||
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
|
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
|
||||||
|
|
||||||
|
Vector3 mem[(4+2)*2];
|
||||||
|
memset(mem, 0, sizeof(mem));
|
||||||
|
|
||||||
|
Vector3 * row0 = mem;
|
||||||
|
Vector3 * row1 = mem + (4+2);
|
||||||
|
|
||||||
uint indices = 0;
|
uint indices = 0;
|
||||||
for(int i = 0; i < 16; i++)
|
//for(int i = 0; i < 16; i++)
|
||||||
{
|
for (uint y = 0; y < 4; y++) {
|
||||||
if (!set.isValidIndex(i)) {
|
for (uint x = 0; x < 4; x++) {
|
||||||
// Skip masked pixels and out of bounds.
|
int i = y*4+x;
|
||||||
continue;
|
|
||||||
|
if (!set.isValidIndex(i)) {
|
||||||
|
// Skip masked pixels and out of bounds.
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
Vector3 color = set.color(i).xyz();
|
||||||
|
|
||||||
|
// Add error.
|
||||||
|
color += row0[1+x];
|
||||||
|
|
||||||
|
float d0 = colorDistance(palette[0], color);
|
||||||
|
float d1 = colorDistance(palette[1], color);
|
||||||
|
float d2 = colorDistance(palette[2], color);
|
||||||
|
float d3 = colorDistance(palette[3], color);
|
||||||
|
|
||||||
|
uint b0 = d0 > d3;
|
||||||
|
uint b1 = d1 > d2;
|
||||||
|
uint b2 = d0 > d2;
|
||||||
|
uint b3 = d1 > d3;
|
||||||
|
uint b4 = d2 > d3;
|
||||||
|
|
||||||
|
uint x0 = b1 & b2;
|
||||||
|
uint x1 = b0 & b3;
|
||||||
|
uint x2 = b0 & b4;
|
||||||
|
|
||||||
|
int index = x2 | ((x0 | x1) << 1);
|
||||||
|
indices |= index << (2 * i);
|
||||||
|
|
||||||
|
// Compute new error.
|
||||||
|
Vector3 diff = color - palette[index];
|
||||||
|
|
||||||
|
// Propagate new error.
|
||||||
|
//row0[1+x+1] += 7.0f / 16.0f * diff;
|
||||||
|
//row1[1+x-1] += 3.0f / 16.0f * diff;
|
||||||
|
//row1[1+x+0] += 5.0f / 16.0f * diff;
|
||||||
|
//row1[1+x+1] += 1.0f / 16.0f * diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
Vector3 color = set.color(i).xyz();
|
swap(row0, row1);
|
||||||
|
memset(row1, 0, sizeof(row1));
|
||||||
float d0 = colorDistance(palette[0], color);
|
|
||||||
float d1 = colorDistance(palette[1], color);
|
|
||||||
float d2 = colorDistance(palette[2], color);
|
|
||||||
float d3 = colorDistance(palette[3], color);
|
|
||||||
|
|
||||||
uint b0 = d0 > d3;
|
|
||||||
uint b1 = d1 > d2;
|
|
||||||
uint b2 = d0 > d2;
|
|
||||||
uint b3 = d1 > d3;
|
|
||||||
uint b4 = d2 > d3;
|
|
||||||
|
|
||||||
uint x0 = b1 & b2;
|
|
||||||
uint x1 = b0 & b3;
|
|
||||||
uint x2 = b0 & b4;
|
|
||||||
|
|
||||||
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return indices;
|
return indices;
|
||||||
@ -214,6 +296,8 @@ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg
|
|||||||
Vector3 palette[4];
|
Vector3 palette[4];
|
||||||
palette[0] = maxColor;
|
palette[0] = maxColor;
|
||||||
palette[1] = minColor;
|
palette[1] = minColor;
|
||||||
|
//palette[2] = round255((2 * palette[0] + palette[1]) / 3.0f);
|
||||||
|
//palette[3] = round255((2 * palette[1] + palette[0]) / 3.0f);
|
||||||
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
|
palette[2] = lerp(palette[0], palette[1], 1.0f / 3.0f);
|
||||||
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
|
palette[3] = lerp(palette[0], palette[1], 2.0f / 3.0f);
|
||||||
|
|
||||||
@ -231,6 +315,30 @@ inline static float evaluatePaletteError4(const Vector3 block[16], Vector3::Arg
|
|||||||
return total;
|
return total;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline static float evaluatePaletteError3(const Vector3 block[16], Vector3::Arg maxColor, Vector3::Arg minColor)
|
||||||
|
{
|
||||||
|
Vector3 palette[4];
|
||||||
|
palette[0] = minColor;
|
||||||
|
palette[1] = maxColor;
|
||||||
|
palette[2] = (palette[0] + palette[1]) * 0.5f;
|
||||||
|
palette[3] = Vector3(0);
|
||||||
|
|
||||||
|
float total = 0.0f;
|
||||||
|
for (int i = 0; i < 16; i++)
|
||||||
|
{
|
||||||
|
float d0 = colorDistance(palette[0], block[i]);
|
||||||
|
float d1 = colorDistance(palette[1], block[i]);
|
||||||
|
float d2 = colorDistance(palette[2], block[i]);
|
||||||
|
//float d3 = colorDistance(palette[3], block[i]);
|
||||||
|
|
||||||
|
//total += min(min(d0, d1), min(d2, d3));
|
||||||
|
total += min(min(d0, d1), d2);
|
||||||
|
}
|
||||||
|
|
||||||
|
return total;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// maxColor and minColor are expected to be in the same range as the color set.
|
// maxColor and minColor are expected to be in the same range as the color set.
|
||||||
inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
|
inline static uint computeIndices3(const ColorSet & set, Vector3::Arg maxColor, Vector3::Arg minColor)
|
||||||
{
|
{
|
||||||
@ -392,7 +500,7 @@ static void optimizeEndPoints3(Vector3 block[16], BlockDXT1 * dxtBlock)
|
|||||||
namespace
|
namespace
|
||||||
{
|
{
|
||||||
|
|
||||||
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
|
static uint computeAlphaIndices(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block)
|
||||||
{
|
{
|
||||||
uint8 alphas[8];
|
uint8 alphas[8];
|
||||||
block->evaluatePalette(alphas, false); // @@ Use target decoder.
|
block->evaluatePalette(alphas, false); // @@ Use target decoder.
|
||||||
@ -401,7 +509,7 @@ namespace
|
|||||||
|
|
||||||
for (uint i = 0; i < 16; i++)
|
for (uint i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
uint8 alpha = rgba.color(i).a;
|
uint8 alpha = src.alpha[i];
|
||||||
|
|
||||||
uint besterror = 256*256;
|
uint besterror = 256*256;
|
||||||
uint best = 8;
|
uint best = 8;
|
||||||
@ -425,7 +533,7 @@ namespace
|
|||||||
return totalError;
|
return totalError;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
|
static void optimizeAlpha8(const AlphaBlock4x4 & src, AlphaBlockDXT5 * block)
|
||||||
{
|
{
|
||||||
float alpha2_sum = 0;
|
float alpha2_sum = 0;
|
||||||
float beta2_sum = 0;
|
float beta2_sum = 0;
|
||||||
@ -445,8 +553,8 @@ namespace
|
|||||||
alpha2_sum += alpha * alpha;
|
alpha2_sum += alpha * alpha;
|
||||||
beta2_sum += beta * beta;
|
beta2_sum += beta * beta;
|
||||||
alphabeta_sum += alpha * beta;
|
alphabeta_sum += alpha * beta;
|
||||||
alphax_sum += alpha * rgba.color(i).a;
|
alphax_sum += alpha * src.alpha[i];
|
||||||
betax_sum += beta * rgba.color(i).a;
|
betax_sum += beta * src.alpha[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
|
||||||
@ -653,14 +761,20 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
|
void QuickCompress::compressDXT3(const ColorBlock & src, BlockDXT3 * dxtBlock)
|
||||||
{
|
{
|
||||||
compressDXT1(rgba, &dxtBlock->color);
|
compressDXT1(src, &dxtBlock->color);
|
||||||
OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
|
OptimalCompress::compressDXT3A(src, &dxtBlock->alpha);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void QuickCompress::compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/)
|
||||||
|
{
|
||||||
|
AlphaBlock4x4 tmp;
|
||||||
|
tmp.init(src, 3);
|
||||||
|
compressDXT5A(tmp, dst, iterationCount);
|
||||||
|
}
|
||||||
|
|
||||||
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
|
void QuickCompress::compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount/*=8*/)
|
||||||
{
|
{
|
||||||
uint8 alpha0 = 0;
|
uint8 alpha0 = 0;
|
||||||
uint8 alpha1 = 255;
|
uint8 alpha1 = 255;
|
||||||
@ -668,7 +782,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
|
|||||||
// Get min/max alpha.
|
// Get min/max alpha.
|
||||||
for (uint i = 0; i < 16; i++)
|
for (uint i = 0; i < 16; i++)
|
||||||
{
|
{
|
||||||
uint8 alpha = rgba.color(i).a;
|
uint8 alpha = src.alpha[i];
|
||||||
alpha0 = max(alpha0, alpha);
|
alpha0 = max(alpha0, alpha);
|
||||||
alpha1 = min(alpha1, alpha);
|
alpha1 = min(alpha1, alpha);
|
||||||
}
|
}
|
||||||
@ -676,14 +790,14 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
|
|||||||
AlphaBlockDXT5 block;
|
AlphaBlockDXT5 block;
|
||||||
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
|
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
|
||||||
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
|
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
|
||||||
uint besterror = computeAlphaIndices(rgba, &block);
|
uint besterror = computeAlphaIndices(src, &block);
|
||||||
|
|
||||||
AlphaBlockDXT5 bestblock = block;
|
AlphaBlockDXT5 bestblock = block;
|
||||||
|
|
||||||
for (int i = 0; i < iterationCount; i++)
|
for (int i = 0; i < iterationCount; i++)
|
||||||
{
|
{
|
||||||
optimizeAlpha8(rgba, &block);
|
optimizeAlpha8(src, &block);
|
||||||
uint error = computeAlphaIndices(rgba, &block);
|
uint error = computeAlphaIndices(src, &block);
|
||||||
|
|
||||||
if (error >= besterror)
|
if (error >= besterror)
|
||||||
{
|
{
|
||||||
@ -701,7 +815,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
|
|||||||
};
|
};
|
||||||
|
|
||||||
// Copy best block to result;
|
// Copy best block to result;
|
||||||
*dxtBlock = bestblock;
|
*dst = bestblock;
|
||||||
}
|
}
|
||||||
|
|
||||||
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
|
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
|
||||||
@ -752,3 +866,108 @@ void QuickCompress::outputBlock3(const ColorSet & set, const Vector3 & start, co
|
|||||||
//optimizeEndPoints3(set, block);
|
//optimizeEndPoints3(set, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
// Expand a 5:6:5 color to 8-bit-range components by replicating the high
// bits into the low bits, and return the result as floats in a Vector3.
//   r, b: 5-bit values -> (v << 3) | (v >> 2)
//   g:    6-bit value  -> (v << 2) | (v >> 4)
inline Vector3 toVectorColor(int r, int g, int b) {
    const int red8   = (r << 3) | (r >> 2);
    const int green8 = (g << 2) | (g >> 4);
    const int blue8  = (b << 3) | (b >> 2);

    Vector3 c;
    c.x = float(red8);
    c.y = float(green8);
    c.z = float(blue8);
    return c;
}
|
||||||
|
|
||||||
|
// Do an exhaustive search inside the bounding box.
|
||||||
|
void compress_dxt1_bounding_box_exhaustive(const ColorBlock & input, BlockDXT1 * output)
|
||||||
|
{
|
||||||
|
int min_r = 255, min_g = 255, min_b = 255;
|
||||||
|
int max_r = 0, max_g = 0, max_b = 0;
|
||||||
|
|
||||||
|
for (int i = 0; i < 16; i++) {
|
||||||
|
Color32 c = input.color(i);
|
||||||
|
min_r = min(min_r, int(c.r));
|
||||||
|
max_r = max(max_r, int(c.r));
|
||||||
|
min_g = min(min_g, int(c.g));
|
||||||
|
max_g = max(max_g, int(c.g));
|
||||||
|
min_b = min(min_b, int(c.b));
|
||||||
|
max_b = max(max_b, int(c.b));
|
||||||
|
}
|
||||||
|
|
||||||
|
// Convert to 5:6:5
|
||||||
|
min_r >>= 3; min_g >>= 2; min_b >>= 3;
|
||||||
|
max_r >>= 3; max_g >>= 2; max_b >>= 3;
|
||||||
|
|
||||||
|
// Expand the box.
|
||||||
|
int range_r = max_r - min_r;
|
||||||
|
int range_g = max_g - min_g;
|
||||||
|
int range_b = max_b - min_b;
|
||||||
|
|
||||||
|
min_r = max(0, min_r - (range_r + 1) / 1 - 1);
|
||||||
|
min_g = max(0, min_g - (range_g + 1) / 1 - 1);
|
||||||
|
min_b = max(0, min_b - (range_b + 1) / 1 - 1);
|
||||||
|
|
||||||
|
max_r = min(31, max_r + (range_r + 1) / 2 + 1);
|
||||||
|
max_g = min(63, max_g + (range_g + 1) / 2 + 1);
|
||||||
|
max_b = min(31, max_b + (range_b + 1) / 2 + 1);
|
||||||
|
|
||||||
|
int count = (max_r-min_r) + (max_g-min_g) + (max_b-min_b);
|
||||||
|
|
||||||
|
Vector3 colors[16];
|
||||||
|
extractColorBlockRGB(input, colors);
|
||||||
|
|
||||||
|
|
||||||
|
// @@ Use a single loop and remap index to box location?
|
||||||
|
float bestError = FLT_MAX;
|
||||||
|
Vector3 best0, best1;
|
||||||
|
bool threeColorMode;
|
||||||
|
|
||||||
|
for(int r0 = min_r; r0 <= max_r; r0++)
|
||||||
|
for(int r1 = max_r; r1 >= r0; r1--)
|
||||||
|
for(int g0 = min_g; g0 <= max_g; g0++)
|
||||||
|
for(int g1 = max_g; g1 >= g0; g1--)
|
||||||
|
for(int b0 = min_b; b0 <= max_b; b0++)
|
||||||
|
for(int b1 = max_b; b1 >= b0; b1--)
|
||||||
|
{
|
||||||
|
Vector3 c0 = toVectorColor(r0, g0, b0);
|
||||||
|
Vector3 c1 = toVectorColor(r1, g1, b1);
|
||||||
|
|
||||||
|
// Compute palette and evaluate error for these endpoints.
|
||||||
|
float error = evaluatePaletteError4(colors, c1, c0);
|
||||||
|
|
||||||
|
if (error < bestError) {
|
||||||
|
bestError = error;
|
||||||
|
best0 = c1; // c0 > c1
|
||||||
|
best1 = c0;
|
||||||
|
threeColorMode = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
error = evaluatePaletteError3(colors, /*maxColor=*/c1, /*minColor=*/c0);
|
||||||
|
|
||||||
|
if (error < bestError) {
|
||||||
|
bestError = error;
|
||||||
|
best0 = c0;
|
||||||
|
best1 = c1;
|
||||||
|
threeColorMode = true;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
uint16 color0 = roundAndExpand(&best0);
|
||||||
|
uint16 color1 = roundAndExpand(&best1);
|
||||||
|
|
||||||
|
if (threeColorMode) {
|
||||||
|
nvCheck(color0 <= color1);
|
||||||
|
output->col0 = Color16(color1);
|
||||||
|
output->col1 = Color16(color0);
|
||||||
|
output->indices = computeIndices3(colors, best0, best1);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
nvCheck(color0 >= color1);
|
||||||
|
output->col0 = Color16(color0);
|
||||||
|
output->col1 = Color16(color1);
|
||||||
|
output->indices = computeIndices4(colors, best0, best1);
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
@ -31,6 +31,7 @@ namespace nv
|
|||||||
{
|
{
|
||||||
struct ColorBlock;
|
struct ColorBlock;
|
||||||
struct ColorSet;
|
struct ColorSet;
|
||||||
|
struct AlphaBlock4x4;
|
||||||
struct BlockDXT1;
|
struct BlockDXT1;
|
||||||
struct BlockDXT3;
|
struct BlockDXT3;
|
||||||
struct BlockDXT5;
|
struct BlockDXT5;
|
||||||
@ -40,13 +41,15 @@ namespace nv
|
|||||||
|
|
||||||
namespace QuickCompress
|
namespace QuickCompress
|
||||||
{
|
{
|
||||||
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
|
void compressDXT1(const ColorBlock & src, BlockDXT1 * dst);
|
||||||
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
|
void compressDXT1a(const ColorBlock & src, BlockDXT1 * dst);
|
||||||
|
|
||||||
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
|
void compressDXT3(const ColorBlock & src, BlockDXT3 * dst);
|
||||||
|
|
||||||
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
|
void compressDXT5A(const ColorBlock & src, AlphaBlockDXT5 * dst, int iterationCount=8);
|
||||||
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
|
void compressDXT5A(const AlphaBlock4x4 & src, AlphaBlockDXT5 * dst, int iterationCount=8);
|
||||||
|
|
||||||
|
void compressDXT5(const ColorBlock & src, BlockDXT5 * dst, int iterationCount=8);
|
||||||
|
|
||||||
void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
|
void outputBlock4(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
|
||||||
void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
|
void outputBlock3(const ColorSet & set, const Vector3 & start, const Vector3 & end, BlockDXT1 * block);
|
||||||
|
@ -28,6 +28,7 @@
|
|||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Color.h"
|
#include "nvmath/Color.h"
|
||||||
#include "nvmath/Half.h"
|
#include "nvmath/Half.h"
|
||||||
|
#include "nvmath/ftoi.h"
|
||||||
|
|
||||||
#include "nvimage/Filter.h"
|
#include "nvimage/Filter.h"
|
||||||
#include "nvimage/ImageIO.h"
|
#include "nvimage/ImageIO.h"
|
||||||
@ -78,13 +79,13 @@ namespace
|
|||||||
else if (format == Format_DXT3) {
|
else if (format == Format_DXT3) {
|
||||||
return 16;
|
return 16;
|
||||||
}
|
}
|
||||||
else if (format == Format_DXT5 || format == Format_DXT5n) {
|
else if (format == Format_DXT5 || format == Format_DXT5n || format == Format_BC3_RGBM) {
|
||||||
return 16;
|
return 16;
|
||||||
}
|
}
|
||||||
else if (format == Format_BC4) {
|
else if (format == Format_BC4) {
|
||||||
return 8;
|
return 8;
|
||||||
}
|
}
|
||||||
else if (format == Format_BC5) {
|
else if (format == Format_BC5 || format == Format_BC5_Luma) {
|
||||||
return 16;
|
return 16;
|
||||||
}
|
}
|
||||||
else if (format == Format_CTX1) {
|
else if (format == Format_CTX1) {
|
||||||
@ -347,13 +348,13 @@ int Surface::countMipmaps(int min_size) const
|
|||||||
return ::countMipmapsWithMinSize(m->image->width(), m->image->height(), 1, min_size);
|
return ::countMipmapsWithMinSize(m->image->width(), m->image->height(), 1, min_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/) const
|
float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/, int alpha_channel/*=3*/) const
|
||||||
{
|
{
|
||||||
if (m->image == NULL) return 0.0f;
|
if (m->image == NULL) return 0.0f;
|
||||||
|
|
||||||
alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
|
alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
|
||||||
|
|
||||||
return m->image->alphaTestCoverage(alphaRef, 3);
|
return m->image->alphaTestCoverage(alphaRef, alpha_channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
float Surface::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2.2f*/) const
|
float Surface::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2.2f*/) const
|
||||||
@ -419,7 +420,7 @@ void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCoun
|
|||||||
const uint count = m->image->pixelCount();
|
const uint count = m->image->pixelCount();
|
||||||
for (uint i = 0; i < count; i++) {
|
for (uint i = 0; i < count; i++) {
|
||||||
float f = c[i] * scale + bias;
|
float f = c[i] * scale + bias;
|
||||||
int idx = ifloor(f);
|
int idx = ftoi_floor(f);
|
||||||
if (idx < 0) idx = 0;
|
if (idx < 0) idx = 0;
|
||||||
if (idx > binCount-1) idx = binCount-1;
|
if (idx > binCount-1) idx = binCount-1;
|
||||||
binPtr[idx]++;
|
binPtr[idx]++;
|
||||||
@ -434,18 +435,17 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_c
|
|||||||
|
|
||||||
if (alpha_channel == -1) { // no alpha channel; just like the original range function
|
if (alpha_channel == -1) { // no alpha channel; just like the original range function
|
||||||
|
|
||||||
if (m->image != NULL)
|
if (m->image != NULL) {
|
||||||
{
|
float * c = img->channel(channel);
|
||||||
float * c = img->channel(channel);
|
|
||||||
|
|
||||||
const uint count = img->pixelCount();
|
const uint count = img->pixelCount();
|
||||||
for (uint p = 0; p < count; p++) {
|
for (uint p = 0; p < count; p++) {
|
||||||
float f = c[p];
|
float f = c[p];
|
||||||
if (f < range.x) range.x = f;
|
if (f < range.x) range.x = f;
|
||||||
if (f > range.y) range.y = f;
|
if (f > range.y) range.y = f;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
else { // use alpha test to ignore some pixels
|
else { // use alpha test to ignore some pixels
|
||||||
//note, it's quite possible to get FLT_MAX,-FLT_MAX back if all pixels fail the test
|
//note, it's quite possible to get FLT_MAX,-FLT_MAX back if all pixels fail the test
|
||||||
|
|
||||||
@ -623,6 +623,23 @@ bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (format == InputFormat_R_32F)
|
||||||
|
{
|
||||||
|
const float * src = (const float *)data;
|
||||||
|
|
||||||
|
TRY {
|
||||||
|
for (int i = 0; i < count; i++)
|
||||||
|
{
|
||||||
|
rdst[i] = src[i];
|
||||||
|
gdst[i] = 0;
|
||||||
|
bdst[i] = 0;
|
||||||
|
adst[i] = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
CATCH {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -695,6 +712,20 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (format == InputFormat_R_32F)
|
||||||
|
{
|
||||||
|
const float * rsrc = (const float *)r;
|
||||||
|
|
||||||
|
TRY {
|
||||||
|
memcpy(rdst, rsrc, count * sizeof(float));
|
||||||
|
memset(gdst, 0, count * sizeof(float));
|
||||||
|
memset(bdst, 0, count * sizeof(float));
|
||||||
|
memset(adst, 0, count * sizeof(float));
|
||||||
|
}
|
||||||
|
CATCH {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@ -703,12 +734,12 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r,
|
|||||||
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
|
bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const void * data)
|
||||||
{
|
{
|
||||||
if (format != nvtt::Format_BC1 &&
|
if (format != nvtt::Format_BC1 &&
|
||||||
format != nvtt::Format_BC2 &&
|
format != nvtt::Format_BC2 &&
|
||||||
format != nvtt::Format_BC3 &&
|
format != nvtt::Format_BC3 &&
|
||||||
format != nvtt::Format_BC4 &&
|
format != nvtt::Format_BC4 &&
|
||||||
format != nvtt::Format_BC5 &&
|
format != nvtt::Format_BC5 &&
|
||||||
format != nvtt::Format_BC6 &&
|
format != nvtt::Format_BC6 &&
|
||||||
format != nvtt::Format_BC7)
|
format != nvtt::Format_BC7)
|
||||||
{
|
{
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
@ -1466,7 +1497,7 @@ void Surface::fill(float red, float green, float blue, float alpha)
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
|
void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/, int alpha_channel/*= 3*/)
|
||||||
{
|
{
|
||||||
if (isNull()) return;
|
if (isNull()) return;
|
||||||
|
|
||||||
@ -1474,7 +1505,7 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
|
|||||||
|
|
||||||
alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
|
alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256);
|
||||||
|
|
||||||
m->image->scaleAlphaToCoverage(coverage, alphaRef, 3);
|
m->image->scaleAlphaToCoverage(coverage, alphaRef, alpha_channel);
|
||||||
}
|
}
|
||||||
|
|
||||||
/*bool Surface::normalizeRange(float * rangeMin, float * rangeMax)
|
/*bool Surface::normalizeRange(float * rangeMin, float * rangeMax)
|
||||||
@ -1507,7 +1538,7 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/)
|
|||||||
|
|
||||||
// Ideally you should compress/quantize the RGB and M portions independently.
|
// Ideally you should compress/quantize the RGB and M portions independently.
|
||||||
// Once you have M quantized, you would compute the corresponding RGB and quantize that.
|
// Once you have M quantized, you would compute the corresponding RGB and quantize that.
|
||||||
void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.0f*/)
|
void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
|
||||||
{
|
{
|
||||||
if (isNull()) return;
|
if (isNull()) return;
|
||||||
|
|
||||||
@ -1523,60 +1554,71 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.0f*/)
|
|||||||
|
|
||||||
const uint count = img->pixelCount();
|
const uint count = img->pixelCount();
|
||||||
for (uint i = 0; i < count; i++) {
|
for (uint i = 0; i < count; i++) {
|
||||||
float R = r[i];
|
float R = nv::clamp(r[i], 0.0f, 1.0f);
|
||||||
float G = g[i];
|
float G = nv::clamp(g[i], 0.0f, 1.0f);
|
||||||
float B = b[i];
|
float B = nv::clamp(b[i], 0.0f, 1.0f);
|
||||||
#if 1
|
|
||||||
float M = nv::clamp(max(max(R, G), max(B, threshold)), 0.0f, range);
|
|
||||||
|
|
||||||
r[i] = nv::clamp(R / M, 0.0f, 1.0f);
|
#if 0
|
||||||
g[i] = nv::clamp(G / M, 0.0f, 1.0f);
|
// Baseline, no compression:
|
||||||
b[i] = nv::clamp(B / M, 0.0f, 1.0f);
|
r[i] = R;
|
||||||
|
g[i] = G;
|
||||||
|
b[i] = B;
|
||||||
|
a[i] = 1;
|
||||||
|
|
||||||
a[i] = (M - threshold) / (range - threshold);
|
#elif 0
|
||||||
|
float M = max(max(R, G), max(B, threshold));
|
||||||
|
|
||||||
|
r[i] = R / M;
|
||||||
|
g[i] = G / M;
|
||||||
|
b[i] = B / M;
|
||||||
|
|
||||||
|
a[i] = (M - threshold) / (1 - threshold);
|
||||||
|
|
||||||
#else
|
#else
|
||||||
|
// The optimal compressor produces the best results, but can introduce interpolation errors!
|
||||||
// The optimal compressor theoretically produces the best results, but unfortunately introduces
|
|
||||||
// severe interpolation errors!
|
|
||||||
float bestM;
|
float bestM;
|
||||||
float bestError = FLT_MAX;
|
float bestError = FLT_MAX;
|
||||||
|
|
||||||
int minM = iround(min(R, G, B) * 255.0f);
|
float M = max(max(R, G), max(B, threshold));
|
||||||
|
int iM = ftoi_ceil((M - threshold) / (1 - threshold) * 255.0f);
|
||||||
|
|
||||||
for (int m = minM; m < 256; m++) {
|
//for (int m = 0; m < 256; m++) { // If we use the entire search space, interpolation errors are very likely to occur.
|
||||||
|
for (int m = max(iM-16, 0); m < min(iM+16, 256); m++) { // If we constrain the search space, these errors disappear.
|
||||||
float fm = float(m) / 255.0f;
|
float fm = float(m) / 255.0f;
|
||||||
|
|
||||||
|
// Decode M
|
||||||
|
float M = fm * (1 - threshold) + threshold;
|
||||||
|
|
||||||
// Encode.
|
// Encode.
|
||||||
int ir = iround(255.0f * nv::clamp(R / fm, 0.0f, 1.0f));
|
int ir = ftoi_round(255.0f * nv::saturate(R / M));
|
||||||
int ig = iround(255.0f * nv::clamp(G / fm, 0.0f, 1.0f));
|
int ig = ftoi_round(255.0f * nv::saturate(G / M));
|
||||||
int ib = iround(255.0f * nv::clamp(B / fm, 0.0f, 1.0f));
|
int ib = ftoi_round(255.0f * nv::saturate(B / M));
|
||||||
|
|
||||||
// Decode.
|
// Decode.
|
||||||
float fr = (float(ir) / 255.0f) * fm;
|
float fr = (float(ir) / 255.0f) * M;
|
||||||
float fg = (float(ig) / 255.0f) * fm;
|
float fg = (float(ig) / 255.0f) * M;
|
||||||
float fb = (float(ib) / 255.0f) * fm;
|
float fb = (float(ib) / 255.0f) * M;
|
||||||
|
|
||||||
// Measure error.
|
// Measure error.
|
||||||
float error = square(R-fr) + square(G-fg) + square(B-fb);
|
float error = square(R-fr) + square(G-fg) + square(B-fb);
|
||||||
|
|
||||||
if (error < bestError) {
|
if (error < bestError) {
|
||||||
bestError = error;
|
bestError = error;
|
||||||
bestM = fm;
|
bestM = M;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
M = bestM;
|
M = bestM;
|
||||||
r[i] = nv::clamp(R / M, 0.0f, 1.0f);
|
r[i] = nv::saturate(R / M);
|
||||||
g[i] = nv::clamp(G / M, 0.0f, 1.0f);
|
g[i] = nv::saturate(G / M);
|
||||||
b[i] = nv::clamp(B / M, 0.0f, 1.0f);
|
b[i] = nv::saturate(B / M);
|
||||||
a[i] = M;
|
a[i] = (M - threshold) / (1 - threshold);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// @@ IC: Dubious merge. Review!
|
||||||
void Surface::fromRGBM(float range/*= 1*/, float threshold/*= 0.0*/)
|
void Surface::fromRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
|
||||||
{
|
{
|
||||||
if (isNull()) return;
|
if (isNull()) return;
|
||||||
|
|
||||||
@ -1798,7 +1840,7 @@ void Surface::toRGBE(int mantissaBits, int exponentBits)
|
|||||||
double denom = pow(2.0, double(E - exponentBias - mantissaBits));
|
double denom = pow(2.0, double(E - exponentBias - mantissaBits));
|
||||||
|
|
||||||
// Refine exponent:
|
// Refine exponent:
|
||||||
int m = iround(float(M / denom));
|
int m = ftoi_round(float(M / denom));
|
||||||
nvDebugCheck(m <= (1 << mantissaBits));
|
nvDebugCheck(m <= (1 << mantissaBits));
|
||||||
|
|
||||||
if (m == (1 << mantissaBits)) {
|
if (m == (1 << mantissaBits)) {
|
||||||
@ -1866,10 +1908,10 @@ void Surface::fromRGBE(int mantissaBits, int exponentBits)
|
|||||||
const uint count = img->pixelCount();
|
const uint count = img->pixelCount();
|
||||||
for (uint i = 0; i < count; i++) {
|
for (uint i = 0; i < count; i++) {
|
||||||
// Expand normalized float to to 9995
|
// Expand normalized float to to 9995
|
||||||
int R = iround(r[i] * ((1 << mantissaBits) - 1));
|
int R = ftoi_round(r[i] * ((1 << mantissaBits) - 1));
|
||||||
int G = iround(g[i] * ((1 << mantissaBits) - 1));
|
int G = ftoi_round(g[i] * ((1 << mantissaBits) - 1));
|
||||||
int B = iround(b[i] * ((1 << mantissaBits) - 1));
|
int B = ftoi_round(b[i] * ((1 << mantissaBits) - 1));
|
||||||
int E = iround(a[i] * ((1 << exponentBits) - 1));
|
int E = ftoi_round(a[i] * ((1 << exponentBits) - 1));
|
||||||
|
|
||||||
//float scale = ldexpf(1.0f, E - exponentBias - mantissaBits);
|
//float scale = ldexpf(1.0f, E - exponentBias - mantissaBits);
|
||||||
float scale = powf(2, float(E - exponentBias - mantissaBits));
|
float scale = powf(2, float(E - exponentBias - mantissaBits));
|
||||||
@ -2741,8 +2783,8 @@ bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int x
|
|||||||
FloatImage * dst = m->image;
|
FloatImage * dst = m->image;
|
||||||
const FloatImage * src = srcImage.m->image;
|
const FloatImage * src = srcImage.m->image;
|
||||||
|
|
||||||
if (toU32(xsrc + xsize) > src->width() || toU32(ysrc + ysize) > src->height() || toU32(zsrc + zsize) > src->depth()) return false;
|
if (U32(xsrc + xsize) > src->width() || U32(ysrc + ysize) > src->height() || U32(zsrc + zsize) > src->depth()) return false;
|
||||||
if (toU32(xdst + xsize) > dst->width() || toU32(ydst + ysize) > dst->height() || toU32(zdst + zsize) > dst->depth()) return false;
|
if (U32(xdst + xsize) > dst->width() || U32(ydst + ysize) > dst->height() || U32(zdst + zsize) > dst->depth()) return false;
|
||||||
|
|
||||||
detach();
|
detach();
|
||||||
|
|
||||||
@ -2765,6 +2807,65 @@ bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int x
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Draw colored border around atlas elements.
|
||||||
|
void Surface::setAtlasBorder(int aw, int ah, float r, float g, float b, float a)
|
||||||
|
{
|
||||||
|
if (isNull()) return;
|
||||||
|
if (aw <= 0) return;
|
||||||
|
if (ah <= 0) return;
|
||||||
|
|
||||||
|
detach();
|
||||||
|
|
||||||
|
FloatImage * img = m->image;
|
||||||
|
const uint w = img->width();
|
||||||
|
const uint h = img->height();
|
||||||
|
const uint d = img->depth();
|
||||||
|
|
||||||
|
// @@ Ideally the reminder of these divisions should be 0.
|
||||||
|
uint tile_height = h / ah;
|
||||||
|
uint tile_width = w / aw;
|
||||||
|
|
||||||
|
// Note that this renders two consecutive lines between tiles. In theory we could just have one, but this way I think we have better rotation invariance.
|
||||||
|
|
||||||
|
for (uint z = 0; z < d; z++)
|
||||||
|
{
|
||||||
|
// Horizontal lines:
|
||||||
|
for (uint i = 0, y = 0; i < uint(ah); i++, y += tile_height)
|
||||||
|
{
|
||||||
|
for (uint x = 0; x < w; x++)
|
||||||
|
{
|
||||||
|
img->pixel(0, x, y, z) = r;
|
||||||
|
img->pixel(1, x, y, z) = g;
|
||||||
|
img->pixel(2, x, y, z) = b;
|
||||||
|
img->pixel(3, x, y, z) = a;
|
||||||
|
|
||||||
|
img->pixel(0, x, y + tile_height - 1, z) = r;
|
||||||
|
img->pixel(1, x, y + tile_height - 1, z) = g;
|
||||||
|
img->pixel(2, x, y + tile_height - 1, z) = b;
|
||||||
|
img->pixel(3, x, y + tile_height - 1, z) = a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Vertical lines:
|
||||||
|
for (uint i = 0, x = 0; i < uint(ah); i++, x += tile_width)
|
||||||
|
{
|
||||||
|
for (uint y = 0; y < h; y++)
|
||||||
|
{
|
||||||
|
img->pixel(0, x, y, z) = r;
|
||||||
|
img->pixel(1, x, y, z) = g;
|
||||||
|
img->pixel(2, x, y, z) = b;
|
||||||
|
img->pixel(3, x, y, z) = a;
|
||||||
|
|
||||||
|
img->pixel(0, x + tile_width - 1, y, z) = r;
|
||||||
|
img->pixel(1, x + tile_width - 1, y, z) = g;
|
||||||
|
img->pixel(2, x + tile_width - 1, y, z) = b;
|
||||||
|
img->pixel(3, x + tile_width - 1, y, z) = a;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
float nvtt::rmsError(const Surface & reference, const Surface & image)
|
float nvtt::rmsError(const Surface & reference, const Surface & image)
|
||||||
{
|
{
|
||||||
@ -2839,5 +2940,24 @@ Surface nvtt::diff(const Surface & reference, const Surface & image, float scale
|
|||||||
return diffImage;
|
return diffImage;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
float nvtt::rmsToneMappedError(const Surface & reference, const Surface & img, float exposure)
|
||||||
|
{
|
||||||
|
// @@ We could do this in the rms function without having to create image copies.
|
||||||
|
Surface r = reference;
|
||||||
|
Surface i = img;
|
||||||
|
|
||||||
|
// @@ Ideally we should use our Reindhart operator. Add Reindhart_L & Reindhart_M ?
|
||||||
|
|
||||||
|
float scale = 1.0f / exposure;
|
||||||
|
|
||||||
|
r.scaleBias(0, scale, 0); r.scaleBias(1, scale, 0); r.scaleBias(2, scale, 0);
|
||||||
|
r.toneMap(ToneMapper_Reindhart, NULL);
|
||||||
|
r.toSrgb();
|
||||||
|
|
||||||
|
i.scaleBias(0, scale, 0); i.scaleBias(1, scale, 0); i.scaleBias(2, scale, 0);
|
||||||
|
i.toneMap(ToneMapper_Reindhart, NULL);
|
||||||
|
i.toSrgb();
|
||||||
|
|
||||||
|
return nv::rmsColorError(r.m->image, i.m->image, reference.alphaMode() == nvtt::AlphaMode_Transparency);
|
||||||
|
}
|
||||||
|
|
||||||
|
@ -41,7 +41,7 @@ public:
|
|||||||
return out;
|
return out;
|
||||||
}
|
}
|
||||||
int getptr() { return bptr; }
|
int getptr() { return bptr; }
|
||||||
int setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
|
void setptr(int ptr) { nvAssert (ptr >= 0 && ptr < maxbits); bptr = ptr; }
|
||||||
int getsize() { return bend; }
|
int getsize() { return bend; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
@ -60,8 +60,7 @@ private:
|
|||||||
return bit != 0;
|
return bit != 0;
|
||||||
}
|
}
|
||||||
void writeone(int bit) {
|
void writeone(int bit) {
|
||||||
if (readonly)
|
nvAssert (!readonly); // "Writing a read-only bit stream"
|
||||||
throw "Writing a read-only bit stream";
|
|
||||||
nvAssert (bptr < maxbits);
|
nvAssert (bptr < maxbits);
|
||||||
if (bptr >= maxbits) return;
|
if (bptr >= maxbits) return;
|
||||||
if (bit&1)
|
if (bit&1)
|
||||||
|
@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#ifndef _ZOH_TILE_H
|
#ifndef _ZOH_TILE_H
|
||||||
#define _ZOH_TILE_H
|
#define _ZOH_TILE_H
|
||||||
|
|
||||||
#include "utils.h"
|
#include "zoh_utils.h"
|
||||||
#include "nvmath/Vector.h"
|
#include "nvmath/Vector.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
|
|
||||||
// Utility and common routines
|
// Utility and common routines
|
||||||
|
|
||||||
#include "utils.h"
|
#include "zoh_utils.h"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
|
|
@ -16,7 +16,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "bits.h"
|
#include "bits.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
#include "zoh.h"
|
#include "zoh.h"
|
||||||
#include "utils.h"
|
#include "zoh_utils.h"
|
||||||
|
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
@ -591,13 +591,14 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
{
|
pixels[np] = tile.data[y][x];
|
||||||
pixels[np] = tile.data[y][x];
|
importance[np] = tile.importance_map[y][x];
|
||||||
importance[np] = tile.importance_map[y][x];
|
++np;
|
||||||
++np;
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
|
optimize_one(pixels, importance, np, orig_err[region], orig_endpts[region], prec, opt_endpts[region]);
|
||||||
@ -660,7 +661,9 @@ float ZOH::refineone(const Tile &tile, int shapeindex_best, const FltEndpts endp
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (refineone.)";
|
|
||||||
|
nvAssert (false); // "No candidate found, should never happen (refineone.)";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
|
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_ONE], Vector3 palette[NREGIONS_ONE][NINDICES])
|
||||||
|
@ -40,7 +40,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "bits.h"
|
#include "bits.h"
|
||||||
#include "tile.h"
|
#include "tile.h"
|
||||||
#include "zoh.h"
|
#include "zoh.h"
|
||||||
#include "utils.h"
|
#include "zoh_utils.h"
|
||||||
|
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
@ -747,7 +747,8 @@ float ZOH::refinetwo(const Tile &tile, int shapeindex_best, const FltEndpts endp
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (refinetwo.)";
|
nvAssert(false); //throw "No candidate found, should never happen (refinetwo.)";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
|
static void generate_palette_unquantized(const FltEndpts endpts[NREGIONS_TWO], Vector3 palette[NREGIONS_TWO][NINDICES])
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -394,7 +394,7 @@ void AVPCL::decompress_mode0(const char *block, Tile &t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec ®ion_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector4 palette[NINDICES];
|
Vector4 palette[NINDICES];
|
||||||
float toterr = 0;
|
float toterr = 0;
|
||||||
@ -404,11 +404,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &en
|
|||||||
|
|
||||||
for (int i = 0; i < np; ++i)
|
for (int i = 0; i < np; ++i)
|
||||||
{
|
{
|
||||||
float err, besterr = FLT_MAX;
|
float besterr = FLT_MAX;
|
||||||
|
|
||||||
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
||||||
{
|
{
|
||||||
err = Utils::metric4(colors[i], palette[j]);
|
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
|
||||||
|
|
||||||
if (err > besterr) // error increased, so we're done searching
|
if (err > besterr) // error increased, so we're done searching
|
||||||
break;
|
break;
|
||||||
@ -472,7 +472,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec ®ion_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
|
||||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -511,7 +511,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -543,7 +543,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGB_2 temp_endpts;
|
IntEndptsRGB_2 temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -593,7 +593,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -613,7 +613,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -636,7 +636,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -675,8 +675,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -712,7 +712,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -746,7 +746,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -786,6 +786,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
|
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGB_2 temp_in, temp_out;
|
IntEndptsRGB_2 temp_in, temp_out;
|
||||||
int temp_indices[Tile::TILE_TOTAL];
|
int temp_indices[Tile::TILE_TOTAL];
|
||||||
|
|
||||||
@ -794,10 +795,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -812,10 +818,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// make sure we have a valid error for temp_in
|
// make sure we have a valid error for temp_in
|
||||||
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
||||||
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -890,7 +896,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (mode avpcl 0).";
|
nvAssert(false); // throw "No candidate found, should never happen (mode avpcl 0).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -378,7 +378,7 @@ void AVPCL::decompress_mode1(const char *block, Tile &t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_1 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector4 palette[NINDICES];
|
Vector4 palette[NINDICES];
|
||||||
float toterr = 0;
|
float toterr = 0;
|
||||||
@ -388,11 +388,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_1 &en
|
|||||||
|
|
||||||
for (int i = 0; i < np; ++i)
|
for (int i = 0; i < np; ++i)
|
||||||
{
|
{
|
||||||
float err, besterr = FLT_MAX;
|
float besterr = FLT_MAX;
|
||||||
|
|
||||||
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
||||||
{
|
{
|
||||||
err = Utils::metric4(colors[i], palette[j]);
|
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
|
||||||
|
|
||||||
if (err > besterr) // error increased, so we're done searching
|
if (err > besterr) // error increased, so we're done searching
|
||||||
break;
|
break;
|
||||||
@ -456,7 +456,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_1 endp
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_1 &old_endpts, IntEndptsRGB_1 &new_endpts,
|
||||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -495,7 +495,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -527,7 +527,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB_1 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGB_1 temp_endpts;
|
IntEndptsRGB_1 temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -577,7 +577,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -597,7 +597,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -619,7 +619,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_1 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_1 &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -658,8 +658,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -695,7 +695,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -729,7 +729,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -768,6 +768,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
|
IntEndptsRGB_1 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_1 opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGB_1 temp_in, temp_out;
|
IntEndptsRGB_1 temp_in, temp_out;
|
||||||
int temp_indices[Tile::TILE_TOTAL];
|
int temp_indices[Tile::TILE_TOTAL];
|
||||||
|
|
||||||
@ -776,10 +777,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -793,10 +799,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// make sure we have a valid error for temp_in
|
// make sure we have a valid error for temp_in
|
||||||
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
||||||
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -873,7 +879,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (mode avpcl 1).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 1).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
@ -909,11 +916,11 @@ static float map_colors(const Tile &tile, int shapeindex, const FltEndpts endpts
|
|||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++)
|
||||||
{
|
{
|
||||||
int region = REGION(x,y,shapeindex);
|
int region = REGION(x,y,shapeindex);
|
||||||
float err, besterr = FLT_MAX;
|
float besterr = FLT_MAX;
|
||||||
|
|
||||||
for (int i = 0; i < NINDICES && besterr > 0; ++i)
|
for (int i = 0; i < NINDICES && besterr > 0; ++i)
|
||||||
{
|
{
|
||||||
err = Utils::metric4(tile.data[y][x], palette[region][i]);
|
float err = Utils::metric4(tile.data[y][x], palette[region][i]) * tile.importance_map[y][x];
|
||||||
|
|
||||||
if (err > besterr) // error increased, so we're done searching. this works for most norms.
|
if (err > besterr) // error increased, so we're done searching. this works for most norms.
|
||||||
break;
|
break;
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -342,7 +342,7 @@ void AVPCL::decompress_mode2(const char *block, Tile &t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector4 palette[NINDICES];
|
Vector4 palette[NINDICES];
|
||||||
float toterr = 0;
|
float toterr = 0;
|
||||||
@ -352,11 +352,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB &endp
|
|||||||
|
|
||||||
for (int i = 0; i < np; ++i)
|
for (int i = 0; i < np; ++i)
|
||||||
{
|
{
|
||||||
float err, besterr = FLT_MAX;
|
float besterr = FLT_MAX;
|
||||||
|
|
||||||
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
||||||
{
|
{
|
||||||
err = Utils::metric4(colors[i], palette[j]);
|
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
|
||||||
|
|
||||||
if (err > besterr) // error increased, so we're done searching
|
if (err > besterr) // error increased, so we're done searching
|
||||||
break;
|
break;
|
||||||
@ -420,7 +420,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB endpts
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB &old_endpts, IntEndptsRGB &new_endpts,
|
||||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -459,7 +459,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -491,7 +491,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGB &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGB temp_endpts;
|
IntEndptsRGB temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -541,7 +541,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -561,7 +561,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -584,7 +584,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -623,8 +623,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -660,7 +660,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -694,7 +694,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -733,6 +733,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
|
const IntEndptsRGB orig_endpts[NREGIONS_THREE], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB opt_endpts[NREGIONS_THREE])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGB temp_in, temp_out;
|
IntEndptsRGB temp_in, temp_out;
|
||||||
|
|
||||||
for (int region=0; region<NREGIONS_THREE; ++region)
|
for (int region=0; region<NREGIONS_THREE; ++region)
|
||||||
@ -740,10 +741,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -755,7 +761,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
float temp_in_err = orig_err[region];
|
float temp_in_err = orig_err[region];
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -829,7 +835,9 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (avpcl mode 2).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 2).";
|
||||||
|
return FLT_MAX;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -390,7 +390,7 @@ void AVPCL::decompress_mode3(const char *block, Tile &t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGB_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector4 palette[NINDICES];
|
Vector4 palette[NINDICES];
|
||||||
float toterr = 0;
|
float toterr = 0;
|
||||||
@ -400,11 +400,11 @@ static float map_colors(const Vector4 colors[], int np, const IntEndptsRGB_2 &en
|
|||||||
|
|
||||||
for (int i = 0; i < np; ++i)
|
for (int i = 0; i < np; ++i)
|
||||||
{
|
{
|
||||||
float err, besterr = FLT_MAX;
|
float besterr = FLT_MAX;
|
||||||
|
|
||||||
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
for (int j = 0; j < NINDICES && besterr > 0; ++j)
|
||||||
{
|
{
|
||||||
err = Utils::metric4(colors[i], palette[j]);
|
float err = Utils::metric4(colors[i], palette[j]) * importance[i];
|
||||||
|
|
||||||
if (err > besterr) // error increased, so we're done searching
|
if (err > besterr) // error increased, so we're done searching
|
||||||
break;
|
break;
|
||||||
@ -467,7 +467,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGB_2 endp
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGB_2 &old_endpts, IntEndptsRGB_2 &new_endpts,
|
||||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -506,7 +506,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -538,7 +538,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float &orig_err, IntEndptsRGB_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGB_2 temp_endpts;
|
IntEndptsRGB_2 temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -588,7 +588,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -608,7 +608,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -631,7 +631,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGB_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGB_2 &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -670,8 +670,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -707,7 +707,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -741,7 +741,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGB; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -781,6 +781,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
|
const IntEndptsRGB_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGB_2 opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGB_2 temp_in, temp_out;
|
IntEndptsRGB_2 temp_in, temp_out;
|
||||||
int temp_indices[Tile::TILE_TOTAL];
|
int temp_indices[Tile::TILE_TOTAL];
|
||||||
|
|
||||||
@ -789,10 +790,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -807,10 +813,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// make sure we have a valid error for temp_in
|
// make sure we have a valid error for temp_in
|
||||||
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
||||||
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -885,7 +891,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (avpcl mode 3).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 3).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -353,9 +353,9 @@ static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
|
|||||||
if (p.transform_mode)
|
if (p.transform_mode)
|
||||||
{
|
{
|
||||||
// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
|
// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
|
||||||
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
|
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
|
||||||
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
|
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
|
||||||
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
|
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -422,7 +422,7 @@ void AVPCL::decompress_mode4(const char *block, Tile &t)
|
|||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
|
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
|
||||||
// exceeds what we already have
|
// exceeds what we already have
|
||||||
static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector3 palette_rgb[NINDICES3]; // could be nindices2
|
Vector3 palette_rgb[NINDICES3]; // could be nindices2
|
||||||
float palette_a[NINDICES3]; // could be nindices2
|
float palette_a[NINDICES3]; // could be nindices2
|
||||||
@ -519,7 +519,7 @@ static float map_colors(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
}
|
}
|
||||||
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
|
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
|
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0);
|
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
|
||||||
toterr += besterr;
|
toterr += besterr;
|
||||||
|
|
||||||
// do A index
|
// do A index
|
||||||
@ -647,7 +647,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
|
|||||||
}
|
}
|
||||||
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
|
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
|
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0);
|
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
|
||||||
toterr[region] += besterr;
|
toterr[region] += besterr;
|
||||||
|
|
||||||
// do A index
|
// do A index
|
||||||
@ -672,7 +672,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
|
||||||
float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -712,7 +712,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -744,7 +744,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
|
|||||||
// if err > 40 6.25%
|
// if err > 40 6.25%
|
||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGBA temp_endpts;
|
IntEndptsRGBA temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -795,7 +795,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -816,7 +816,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -841,7 +841,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -878,8 +878,8 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -917,7 +917,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -950,7 +950,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -990,6 +990,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
|
|||||||
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
|
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGBA temp_in, temp_out;
|
IntEndptsRGBA temp_in, temp_out;
|
||||||
|
|
||||||
for (int region=0; region<NREGIONS; ++region)
|
for (int region=0; region<NREGIONS; ++region)
|
||||||
@ -997,10 +998,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -1012,7 +1018,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
|
|||||||
float temp_in_err = orig_err[region];
|
float temp_in_err = orig_err[region];
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -1093,7 +1099,8 @@ static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (avpcl mode 4).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 4).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -354,9 +354,9 @@ static void sign_extend(Pattern &p, IntEndptsRGBA endpts[NREGIONS])
|
|||||||
if (p.transform_mode)
|
if (p.transform_mode)
|
||||||
{
|
{
|
||||||
// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
|
// endpts[0].A[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]); // always positive here
|
||||||
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[1]);
|
endpts[0].B[i] = SIGN_EXTEND(endpts[0].B[i], p.chan[i].nbitsizes[0]);
|
||||||
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[2]);
|
endpts[1].A[i] = SIGN_EXTEND(endpts[1].A[i], p.chan[i].nbitsizes[1]);
|
||||||
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[3]);
|
endpts[1].B[i] = SIGN_EXTEND(endpts[1].B[i], p.chan[i].nbitsizes[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -423,7 +423,7 @@ void AVPCL::decompress_mode5(const char *block, Tile &t)
|
|||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
|
// we already have a candidate mapping when we call this function, thus an error. take an early exit if the accumulated error so far
|
||||||
// exceeds what we already have
|
// exceeds what we already have
|
||||||
static float map_colors(const Vector4 colors[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, const IntEndptsRGBA &endpts, const RegionPrec &region_prec, float current_besterr, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector3 palette_rgb[NINDICES3]; // could be nindices2
|
Vector3 palette_rgb[NINDICES3]; // could be nindices2
|
||||||
float palette_a[NINDICES3]; // could be nindices2
|
float palette_a[NINDICES3]; // could be nindices2
|
||||||
@ -520,7 +520,7 @@ static float map_colors(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
}
|
}
|
||||||
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
|
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[bestindex]).x :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
|
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[bestindex]).y :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : (nvCheckMacro(0),0);
|
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[bestindex]).z : nvCheckMacro(0);
|
||||||
toterr += besterr;
|
toterr += besterr;
|
||||||
|
|
||||||
// do A index
|
// do A index
|
||||||
@ -648,7 +648,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
|
|||||||
}
|
}
|
||||||
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
|
palette_alpha = (rotatemode == ROTATEMODE_RGBA_AGBR) ? (palette_rgb[region][bestindex]).x :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
|
(rotatemode == ROTATEMODE_RGBA_RABG) ? (palette_rgb[region][bestindex]).y :
|
||||||
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : (nvCheckMacro(0),0);
|
(rotatemode == ROTATEMODE_RGBA_RGAB) ? (palette_rgb[region][bestindex]).z : nvCheckMacro(0);
|
||||||
toterr[region] += besterr;
|
toterr[region] += besterr;
|
||||||
|
|
||||||
// do A index
|
// do A index
|
||||||
@ -673,7 +673,7 @@ static void assign_indices(const Tile &tile, int shapeindex, int rotatemode, int
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, const IntEndptsRGBA &old_endpts, IntEndptsRGBA &new_endpts,
|
||||||
float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -713,7 +713,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -745,7 +745,7 @@ static float perturb_one(const Vector4 colors[], int np, int rotatemode, int ind
|
|||||||
// if err > 40 6.25%
|
// if err > 40 6.25%
|
||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
static float exhaustive(const Vector4 colors[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA &opt_endpts, int indices[NINDEXARRAYS][Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGBA temp_endpts;
|
IntEndptsRGBA temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -796,7 +796,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -817,7 +817,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, rotatemode, indexmode, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -842,7 +842,7 @@ static float exhaustive(const Vector4 colors[], int np, int rotatemode, int inde
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, int rotatemode, int indexmode, float orig_err, const IntEndptsRGBA &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -879,8 +879,8 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -918,7 +918,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -951,7 +951,7 @@ static float optimize_one(const Vector4 colors[], int np, int rotatemode, int in
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, rotatemode, indexmode, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -991,6 +991,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
|
|||||||
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
|
const IntEndptsRGBA orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGBA temp_in, temp_out;
|
IntEndptsRGBA temp_in, temp_out;
|
||||||
|
|
||||||
for (int region=0; region<NREGIONS; ++region)
|
for (int region=0; region<NREGIONS; ++region)
|
||||||
@ -998,10 +999,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -1013,7 +1019,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, int rotatemode, in
|
|||||||
float temp_in_err = orig_err[region];
|
float temp_in_err = orig_err[region];
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, rotatemode, indexmode, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -1094,7 +1100,8 @@ static float refine(const Tile &tile, int shapeindex_best, int rotatemode, int i
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (avpcl mode 5).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 5).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -390,7 +390,7 @@ void AVPCL::decompress_mode6(const char *block, Tile &t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector4 palette[NINDICES];
|
Vector4 palette[NINDICES];
|
||||||
float toterr = 0;
|
float toterr = 0;
|
||||||
@ -470,7 +470,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
|
||||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -509,7 +509,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -541,7 +541,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGBA_2 temp_endpts;
|
IntEndptsRGBA_2 temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -591,7 +591,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -611,7 +611,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -634,7 +634,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -673,8 +673,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -710,7 +710,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -744,7 +744,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -783,6 +783,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
|
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGBA_2 temp_in, temp_out;
|
IntEndptsRGBA_2 temp_in, temp_out;
|
||||||
int temp_indices[Tile::TILE_TOTAL];
|
int temp_indices[Tile::TILE_TOTAL];
|
||||||
|
|
||||||
@ -791,10 +792,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -810,10 +816,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// make sure we have a valid error for temp_in
|
// make sure we have a valid error for temp_in
|
||||||
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
||||||
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -880,7 +886,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
|
|||||||
return orig_toterr;
|
return orig_toterr;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (avpcl mode 6).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 6).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -21,7 +21,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
#include "nvmath/Matrix.inl"
|
#include "nvmath/Matrix.inl"
|
||||||
#include "nvmath/Fitting.h"
|
#include "nvmath/Fitting.h"
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "endpts.h"
|
#include "endpts.h"
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
#include <float.h>
|
#include <float.h>
|
||||||
@ -423,7 +423,7 @@ void AVPCL::decompress_mode7(const char *block, Tile &t)
|
|||||||
}
|
}
|
||||||
|
|
||||||
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
// given a collection of colors and quantized endpoints, generate a palette, choose best entries, and return a single toterr
|
||||||
static float map_colors(const Vector4 colors[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
static float map_colors(const Vector4 colors[], const float importance[], int np, const IntEndptsRGBA_2 &endpts, const RegionPrec &region_prec, float current_err, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
Vector4 palette[NINDICES];
|
Vector4 palette[NINDICES];
|
||||||
float toterr = 0;
|
float toterr = 0;
|
||||||
@ -503,7 +503,7 @@ static void assign_indices(const Tile &tile, int shapeindex, IntEndptsRGBA_2 end
|
|||||||
|
|
||||||
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
// note: indices are valid only if the value returned is less than old_err; otherwise they contain -1's
|
||||||
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
// this function returns either old_err or a value smaller (if it was successful in improving the error)
|
||||||
static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
|
static float perturb_one(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, const IntEndptsRGBA_2 &old_endpts, IntEndptsRGBA_2 &new_endpts,
|
||||||
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
float old_err, int do_b, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
// we have the old endpoints: old_endpts
|
// we have the old endpoints: old_endpts
|
||||||
@ -542,7 +542,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, min_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, min_err, temp_indices);
|
||||||
|
|
||||||
if (err < min_err)
|
if (err < min_err)
|
||||||
{
|
{
|
||||||
@ -574,7 +574,7 @@ static float perturb_one(const Vector4 colors[], int np, int ch, const RegionPre
|
|||||||
// for np = 16 -- adjust error thresholds as a function of np
|
// for np = 16 -- adjust error thresholds as a function of np
|
||||||
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
// always ensure endpoint ordering is preserved (no need to overlap the scan)
|
||||||
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
// if orig_err returned from this is less than its input value, then indices[] will contain valid indices
|
||||||
static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
static float exhaustive(const Vector4 colors[], const float importance[], int np, int ch, const RegionPrec &region_prec, float orig_err, IntEndptsRGBA_2 &opt_endpts, int indices[Tile::TILE_TOTAL])
|
||||||
{
|
{
|
||||||
IntEndptsRGBA_2 temp_endpts;
|
IntEndptsRGBA_2 temp_endpts;
|
||||||
float best_err = orig_err;
|
float best_err = orig_err;
|
||||||
@ -624,7 +624,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -644,7 +644,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
temp_endpts.A[ch] = a;
|
temp_endpts.A[ch] = a;
|
||||||
temp_endpts.B[ch] = b;
|
temp_endpts.B[ch] = b;
|
||||||
|
|
||||||
float err = map_colors(colors, np, temp_endpts, region_prec, best_err, temp_indices);
|
float err = map_colors(colors, importance, np, temp_endpts, region_prec, best_err, temp_indices);
|
||||||
if (err < best_err)
|
if (err < best_err)
|
||||||
{
|
{
|
||||||
amin = a;
|
amin = a;
|
||||||
@ -667,7 +667,7 @@ static float exhaustive(const Vector4 colors[], int np, int ch, const RegionPrec
|
|||||||
return best_err;
|
return best_err;
|
||||||
}
|
}
|
||||||
|
|
||||||
static float optimize_one(const Vector4 colors[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
|
static float optimize_one(const Vector4 colors[], const float importance[], int np, float orig_err, const IntEndptsRGBA_2 &orig_endpts, const RegionPrec &region_prec, IntEndptsRGBA_2 &opt_endpts)
|
||||||
{
|
{
|
||||||
float opt_err = orig_err;
|
float opt_err = orig_err;
|
||||||
|
|
||||||
@ -706,8 +706,8 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
{
|
{
|
||||||
// figure out which endpoint when perturbed gives the most improvement and start there
|
// figure out which endpoint when perturbed gives the most improvement and start there
|
||||||
// if we just alternate, we can easily end up in a local minima
|
// if we just alternate, we can easily end up in a local minima
|
||||||
float err0 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
float err0 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_a, opt_err, 0, temp_indices0); // perturb endpt A
|
||||||
float err1 = perturb_one(colors, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
float err1 = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_b, opt_err, 1, temp_indices1); // perturb endpt B
|
||||||
|
|
||||||
if (err0 < err1)
|
if (err0 < err1)
|
||||||
{
|
{
|
||||||
@ -743,7 +743,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
// now alternate endpoints and keep trying until there is no improvement
|
// now alternate endpoints and keep trying until there is no improvement
|
||||||
for (;;)
|
for (;;)
|
||||||
{
|
{
|
||||||
float err = perturb_one(colors, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
float err = perturb_one(colors, importance, np, ch, region_prec, opt_endpts, new_endpt, opt_err, do_b, temp_indices0);
|
||||||
if (err >= opt_err)
|
if (err >= opt_err)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -777,7 +777,7 @@ static float optimize_one(const Vector4 colors[], int np, float orig_err, const
|
|||||||
bool first = true;
|
bool first = true;
|
||||||
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
for (int ch = 0; ch < NCHANNELS_RGBA; ++ch)
|
||||||
{
|
{
|
||||||
float new_err = exhaustive(colors, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
float new_err = exhaustive(colors, importance, np, ch, region_prec, opt_err, opt_endpts, temp_indices0);
|
||||||
|
|
||||||
if (new_err < opt_err)
|
if (new_err < opt_err)
|
||||||
{
|
{
|
||||||
@ -816,6 +816,7 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
|
IntEndptsRGBA_2 orig_endpts[NREGIONS], const PatternPrec &pattern_prec, float opt_err[NREGIONS], IntEndptsRGBA_2 opt_endpts[NREGIONS])
|
||||||
{
|
{
|
||||||
Vector4 pixels[Tile::TILE_TOTAL];
|
Vector4 pixels[Tile::TILE_TOTAL];
|
||||||
|
float importance[Tile::TILE_TOTAL];
|
||||||
IntEndptsRGBA_2 temp_in, temp_out;
|
IntEndptsRGBA_2 temp_in, temp_out;
|
||||||
int temp_indices[Tile::TILE_TOTAL];
|
int temp_indices[Tile::TILE_TOTAL];
|
||||||
|
|
||||||
@ -824,10 +825,15 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// collect the pixels in the region
|
// collect the pixels in the region
|
||||||
int np = 0;
|
int np = 0;
|
||||||
|
|
||||||
for (int y = 0; y < tile.size_y; y++)
|
for (int y = 0; y < tile.size_y; y++) {
|
||||||
for (int x = 0; x < tile.size_x; x++)
|
for (int x = 0; x < tile.size_x; x++) {
|
||||||
if (REGION(x,y,shapeindex) == region)
|
if (REGION(x, y, shapeindex) == region) {
|
||||||
pixels[np++] = tile.data[y][x];
|
pixels[np] = tile.data[y][x];
|
||||||
|
importance[np] = tile.importance_map[y][x];
|
||||||
|
np++;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
opt_endpts[region] = temp_in = orig_endpts[region];
|
opt_endpts[region] = temp_in = orig_endpts[region];
|
||||||
opt_err[region] = orig_err[region];
|
opt_err[region] = orig_err[region];
|
||||||
@ -843,10 +849,10 @@ static void optimize_endpts(const Tile &tile, int shapeindex, const float orig_e
|
|||||||
// make sure we have a valid error for temp_in
|
// make sure we have a valid error for temp_in
|
||||||
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
// we use FLT_MAX here because we want an accurate temp_in_err, no shortcuts
|
||||||
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
// (mapcolors will compute a mapping but will stop if the error exceeds the value passed in the FLT_MAX position)
|
||||||
float temp_in_err = map_colors(pixels, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
float temp_in_err = map_colors(pixels, importance, np, temp_in, pattern_prec.region_precs[region], FLT_MAX, temp_indices);
|
||||||
|
|
||||||
// now try to optimize these endpoints
|
// now try to optimize these endpoints
|
||||||
float temp_out_err = optimize_one(pixels, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
float temp_out_err = optimize_one(pixels, importance, np, temp_in_err, temp_in, pattern_prec.region_precs[region], temp_out);
|
||||||
|
|
||||||
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
// if we find an improvement, update the best so far and correct the output endpoints and errors
|
||||||
if (temp_out_err < best_err)
|
if (temp_out_err < best_err)
|
||||||
@ -921,7 +927,8 @@ static float refine(const Tile &tile, int shapeindex_best, const FltEndpts endpt
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
throw "No candidate found, should never happen (avpcl mode 7).";
|
nvAssert(false); //throw "No candidate found, should never happen (mode avpcl 7).";
|
||||||
|
return FLT_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void clamp(Vector4 &v)
|
static void clamp(Vector4 &v)
|
||||||
|
@ -12,7 +12,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
|
|
||||||
// Utility and common routines
|
// Utility and common routines
|
||||||
|
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
#include "avpcl.h"
|
#include "avpcl.h"
|
||||||
#include "nvcore/Debug.h"
|
#include "nvcore/Debug.h"
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
@ -129,7 +129,7 @@ float Utils::metric4(Vector4::Arg a, Vector4::Arg b)
|
|||||||
{
|
{
|
||||||
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
||||||
}
|
}
|
||||||
else if (AVPCL::flag_nonuniform_ati)
|
else /*if (AVPCL::flag_nonuniform_ati)*/
|
||||||
{
|
{
|
||||||
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
||||||
}
|
}
|
||||||
@ -255,7 +255,7 @@ float Utils::metric4premult(Vector4::Arg a, Vector4::Arg b)
|
|||||||
{
|
{
|
||||||
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
||||||
}
|
}
|
||||||
else if (AVPCL::flag_nonuniform_ati)
|
else /*if (AVPCL::flag_nonuniform_ati)*/
|
||||||
{
|
{
|
||||||
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
||||||
}
|
}
|
||||||
@ -286,7 +286,7 @@ float Utils::metric3premult_alphaout(Vector3::Arg rgb0, float a0, Vector3::Arg r
|
|||||||
{
|
{
|
||||||
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
||||||
}
|
}
|
||||||
else if (AVPCL::flag_nonuniform_ati)
|
else /*if (AVPCL::flag_nonuniform_ati)*/
|
||||||
{
|
{
|
||||||
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
||||||
}
|
}
|
||||||
@ -341,7 +341,7 @@ float Utils::metric3premult_alphain(Vector3::Arg rgb0, Vector3::Arg rgb1, int ro
|
|||||||
{
|
{
|
||||||
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
rwt = 0.299f; gwt = 0.587f; bwt = 0.114f;
|
||||||
}
|
}
|
||||||
else if (AVPCL::flag_nonuniform_ati)
|
else /*if (AVPCL::flag_nonuniform_ati)*/
|
||||||
{
|
{
|
||||||
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
rwt = 0.3086f; gwt = 0.6094f; bwt = 0.0820f;
|
||||||
}
|
}
|
@ -60,8 +60,7 @@ private:
|
|||||||
return bit != 0;
|
return bit != 0;
|
||||||
}
|
}
|
||||||
void writeone(int bit) {
|
void writeone(int bit) {
|
||||||
if (readonly)
|
nvAssert (!readonly); // "Writing a read-only bit stream"
|
||||||
throw "Writing a read-only bit stream";
|
|
||||||
nvAssert (bptr < maxbits);
|
nvAssert (bptr < maxbits);
|
||||||
if (bptr >= maxbits) return;
|
if (bptr >= maxbits) return;
|
||||||
if (bit&1)
|
if (bit&1)
|
||||||
|
@ -15,7 +15,7 @@ See the License for the specific language governing permissions and limitations
|
|||||||
|
|
||||||
#include "nvmath/Vector.h"
|
#include "nvmath/Vector.h"
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "utils.h"
|
#include "avpcl_utils.h"
|
||||||
|
|
||||||
namespace AVPCL {
|
namespace AVPCL {
|
||||||
|
|
||||||
@ -28,6 +28,7 @@ public:
|
|||||||
static const int TILE_W = 4;
|
static const int TILE_W = 4;
|
||||||
static const int TILE_TOTAL = TILE_H * TILE_W;
|
static const int TILE_TOTAL = TILE_H * TILE_W;
|
||||||
nv::Vector4 data[TILE_H][TILE_W];
|
nv::Vector4 data[TILE_H][TILE_W];
|
||||||
|
float importance_map[TILE_H][TILE_W];
|
||||||
int size_x, size_y; // actual size of tile
|
int size_x, size_y; // actual size of tile
|
||||||
|
|
||||||
Tile() {};
|
Tile() {};
|
||||||
|
@ -1285,9 +1285,9 @@ __device__ void saveBlockDXT1_Parallel(uint endpoints, float3 colors[16], int xr
|
|||||||
ushort endpoint0 = endpoints & 0xFFFF;
|
ushort endpoint0 = endpoints & 0xFFFF;
|
||||||
ushort endpoint1 = endpoints >> 16;
|
ushort endpoint1 = endpoints >> 16;
|
||||||
|
|
||||||
int3 palette[4];
|
int3 palette[4];
|
||||||
palette[0] = color16ToInt3(endpoint0);
|
palette[0] = color16ToInt3(endpoint0);
|
||||||
palette[1] = color16ToInt3(endpoint1);
|
palette[1] = color16ToInt3(endpoint1);
|
||||||
|
|
||||||
int d0 = colorDistance(palette[0], color);
|
int d0 = colorDistance(palette[0], color);
|
||||||
int d1 = colorDistance(palette[1], color);
|
int d1 = colorDistance(palette[1], color);
|
||||||
|
@ -102,8 +102,13 @@ namespace nvtt
|
|||||||
Format_DXT1n, // Not supported.
|
Format_DXT1n, // Not supported.
|
||||||
Format_CTX1, // Not supported.
|
Format_CTX1, // Not supported.
|
||||||
|
|
||||||
Format_BC6,
|
Format_BC6, // Not supported yet.
|
||||||
Format_BC7,
|
Format_BC7, // Not supported yet.
|
||||||
|
|
||||||
|
Format_BC5_Luma, // Two DXT alpha blocks encoding a single float.
|
||||||
|
Format_BC3_RGBM, //
|
||||||
|
|
||||||
|
Format_Count
|
||||||
};
|
};
|
||||||
|
|
||||||
// Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. They are only relevant in RGBA mode.
|
// Pixel types. These basically indicate how the output should be interpreted, but do not have any influence over the input. They are only relevant in RGBA mode.
|
||||||
@ -132,6 +137,7 @@ namespace nvtt
|
|||||||
Decoder_D3D10,
|
Decoder_D3D10,
|
||||||
Decoder_D3D9,
|
Decoder_D3D9,
|
||||||
Decoder_NV5x,
|
Decoder_NV5x,
|
||||||
|
//Decoder_RSX, // To take advantage of DXT5 bug.
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
@ -160,8 +166,9 @@ namespace nvtt
|
|||||||
|
|
||||||
NVTT_API void setPitchAlignment(int pitchAlignment);
|
NVTT_API void setPitchAlignment(int pitchAlignment);
|
||||||
|
|
||||||
// @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion.
|
// @@ I wish this wasn't part of the compression options. Quantization is applied before compression. We don't have compressors with error diffusion.
|
||||||
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127); // (Deprecated in NVTT 2.1)
|
// @@ These options are only taken into account when using the InputOptions API.
|
||||||
|
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
|
||||||
|
|
||||||
NVTT_API void setTargetDecoder(Decoder decoder);
|
NVTT_API void setTargetDecoder(Decoder decoder);
|
||||||
|
|
||||||
@ -205,6 +212,7 @@ namespace nvtt
|
|||||||
InputFormat_BGRA_8UB, // Normalized [0, 1] 8 bit fixed point.
|
InputFormat_BGRA_8UB, // Normalized [0, 1] 8 bit fixed point.
|
||||||
InputFormat_RGBA_16F, // 16 bit floating point.
|
InputFormat_RGBA_16F, // 16 bit floating point.
|
||||||
InputFormat_RGBA_32F, // 32 bit floating point.
|
InputFormat_RGBA_32F, // 32 bit floating point.
|
||||||
|
InputFormat_R_32F, // Single channel 32 bit floating point.
|
||||||
};
|
};
|
||||||
|
|
||||||
// Mipmap downsampling filters.
|
// Mipmap downsampling filters.
|
||||||
@ -426,6 +434,7 @@ namespace nvtt
|
|||||||
|
|
||||||
|
|
||||||
// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
|
// A surface is one level of a 2D or 3D texture. (New in NVTT 2.1)
|
||||||
|
// @@ It would be nice to add support for texture borders for correct resizing of tiled textures and constrained DXT compression.
|
||||||
struct Surface
|
struct Surface
|
||||||
{
|
{
|
||||||
NVTT_API Surface();
|
NVTT_API Surface();
|
||||||
@ -450,7 +459,7 @@ namespace nvtt
|
|||||||
NVTT_API bool isNormalMap() const;
|
NVTT_API bool isNormalMap() const;
|
||||||
NVTT_API int countMipmaps() const;
|
NVTT_API int countMipmaps() const;
|
||||||
NVTT_API int countMipmaps(int min_size) const;
|
NVTT_API int countMipmaps(int min_size) const;
|
||||||
NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const;
|
NVTT_API float alphaTestCoverage(float alphaRef = 0.5, int alpha_channel = 3) const;
|
||||||
NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const;
|
NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) const;
|
||||||
NVTT_API const float * data() const;
|
NVTT_API const float * data() const;
|
||||||
NVTT_API const float * channel(int i) const;
|
NVTT_API const float * channel(int i) const;
|
||||||
@ -496,9 +505,9 @@ namespace nvtt
|
|||||||
NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
|
NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale);
|
||||||
NVTT_API void setBorder(float r, float g, float b, float a);
|
NVTT_API void setBorder(float r, float g, float b, float a);
|
||||||
NVTT_API void fill(float r, float g, float b, float a);
|
NVTT_API void fill(float r, float g, float b, float a);
|
||||||
NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f);
|
NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f, int alpha_channel = 3);
|
||||||
NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f);
|
NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.25f);
|
||||||
NVTT_API void fromRGBM(float range = 1.0f, float threshold = 0.0f);
|
NVTT_API void fromRGBM(float range = 1.0f, float threshold = 0.25f);
|
||||||
NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f);
|
NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f);
|
||||||
NVTT_API void toRGBE(int mantissaBits, int exponentBits);
|
NVTT_API void toRGBE(int mantissaBits, int exponentBits);
|
||||||
NVTT_API void fromRGBE(int mantissaBits, int exponentBits);
|
NVTT_API void fromRGBE(int mantissaBits, int exponentBits);
|
||||||
@ -511,6 +520,7 @@ namespace nvtt
|
|||||||
NVTT_API void convolve(int channel, int kernelSize, float * kernelData);
|
NVTT_API void convolve(int channel, int kernelSize, float * kernelData);
|
||||||
NVTT_API void toLogScale(int channel, float base);
|
NVTT_API void toLogScale(int channel, float base);
|
||||||
NVTT_API void fromLogScale(int channel, float base);
|
NVTT_API void fromLogScale(int channel, float base);
|
||||||
|
NVTT_API void setAtlasBorder(int w, int h, float r, float g, float b, float a);
|
||||||
|
|
||||||
NVTT_API void toneMap(ToneMapper tm, float * parameters);
|
NVTT_API void toneMap(ToneMapper tm, float * parameters);
|
||||||
|
|
||||||
@ -648,6 +658,7 @@ namespace nvtt
|
|||||||
NVTT_API float angularError(const Surface & reference, const Surface & img);
|
NVTT_API float angularError(const Surface & reference, const Surface & img);
|
||||||
NVTT_API Surface diff(const Surface & reference, const Surface & img, float scale);
|
NVTT_API Surface diff(const Surface & reference, const Surface & img, float scale);
|
||||||
|
|
||||||
|
NVTT_API float rmsToneMappedError(const Surface & reference, const Surface & img, float exposure);
|
||||||
|
|
||||||
} // nvtt namespace
|
} // nvtt namespace
|
||||||
|
|
||||||
|
@ -341,7 +341,7 @@ int main(int argc, char *argv[])
|
|||||||
setIndex = atoi(argv[i+1]);
|
setIndex = atoi(argv[i+1]);
|
||||||
|
|
||||||
for (int j = 0; j < s_imageSetCount; j++) {
|
for (int j = 0; j < s_imageSetCount; j++) {
|
||||||
if (strCaseCmp(s_imageSets[j].name, argv[i+1]) == 0) {
|
if (strCaseDiff(s_imageSets[j].name, argv[i+1]) == 0) {
|
||||||
setIndex = j;
|
setIndex = j;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -96,7 +96,7 @@ int main(int argc, char *argv[])
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nv::strCaseCmp(output.extension(), ".dds") != 0)
|
if (nv::strCaseDiff(output.extension(), ".dds") != 0)
|
||||||
{
|
{
|
||||||
//output.stripExtension();
|
//output.stripExtension();
|
||||||
output.append(".dds");
|
output.append(".dds");
|
||||||
|
@ -376,7 +376,7 @@ int main(int argc, char *argv[])
|
|||||||
// Set input options.
|
// Set input options.
|
||||||
nvtt::InputOptions inputOptions;
|
nvtt::InputOptions inputOptions;
|
||||||
|
|
||||||
if (nv::strCaseCmp(input.extension(), ".dds") == 0)
|
if (nv::strCaseDiff(input.extension(), ".dds") == 0)
|
||||||
{
|
{
|
||||||
// Load surface.
|
// Load surface.
|
||||||
nv::DirectDrawSurface dds(input.str());
|
nv::DirectDrawSurface dds(input.str());
|
||||||
@ -428,7 +428,7 @@ int main(int argc, char *argv[])
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
if (nv::strCaseCmp(input.extension(), ".exr") == 0 || nv::strCaseCmp(input.extension(), ".hdr") == 0)
|
if (nv::strCaseDiff(input.extension(), ".exr") == 0 || nv::strCaseDiff(input.extension(), ".hdr") == 0)
|
||||||
{
|
{
|
||||||
loadAsFloat = true;
|
loadAsFloat = true;
|
||||||
}
|
}
|
||||||
@ -519,6 +519,8 @@ int main(int argc, char *argv[])
|
|||||||
nvtt::CompressionOptions compressionOptions;
|
nvtt::CompressionOptions compressionOptions;
|
||||||
compressionOptions.setFormat(format);
|
compressionOptions.setFormat(format);
|
||||||
|
|
||||||
|
//compressionOptions.setQuantization(/*color dithering*/true, /*alpha dithering*/false, /*binary alpha*/false);
|
||||||
|
|
||||||
if (format == nvtt::Format_BC2) {
|
if (format == nvtt::Format_BC2) {
|
||||||
// Dither alpha when using BC2.
|
// Dither alpha when using BC2.
|
||||||
compressionOptions.setQuantization(/*color dithering*/false, /*alpha dithering*/true, /*binary alpha*/false);
|
compressionOptions.setQuantization(/*color dithering*/false, /*alpha dithering*/true, /*binary alpha*/false);
|
||||||
@ -539,6 +541,10 @@ int main(int argc, char *argv[])
|
|||||||
// compressionOptions.setPixelFormat(16, 16, 16, 16);
|
// compressionOptions.setPixelFormat(16, 16, 16, 16);
|
||||||
// compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
|
// compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
|
||||||
// compressionOptions.setPixelFormat(16, 0, 0, 0);
|
// compressionOptions.setPixelFormat(16, 0, 0, 0);
|
||||||
|
|
||||||
|
//compressionOptions.setQuantization(/*color dithering*/true, /*alpha dithering*/false, /*binary alpha*/false);
|
||||||
|
//compressionOptions.setPixelType(nvtt::PixelType_UnsignedNorm);
|
||||||
|
//compressionOptions.setPixelFormat(5, 6, 5, 0);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
|
|
||||||
static bool loadImage(nv::Image & image, const char * fileName)
|
static bool loadImage(nv::Image & image, const char * fileName)
|
||||||
{
|
{
|
||||||
if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0)
|
if (nv::strCaseDiff(nv::Path::extension(fileName), ".dds") == 0)
|
||||||
{
|
{
|
||||||
nv::DirectDrawSurface dds(fileName);
|
nv::DirectDrawSurface dds(fileName);
|
||||||
if (!dds.isValid())
|
if (!dds.isValid())
|
||||||
@ -246,7 +246,7 @@ int main(int argc, char *argv[])
|
|||||||
double g = float(c0.g - c1.g);
|
double g = float(c0.g - c1.g);
|
||||||
double b = float(c0.b - c1.b);
|
double b = float(c0.b - c1.b);
|
||||||
double a = float(c0.a - c1.a);
|
double a = float(c0.a - c1.a);
|
||||||
|
|
||||||
error_r.addSample(r);
|
error_r.addSample(r);
|
||||||
error_g.addSample(g);
|
error_g.addSample(g);
|
||||||
error_b.addSample(b);
|
error_b.addSample(b);
|
||||||
|
@ -40,7 +40,7 @@
|
|||||||
|
|
||||||
static bool loadImage(nv::Image & image, const char * fileName)
|
static bool loadImage(nv::Image & image, const char * fileName)
|
||||||
{
|
{
|
||||||
if (nv::strCaseCmp(nv::Path::extension(fileName), ".dds") == 0)
|
if (nv::strCaseDiff(nv::Path::extension(fileName), ".dds") == 0)
|
||||||
{
|
{
|
||||||
nv::DirectDrawSurface dds(fileName);
|
nv::DirectDrawSurface dds(fileName);
|
||||||
if (!dds.isValid())
|
if (!dds.isValid())
|
||||||
|
Loading…
Reference in New Issue
Block a user