From 94d063528542cd87d36989e6a918589c997b69e1 Mon Sep 17 00:00:00 2001 From: "castano@gmail.com" Date: Fri, 7 Jun 2013 17:53:55 +0000 Subject: [PATCH] Merge changes from the Witness. --- project/vc9/hdrtest/hdrtest.vcproj | 330 +++++++++++ project/vc9/nvtt.sln | 41 +- project/vc9/nvtt/nvtt.vcproj | 16 +- project/vc9/testsuite/testsuite.vcproj | 162 ++++++ src/nvcore/Array.h | 4 +- src/nvcore/Array.inl | 5 +- src/nvcore/Debug.cpp | 153 ++++- src/nvcore/Debug.h | 10 +- src/nvcore/DefsGnucDarwin.h | 3 + src/nvcore/DefsGnucLinux.h | 3 +- src/nvcore/StdStream.h | 17 +- src/nvcore/StrLib.cpp | 9 +- src/nvcore/StrLib.h | 4 +- src/nvcore/Utils.h | 2 +- src/nvcore/nvcore.h | 26 +- src/nvimage/DirectDrawSurface.cpp | 6 +- src/nvimage/ErrorMetric.cpp | 2 - src/nvimage/FloatImage.cpp | 20 +- src/nvimage/NormalMap.cpp | 4 - src/nvmath/Color.h | 26 + src/nvmath/Color.inl | 114 +++- src/nvmath/Fitting.cpp | 2 +- src/nvmath/Fitting.h | 3 - src/nvmath/Half.cpp | 23 +- src/nvmath/Half.h | 48 +- src/nvmath/Vector.h | 22 +- src/nvmath/Vector.inl | 89 +++ src/nvmath/nvmath.h | 27 +- src/nvthread/Atomic.h | 109 +++- src/nvthread/Event.cpp | 31 +- src/nvthread/Mutex.cpp | 4 +- src/nvthread/ParallelFor.cpp | 5 - src/nvthread/Thread.cpp | 16 +- src/nvthread/nvthread.cpp | 28 +- ...{CompressorDXT.cpp => BlockCompressor.cpp} | 18 +- .../{CompressorDXT.h => BlockCompressor.h} | 8 +- src/nvtt/CompressorDX10.h | 10 +- src/nvtt/CompressorDX11.h | 2 +- src/nvtt/CompressorDX9.cpp | 7 +- src/nvtt/CompressorDX9.h | 26 +- src/nvtt/CompressorRGB.cpp | 25 +- src/nvtt/CubeSurface.cpp | 391 +++++++------ src/nvtt/CubeSurface.h | 3 + src/nvtt/InputOptions.h | 164 +++--- src/nvtt/Surface.cpp | 541 ++++++++++++------ src/nvtt/Surface.h | 2 + src/nvtt/nvtt.h | 34 +- src/nvtt/tests/testsuite.cpp | 2 +- src/nvtt/tools/cmdline.h | 2 +- 49 files changed, 1974 insertions(+), 625 deletions(-) create mode 100755 project/vc9/hdrtest/hdrtest.vcproj rename src/nvtt/{CompressorDXT.cpp => BlockCompressor.cpp} (90%) 
rename src/nvtt/{CompressorDXT.h => BlockCompressor.h} (91%) diff --git a/project/vc9/hdrtest/hdrtest.vcproj b/project/vc9/hdrtest/hdrtest.vcproj new file mode 100755 index 0000000..3299430 --- /dev/null +++ b/project/vc9/hdrtest/hdrtest.vcproj @@ -0,0 +1,330 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln index 04e494c..256cacb 100644 --- a/project/vc9/nvtt.sln +++ b/project/vc9/nvtt.sln @@ -97,6 +97,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cubemaptest", "cubemaptest\ {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} EndProjectSection EndProject +Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "hdrtest", "hdrtest\hdrtest.vcproj", "{E493E368-A4CF-4A8D-99DD-E128CC3A27EF}" + ProjectSection(ProjectDependencies) = postProject + {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} + {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647} + {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} + EndProjectSection +EndProject Global GlobalSection(SolutionConfigurationPlatforms) = preSolution Debug|Mixed Platforms = Debug|Mixed Platforms @@ -402,22 +409,26 @@ Global {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Mixed Platforms.Build.0 = Debug|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|x64 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.Build.0 = Debug|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|Win32 
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Win32.ActiveCfg = Debug|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|Win32.Build.0 = Debug|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|x64.ActiveCfg = Debug|Win32 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|x64.ActiveCfg = Debug|x64 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug-CUDA|x64.Build.0 = Debug|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Mixed Platforms.ActiveCfg = Release|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Mixed Platforms.Build.0 = Release|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|x64 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.Build.0 = Release|x64 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Mixed Platforms.Build.0 = Release|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Win32.ActiveCfg = Release|Win32 {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|Win32.Build.0 = Release|Win32 - {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|x64.ActiveCfg = Release|Win32 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|x64.ActiveCfg = Release|x64 + {317B694E-B5C1-42A6-956F-FC12B69175A6}.Release-CUDA|x64.Build.0 = Release|x64 {7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Mixed Platforms.ActiveCfg = Debug|Win32 {7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Mixed Platforms.Build.0 = Debug|Win32 {7DCF280E-702B-49F3-84A7-AE7E146384D6}.Debug|Win32.ActiveCfg = Debug|Win32 @@ -514,6 +525,28 @@ Global {CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|Win32.Build.0 = Release|Win32 
{CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|x64.ActiveCfg = Release|x64 {CFB3FEAC-5720-4B16-9D7E-039DB180B641}.Release-CUDA|x64.Build.0 = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug|Mixed Platforms.ActiveCfg = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug|Mixed Platforms.Build.0 = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug|Win32.ActiveCfg = Debug|Win32 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug|Win32.Build.0 = Debug|Win32 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug|x64.ActiveCfg = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug|x64.Build.0 = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug-CUDA|Mixed Platforms.ActiveCfg = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug-CUDA|Mixed Platforms.Build.0 = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug-CUDA|Win32.ActiveCfg = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug-CUDA|x64.ActiveCfg = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Debug-CUDA|x64.Build.0 = Debug|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release|Mixed Platforms.ActiveCfg = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release|Mixed Platforms.Build.0 = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release|Win32.ActiveCfg = Release|Win32 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release|Win32.Build.0 = Release|Win32 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release|x64.ActiveCfg = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release|x64.Build.0 = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|Mixed Platforms.ActiveCfg = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|Mixed Platforms.Build.0 = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|Win32.ActiveCfg = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.ActiveCfg = Release|x64 + {E493E368-A4CF-4A8D-99DD-E128CC3A27EF}.Release-CUDA|x64.Build.0 = Release|x64 EndGlobalSection 
GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE diff --git a/project/vc9/nvtt/nvtt.vcproj b/project/vc9/nvtt/nvtt.vcproj index d5b9407..88dce20 100644 --- a/project/vc9/nvtt/nvtt.vcproj +++ b/project/vc9/nvtt/nvtt.vcproj @@ -877,6 +877,14 @@ > + + + + @@ -1049,14 +1057,6 @@ RelativePath="..\..\..\src\nvtt\CompressorDX9.h" > - - - - diff --git a/project/vc9/testsuite/testsuite.vcproj b/project/vc9/testsuite/testsuite.vcproj index 294eac8..907bef0 100644 --- a/project/vc9/testsuite/testsuite.vcproj +++ b/project/vc9/testsuite/testsuite.vcproj @@ -12,6 +12,9 @@ + @@ -173,6 +176,165 @@ Name="VCPostBuildEventTool" /> + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/nvcore/Array.h b/src/nvcore/Array.h index 1a309d6..984aa90 100644 --- a/src/nvcore/Array.h +++ b/src/nvcore/Array.h @@ -108,7 +108,7 @@ namespace nv void push_back( const T & val ); void pushBack( const T & val ); - void append( const T & val ); + Array & append( const T & val ); Array & operator<< ( T & t ); void pop_back(); void popBack(); @@ -160,7 +160,7 @@ namespace nv friend void swap(Array & a, Array & b); -protected: + protected: void setArraySize(uint new_size); void setArrayCapacity(uint new_capacity); diff --git a/src/nvcore/Array.inl b/src/nvcore/Array.inl index a59dd89..f7369bc 100755 --- a/src/nvcore/Array.inl +++ b/src/nvcore/Array.inl @@ -22,7 +22,7 @@ namespace nv NV_FORCEINLINE void Array::push_back( const T & val ) { #if 1 - nvDebugCheck(&val < m_buffer || &val > m_buffer+m_size); + nvDebugCheck(&val < m_buffer || &val >= m_buffer+m_size); uint old_size = m_size; uint new_size = m_size + 1; @@ -57,9 +57,10 @@ namespace nv push_back(val); } template - NV_FORCEINLINE void Array::append( const T & val ) + NV_FORCEINLINE Array & Array::append( const T & val ) { push_back(val); + return *this; } // Qt like push operator. 
diff --git a/src/nvcore/Debug.cpp b/src/nvcore/Debug.cpp index 25c93a8..ebf77b3 100644 --- a/src/nvcore/Debug.cpp +++ b/src/nvcore/Debug.cpp @@ -66,7 +66,7 @@ # endif #endif -#define USE_SEPARATE_THREAD 1 +#define NV_USE_SEPARATE_THREAD 1 using namespace nv; @@ -101,7 +101,7 @@ namespace // We should try to simplify the top level filter as much as possible. // http://www.nynaeve.net/?p=128 -#if USE_SEPARATE_THREAD +#if NV_USE_SEPARATE_THREAD // The critical section enforcing the requirement that only one exception be // handled by a handler at a time. @@ -121,7 +121,7 @@ namespace static DWORD s_requesting_thread_id = 0; static EXCEPTION_POINTERS * s_exception_info = NULL; -#endif // USE_SEPARATE_THREAD +#endif // NV_USE_SEPARATE_THREAD struct MinidumpCallbackContext { @@ -236,7 +236,7 @@ namespace return true; } -#if USE_SEPARATE_THREAD +#if NV_USE_SEPARATE_THREAD static DWORD WINAPI ExceptionHandlerThreadMain(void* lpParameter) { nvDebugCheck(s_handler_start_semaphore != NULL); @@ -256,7 +256,7 @@ namespace return 0; } -#endif // USE_SEPARATE_THREAD +#endif // NV_USE_SEPARATE_THREAD static bool hasStackTrace() { return true; @@ -387,7 +387,9 @@ namespace DWORD dwDisplacement; if (!SymGetLineFromAddr64(hProcess, ip, &dwDisplacement, &theLine)) { - builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc); + // Do not print unknown symbols anymore. 
+ break; + //builder.format("unknown(%08X) : %s\n", (uint32)ip, pFunc); } else { @@ -404,6 +406,10 @@ namespace } lines.append(builder.release()); + + if (pFunc != NULL && strcmp(pFunc, "WinMain") == 0) { + break; + } } } } @@ -413,7 +419,7 @@ namespace static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo) { EnterCriticalSection(&s_handler_critical_section); -#if USE_SEPARATE_THREAD +#if NV_USE_SEPARATE_THREAD s_requesting_thread_id = GetCurrentThreadId(); s_exception_info = pExceptionInfo; @@ -474,6 +480,36 @@ namespace } static void handleInvalidParameter(const wchar_t * expresion, const wchar_t * function, const wchar_t * file, unsigned int line, uintptr_t reserved) { + + size_t convertedCharCount = 0; + StringBuilder tmp; + + if (expresion != NULL) { + uint size = toU32(wcslen(expresion) + 1); + tmp.reserve(size); + wcstombs_s(&convertedCharCount, tmp.str(), size, expresion, _TRUNCATE); + + nvDebug("*** Invalid parameter: %s\n", tmp.str()); + + if (file != NULL) { + size = toU32(wcslen(file) + 1); + tmp.reserve(size); + wcstombs_s(&convertedCharCount, tmp.str(), size, file, _TRUNCATE); + + nvDebug(" On file: %s\n", tmp.str()); + + if (function != NULL) { + size = toU32(wcslen(function) + 1); + tmp.reserve(size); + wcstombs_s(&convertedCharCount, tmp.str(), size, function, _TRUNCATE); + + nvDebug(" On function: %s\n", tmp.str()); + } + + nvDebug(" On line: %u\n", line); + } + } + nvDebugBreak(); TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8); } @@ -706,16 +742,22 @@ namespace } // Assert handler method. 
- virtual int assertion( const char * exp, const char * file, int line, const char * func/*=NULL*/ ) + virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { int ret = NV_ABORT_EXIT; StringBuilder error_string; - if( func != NULL ) { - error_string.format( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); + error_string.format("*** Assertion failed: %s\n On file: %s\n On line: %d\n", exp, file, line ); + if (func != NULL) { + error_string.appendFormat(" On function: %s\n", func); } - else { - error_string.format( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); + if (msg != NULL) { + error_string.append(" Message: "); + va_list tmp; + va_copy(tmp, arg); + error_string.appendFormatList(msg, tmp); + va_end(tmp); + error_string.append("\n"); } nvDebug( error_string.str() ); @@ -760,7 +802,7 @@ namespace struct Xbox360AssertHandler : public AssertHandler { // Assert handler method. - virtual int assertion( const char * exp, const char * file, int line, const char * func/*=NULL*/ ) + virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { int ret = NV_ABORT_EXIT; @@ -786,14 +828,47 @@ namespace return ret; } }; +#elif NV_OS_ORBIS + + /** Orbis assert handler. */ + struct OrbisAssertHandler : public AssertHandler + { + // Assert handler method. 
+ virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) + { + if( func != NULL ) { + nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); + } + else { + nvDebug( "*** Assertion failed: %s\n On file: %s\n On line: %d\n ", exp, file, line ); + } + + //SBtodoORBIS print stack trace + /*if (hasStackTrace()) + { + void * trace[64]; + int size = backtrace(trace, 64); + printStackTrace(trace, size, 2); + }*/ + + //SBtodoORBIS check for debugger present + //if (debug::isDebuggerPresent()) + nvDebugBreak(); + + return NV_ABORT_DEBUG; + } + }; + #else /** Unix assert handler. */ struct UnixAssertHandler : public AssertHandler { // Assert handler method. - virtual int assertion(const char * exp, const char * file, int line, const char * func) + virtual int assertion(const char * exp, const char * file, int line, const char * func, const char * msg, va_list arg) { + int ret = NV_ABORT_EXIT; + if( func != NULL ) { nvDebug( "*** Assertion failed: %s\n On file: %s\n On function: %s\n On line: %d\n ", exp, file, func, line ); } @@ -816,9 +891,13 @@ namespace } #endif + if( ret == NV_ABORT_EXIT ) { // Exit cleanly. exit(EXIT_FAILURE + 1); } + + return ret; + } }; #endif @@ -827,22 +906,27 @@ namespace /// Handle assertion through the assert handler. -int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/) +int nvAbort(const char * exp, const char * file, int line, const char * func/*=NULL*/, const char * msg/*= NULL*/, ...) 
{ #if NV_OS_WIN32 //&& NV_CC_MSVC static Win32AssertHandler s_default_assert_handler; #elif NV_OS_XBOX static Xbox360AssertHandler s_default_assert_handler; +#elif NV_OS_ORBIS + static OrbisAssertHandler s_default_assert_handler; #else static UnixAssertHandler s_default_assert_handler; #endif - if (s_assert_handler != NULL) { - return s_assert_handler->assertion( exp, file, line, func ); - } - else { - return s_default_assert_handler.assertion( exp, file, line, func ); - } + va_list arg; + va_start(arg,msg); + + AssertHandler * handler = s_assert_handler != NULL ? s_assert_handler : &s_default_assert_handler; + int result = handler->assertion(exp, file, line, func, msg, arg); + + va_end(arg); + + return result; } // Abnormal termination. Create mini dump and output call stack. @@ -914,6 +998,26 @@ void debug::dumpInfo() #endif } +/// Dump callstack using the specified handler. +void debug::dumpCallstack(MessageHandler *messageHandler, int callstackLevelsToSkip /*= 0*/) +{ +#if (NV_OS_WIN32 && NV_CC_MSVC) || (defined(HAVE_SIGNAL_H) && defined(HAVE_EXECINFO_H)) + if (hasStackTrace()) + { + void * trace[64]; + int size = backtrace(trace, 64); + + Array lines; + writeStackTrace(trace, size, callstackLevelsToSkip + 1, lines); // + 1 to skip the call to dumpCallstack + + for (uint i = 0; i < lines.count(); i++) { + messageHandler->log(lines[i], NULL); + delete lines[i]; + } + } +#endif +} + /// Set the debug message handler. void debug::setMessageHandler(MessageHandler * message_handler) @@ -939,9 +1043,8 @@ void debug::resetAssertHandler() s_assert_handler = NULL; } - #if NV_OS_WIN32 -#if USE_SEPARATE_THREAD +#if NV_USE_SEPARATE_THREAD static void initHandlerThread() { @@ -984,7 +1087,7 @@ static void shutHandlerThread() { // @@ Free stuff. Terminate thread. 
} -#endif // USE_SEPARATE_THREAD +#endif // NV_USE_SEPARATE_THREAD #endif // NV_OS_WIN32 @@ -1009,7 +1112,7 @@ void debug::enableSigHandler(bool interactive) } -#if USE_SEPARATE_THREAD +#if NV_USE_SEPARATE_THREAD initHandlerThread(); #endif diff --git a/src/nvcore/Debug.h b/src/nvcore/Debug.h index 5ba868c..c987e10 100644 --- a/src/nvcore/Debug.h +++ b/src/nvcore/Debug.h @@ -70,11 +70,12 @@ } \ NV_MULTI_LINE_MACRO_END -#define nvAssertMacroWithIgnoreAll(exp) \ +// GCC, LLVM need "##" before the __VA_ARGS__, MSVC doesn't care +#define nvAssertMacroWithIgnoreAll(exp,...) \ NV_MULTI_LINE_MACRO_BEGIN \ static bool ignoreAll = false; \ if (!ignoreAll && !(exp)) { \ - int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__); \ + int result = nvAbort(#exp, __FILE__, __LINE__, __FUNC__, ##__VA_ARGS__); \ if (result == NV_ABORT_DEBUG) { \ nvDebugBreak(); \ } else if (result == NV_ABORT_IGNORE) { \ @@ -157,7 +158,7 @@ #endif -NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL); +NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL, const char * msg = NULL, ...); NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2))); namespace nv @@ -184,7 +185,7 @@ namespace nv // Assert handler interface. 
struct AssertHandler { - virtual int assertion(const char *exp, const char *file, int line, const char *func = NULL) = 0; + virtual int assertion(const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg) = 0; virtual ~AssertHandler() {} }; @@ -192,6 +193,7 @@ namespace nv namespace debug { NVCORE_API void dumpInfo(); + NVCORE_API void dumpCallstack( MessageHandler *messageHandler, int callstackLevelsToSkip = 0 ); NVCORE_API void setMessageHandler( MessageHandler * messageHandler ); NVCORE_API void resetMessageHandler(); diff --git a/src/nvcore/DefsGnucDarwin.h b/src/nvcore/DefsGnucDarwin.h index cc1de77..6a3a52b 100644 --- a/src/nvcore/DefsGnucDarwin.h +++ b/src/nvcore/DefsGnucDarwin.h @@ -27,6 +27,7 @@ #define NV_FASTCALL __attribute__((fastcall)) #define NV_FORCEINLINE __attribute__((always_inline)) inline #define NV_DEPRECATED __attribute__((deprecated)) +#define NV_THREAD_LOCAL //ACS: there's no "__thread" or equivalent on iOS/OSX #if __GNUC__ > 2 #define NV_PURE __attribute__((pure)) @@ -38,6 +39,8 @@ #define NV_NOINLINE __attribute__((noinline)) + + // Define __FUNC__ properly. #if __STDC_VERSION__ < 199901L # if __GNUC__ >= 2 diff --git a/src/nvcore/DefsGnucLinux.h b/src/nvcore/DefsGnucLinux.h index 6ecd123..f8e6f80 100644 --- a/src/nvcore/DefsGnucLinux.h +++ b/src/nvcore/DefsGnucLinux.h @@ -25,8 +25,9 @@ #endif #define NV_FASTCALL __attribute__((fastcall)) -#define NV_FORCEINLINE inline __attribute__((always_inline)) +#define NV_FORCEINLINE __attribute__((always_inline)) #define NV_DEPRECATED __attribute__((deprecated)) +#define NV_THREAD_LOCAL __thread #if __GNUC__ > 2 diff --git a/src/nvcore/StdStream.h b/src/nvcore/StdStream.h index 7c0e438..08f399d 100644 --- a/src/nvcore/StdStream.h +++ b/src/nvcore/StdStream.h @@ -103,10 +103,25 @@ namespace nv clearerr(m_fp); } + // @@ The original implementation uses feof, which only returns true when we attempt to read *past* the end of the stream. 
+ // That is, if we read the last byte of a file, then isAtEnd would still return false, even though the stream pointer is at the file end. This is not the intent and was inconsistent with the implementation of the MemoryStream; a better + // implementation uses ftell and fseek to determine our location within the file. virtual bool isAtEnd() const { nvDebugCheck(m_fp != NULL); - return feof( m_fp ) != 0; + //return feof( m_fp ) != 0; +#if NV_OS_WIN32 + uint pos = _ftell_nolock(m_fp); + _fseek_nolock(m_fp, 0, SEEK_END); + uint end = _ftell_nolock(m_fp); + _fseek_nolock(m_fp, pos, SEEK_SET); +#else + uint pos = (uint)ftell(m_fp); + fseek(m_fp, 0, SEEK_END); + uint end = (uint)ftell(m_fp); + fseek(m_fp, pos, SEEK_SET); +#endif + return pos == end; } /// Always true. diff --git a/src/nvcore/StrLib.cpp b/src/nvcore/StrLib.cpp index df30095..01c498c 100644 --- a/src/nvcore/StrLib.cpp +++ b/src/nvcore/StrLib.cpp @@ -101,6 +101,13 @@ bool nv::strEqual(const char * s1, const char * s2) return strCmp(s1, s2) == 0; } +bool nv::strCaseEqual(const char * s1, const char * s2) +{ + if (s1 == s2) return true; + if (s1 == NULL || s2 == NULL) return false; + return strCaseCmp(s1, s2) == 0; +} + bool nv::strBeginsWith(const char * str, const char * prefix) { //return strstr(str, prefix) == dst; @@ -326,7 +333,7 @@ StringBuilder & StringBuilder::append( const char * s ) if (m_str == NULL) { m_size = slen + 1; m_str = strAlloc(m_size); - memcpy(m_str, s, m_size + 1); + memcpy(m_str, s, m_size); } else { const uint len = uint(strlen( m_str )); diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h index a132325..e3c2e9a 100644 --- a/src/nvcore/StrLib.h +++ b/src/nvcore/StrLib.h @@ -35,12 +35,12 @@ namespace nv uint operator()(const char * str) const { return strHash(str); } }; - NVCORE_API uint strLen(const char * str) NV_PURE; - NVCORE_API int strCaseCmp(const char * s1, const char * s2) NV_PURE; NVCORE_API int strCmp(const char * s1, const char * s2) NV_PURE; + NVCORE_API int 
strCaseCmp(const char * s1, const char * s2) NV_PURE; NVCORE_API bool strEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings. + NVCORE_API bool strCaseEqual(const char * s1, const char * s2) NV_PURE; // Accepts NULL strings. template <> struct Equal { bool operator()(const char * a, const char * b) const { return strEqual(a, b); } diff --git a/src/nvcore/Utils.h b/src/nvcore/Utils.h index cef87ed..29ae96b 100644 --- a/src/nvcore/Utils.h +++ b/src/nvcore/Utils.h @@ -23,7 +23,7 @@ #define NV_INT32_MAX 2147483647 #define NV_UINT32_MAX 0xffffffff #define NV_INT64_MAX POSH_I64(9223372036854775807) -#define NV_INT64_MIN (-POSH_I64(9223372036854775808)) +#define NV_INT64_MIN (-POSH_I64(9223372036854775807)-1) #define NV_UINT64_MAX POSH_U64(0xffffffffffffffff) #define NV_HALF_MAX 65504.0F diff --git a/src/nvcore/nvcore.h b/src/nvcore/nvcore.h index 1681550..b02d5c4 100644 --- a/src/nvcore/nvcore.h +++ b/src/nvcore/nvcore.h @@ -31,12 +31,16 @@ // NV_OS_UNIX // NV_OS_DARWIN // NV_OS_XBOX +// NV_OS_ORBIS +// NV_OS_IOS #define NV_OS_STRING POSH_OS_STRING #if defined POSH_OS_LINUX # define NV_OS_LINUX 1 # define NV_OS_UNIX 1 +#elif defined POSH_OS_ORBIS +# define NV_OS_ORBIS 1 #elif defined POSH_OS_FREEBSD # define NV_OS_FREEBSD 1 # define NV_OS_UNIX 1 @@ -51,6 +55,10 @@ #elif defined POSH_OS_OSX # define NV_OS_DARWIN 1 # define NV_OS_UNIX 1 +#elif defined POSH_OS_IOS +# define NV_OS_DARWIN 1 //ACS should we keep this on IOS? +# define NV_OS_UNIX 1 +# define NV_OS_IOS 1 #elif defined POSH_OS_UNIX # define NV_OS_UNIX 1 #elif defined POSH_OS_WIN32 @@ -63,6 +71,22 @@ # error "Unsupported OS" #endif + +// Threading: +// some platforms don't implement __thread or similar for thread-local-storage +#if NV_OS_UNIX || NV_OS_ORBIS || NV_OS_IOS //ACStodoIOS darwin instead of ios? 
+# define NV_OS_USE_PTHREAD 1 +# if NV_OS_DARWIN || NV_OS_IOS +# define NV_OS_HAS_TLS_QUALIFIER 0 +# else +# define NV_OS_HAS_TLS_QUALIFIER 1 +# endif +#else +# define NV_OS_USE_PTHREAD 0 +# define NV_OS_HAS_TLS_QUALIFIER 1 +#endif + + // CPUs: // NV_CPU_X86 // NV_CPU_X86_64 @@ -182,7 +206,7 @@ typedef uint32 uint; #endif #if __cplusplus > 199711L -#define nvStaticCheck(x) static_assert(x) +#define nvStaticCheck(x) static_assert(x, "Static assert "#x" failed") #else #define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)] #endif diff --git a/src/nvimage/DirectDrawSurface.cpp b/src/nvimage/DirectDrawSurface.cpp index 0ca3d8a..002fd98 100644 --- a/src/nvimage/DirectDrawSurface.cpp +++ b/src/nvimage/DirectDrawSurface.cpp @@ -343,6 +343,7 @@ namespace case DXGI_FORMAT_B8G8R8X8_TYPELESS: case DXGI_FORMAT_B8G8R8X8_UNORM_SRGB: return 8*4; + default: return 0; } @@ -452,8 +453,9 @@ namespace { D3DFMT_A2R10G10B10, 32, 0x3FF00000, 0xFFC00, 0x3FF, 0xC0000000 }, { D3DFMT_A2B10G10R10, 32, 0x3FF, 0xFFC00, 0x3FF00000, 0xC0000000 }, - { D3DFMT_L8, 8, 8, 0, 0, 0 }, // DXGI_FORMAT_R8_UNORM - { D3DFMT_L16, 16, 16, 0, 0, 0 }, // DXGI_FORMAT_R16_UNORM + { D3DFMT_L8, 8, 0xFF, 0, 0, 0 }, // DXGI_FORMAT_R8_UNORM + { D3DFMT_L16, 16, 0xFFFF, 0, 0, 0 }, // DXGI_FORMAT_R16_UNORM + { D3DFMT_A8L8, 16, 0xFF, 0, 0, 0xFF00 }, // DXGI_FORMAT_R8G8_UNORM? 
}; static const uint s_d3d9FormatCount = NV_ARRAY_SIZE(s_d3d9Formats); diff --git a/src/nvimage/ErrorMetric.cpp b/src/nvimage/ErrorMetric.cpp index 8c14575..6d14b7a 100644 --- a/src/nvimage/ErrorMetric.cpp +++ b/src/nvimage/ErrorMetric.cpp @@ -186,8 +186,6 @@ static float f(float t) static float finv(float t) { - const float epsilon = powf(6.0f/29.0f, 3); - if (t > 6.0f / 29.0f) { return powf(t, 3.0f); } diff --git a/src/nvimage/FloatImage.cpp b/src/nvimage/FloatImage.cpp index 7f591e6..51acc98 100644 --- a/src/nvimage/FloatImage.cpp +++ b/src/nvimage/FloatImage.cpp @@ -344,15 +344,15 @@ float FloatImage::sampleLinear(uint c, float x, float y, float z, WrapMode wm) c float FloatImage::sampleNearestClamp(uint c, float x, float y) const { - int ix = ::clamp(iround(x * m_width), 0, m_width-1); - int iy = ::clamp(iround(y * m_height), 0, m_height-1); + int ix = wrapClamp(iround(x * m_width), m_width); + int iy = wrapClamp(iround(y * m_height), m_height); return pixel(c, ix, iy, 0); } float FloatImage::sampleNearestRepeat(uint c, float x, float y) const { - int ix = iround(frac(x) * m_width); - int iy = iround(frac(y) * m_height); + int ix = wrapRepeat(iround(x * m_width), m_width); + int iy = wrapRepeat(iround(y * m_height), m_height); return pixel(c, ix, iy, 0); } @@ -373,9 +373,9 @@ float FloatImage::sampleNearestClamp(uint c, float x, float y, float z) const float FloatImage::sampleNearestRepeat(uint c, float x, float y, float z) const { - int ix = iround(frac(x) * m_width); // wrapRepeat(iround(x * m_width), m_width) - int iy = iround(frac(y) * m_height); // wrapRepeat(iround(y * m_height), m_height) - int iz = iround(frac(z) * m_depth); // wrapRepeat(iround(z * m_depth), m_depth) + int ix = wrapRepeat(iround(x * m_width), m_width); + int iy = wrapRepeat(iround(y * m_height), m_height); + int iz = wrapRepeat(iround(z * m_depth), m_depth); return pixel(c, ix, iy, iz); } @@ -1326,7 +1326,7 @@ void FloatImage::flipZ() const uint d2 = d / 2; for (uint c = 0; c < 
m_componentCount; c++) { - for (uint z = 0; z < d/2; z++) { + for (uint z = 0; z < d2; z++) { float * src = plane(c, z); float * dst = plane(c, d - 1 - z); for (uint i = 0; i < w*h; i++) { @@ -1345,9 +1345,9 @@ float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel, float alph float coverage = 0.0f; +#if 0 const float * alpha = channel(alphaChannel); -#if 0 const uint count = m_pixelCount; for (uint i = 0; i < count; i++) { if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt? @@ -1435,7 +1435,7 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int clamp(alphaChannel, 1, 0.0f, 1.0f); #endif #if _DEBUG - float newCoverage = alphaTestCoverage(alphaRef, alphaChannel); + alphaTestCoverage(alphaRef, alphaChannel); #endif } diff --git a/src/nvimage/NormalMap.cpp b/src/nvimage/NormalMap.cpp index 404186d..2db99a7 100644 --- a/src/nvimage/NormalMap.cpp +++ b/src/nvimage/NormalMap.cpp @@ -199,10 +199,6 @@ void nv::normalizeNormalMap(FloatImage * img) { nvDebugCheck(img != NULL); -#pragma NV_MESSAGE("TODO: Pack and expand normals explicitly?") - - img->expandNormals(0); img->normalize(0); - img->packNormals(0); } diff --git a/src/nvmath/Color.h b/src/nvmath/Color.h index 7324723..055395b 100644 --- a/src/nvmath/Color.h +++ b/src/nvmath/Color.h @@ -118,6 +118,32 @@ namespace nv }; }; + /// 16 bit 4444 BGRA color. 
+ class NVMATH_CLASS Color16_4444 + { + public: + Color16_4444() { } + Color16_4444(const Color16_4444 & c) : u(c.u) { } + explicit Color16_4444(uint16 U) : u(U) { } + + union { + struct { +#if NV_LITTLE_ENDIAN + uint16 b : 4; + uint16 g : 4; + uint16 r : 4; + uint16 a : 4; +#else + uint16 a : 4; + uint16 r : 4; + uint16 g : 4; + uint16 b : 4; +#endif + }; + uint16 u; + }; + }; + } // nv namespace #endif // NV_MATH_COLOR_H diff --git a/src/nvmath/Color.inl b/src/nvmath/Color.inl index a5dfcb2..84ddc59 100644 --- a/src/nvmath/Color.inl +++ b/src/nvmath/Color.inl @@ -10,6 +10,12 @@ namespace nv { + // for Color16 & Color16_4444 bitfields + NV_FORCEINLINE uint32 U32round(float f) { return uint32(floorf(f + 0.5f)); } + NV_FORCEINLINE uint16 U16round(float f) { return uint16(floorf(f + 0.5f)); } + NV_FORCEINLINE uint16 toU4_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 15u); return (uint16)x; } + NV_FORCEINLINE uint16 toU5_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 31u); return (uint16)x; } + NV_FORCEINLINE uint16 toU6_in_U16(int x) { nvDebugCheck(x >= 0 && x <= 63u); return (uint16)x; } // Clamp color components. inline Vector3 colorClamp(Vector3::Arg c) @@ -27,6 +33,16 @@ namespace nv return c / scale; } + // Convert Color16 from float components + inline Color16 toColor16(float r, float g, float b) + { + Color16 color; // 5,6,5 + color.r = toU5_in_U16(nv::U16round(saturate(r) * 31u)); + color.g = toU6_in_U16(nv::U16round(saturate(g) * 63u)); + color.b = toU5_in_U16(nv::U16round(saturate(b) * 31u)); + return color; + } + // Convert Color32 to Color16. inline Color16 toColor16(Color32 c) { @@ -43,6 +59,49 @@ namespace nv return color; } + // Convert Color32 to Color16_4444. + inline Color16_4444 toColor16_4444(Color32 c) + { + Color16_4444 color; + color.a = c.a >> 4; + color.r = c.r >> 4; + color.g = c.g >> 4; + color.b = c.b >> 4; + return color; + } + + // Convert float[4] to Color16_4444. 
+ inline Color16_4444 toColor16_4444(float r, float g, float b, float a) + { + Color16_4444 color; + color.a = toU4_in_U16(nv::U16round(saturate(a) * 15u)); + color.r = toU4_in_U16(nv::U16round(saturate(r) * 15u)); + color.g = toU4_in_U16(nv::U16round(saturate(g) * 15u)); + color.b = toU4_in_U16(nv::U16round(saturate(b) * 15u)); + return color; + } + + // Convert float[4] to Color16_4444. + inline Color16_4444 toColor16_4444_from_argb(float * fc) + { + Color16_4444 color; + color.a = toU4_in_U16(nv::U16round(saturate(fc[0]) * 15u)); + color.r = toU4_in_U16(nv::U16round(saturate(fc[1]) * 15u)); + color.g = toU4_in_U16(nv::U16round(saturate(fc[2]) * 15u)); + color.b = toU4_in_U16(nv::U16round(saturate(fc[3]) * 15u)); + return color; + } + + // Convert float[4] to Color16_4444. + inline Color16_4444 toColor16_4444_from_bgra(float * fc) + { + Color16_4444 color; + color.b = toU4_in_U16(nv::U16round(saturate(fc[0]) * 15u)); + color.g = toU4_in_U16(nv::U16round(saturate(fc[1]) * 15u)); + color.r = toU4_in_U16(nv::U16round(saturate(fc[2]) * 15u)); + color.a = toU4_in_U16(nv::U16round(saturate(fc[3]) * 15u)); + return color; + } // Promote 16 bit color to 32 bit using regular bit expansion. inline Color32 toColor32(Color16 c) @@ -60,13 +119,34 @@ namespace nv return color; } - inline Color32 toColor32(Vector4::Arg v) + // @@ Quantize with exact endpoints or with uniform bins? 
+ inline Color32 toColor32(const Vector4 & v) { Color32 color; - color.r = uint8(saturate(v.x) * 255); - color.g = uint8(saturate(v.y) * 255); - color.b = uint8(saturate(v.z) * 255); - color.a = uint8(saturate(v.w) * 255); + color.r = toU8(nv::iround(saturate(v.x) * 255)); + color.g = toU8(nv::iround(saturate(v.y) * 255)); + color.b = toU8(nv::iround(saturate(v.z) * 255)); + color.a = toU8(nv::iround(saturate(v.w) * 255)); + return color; + } + + inline Color32 toColor32_from_bgra(const Vector4 & v) + { + Color32 color; + color.b = toU8(nv::iround(saturate(v.x) * 255)); + color.g = toU8(nv::iround(saturate(v.y) * 255)); + color.r = toU8(nv::iround(saturate(v.z) * 255)); + color.a = toU8(nv::iround(saturate(v.w) * 255)); + return color; + } + + inline Color32 toColor32_from_argb(const Vector4 & v) + { + Color32 color; + color.a = toU8(nv::iround(saturate(v.x) * 255)); + color.r = toU8(nv::iround(saturate(v.y) * 255)); + color.g = toU8(nv::iround(saturate(v.z) * 255)); + color.b = toU8(nv::iround(saturate(v.w) * 255)); return color; } @@ -92,6 +172,30 @@ namespace nv return h; } + inline float toSrgb(float f) { + if (nv::isNan(f)) f = 0.0f; + else if (f <= 0.0f) f = 0.0f; + else if (f <= 0.0031308f) f = 12.92f * f; + else if (f <= 1.0f) f = (powf(f, 0.41666f) * 1.055f) - 0.055f; + else f = 1.0f; + return f; + } + + inline float fromSrgb(float f) { + if (f < 0.0f) f = 0.0f; + else if (f < 0.04045f) f = f / 12.92f; + else if (f <= 1.0f) f = powf((f + 0.055f) / 1.055f, 2.4f); + else f = 1.0f; + return f; + } + + inline Vector3 toSrgb(const Vector3 & v) { + return Vector3(toSrgb(v.x), toSrgb(v.y), toSrgb(v.z)); + } + + inline Vector3 fromSrgb(const Vector3 & v) { + return Vector3(fromSrgb(v.x), fromSrgb(v.y), fromSrgb(v.z)); + } } // nv namespace diff --git a/src/nvmath/Fitting.cpp b/src/nvmath/Fitting.cpp index ff170d4..5b43ede 100644 --- a/src/nvmath/Fitting.cpp +++ b/src/nvmath/Fitting.cpp @@ -179,7 +179,7 @@ bool nv::Fit::isPlanar(int n, const Vector3 * points, float 
epsilon/*=NV_EPSILON { // compute the centroid and covariance float matrix[6]; - Vector3 centroid = computeCovariance(n, points, matrix); + computeCovariance(n, points, matrix); float eigenValues[3]; Vector3 eigenVectors[3]; diff --git a/src/nvmath/Fitting.h b/src/nvmath/Fitting.h index cf7bcdc..a99c4ac 100644 --- a/src/nvmath/Fitting.h +++ b/src/nvmath/Fitting.h @@ -9,9 +9,6 @@ namespace nv { - class Vector3; - class Plane; - namespace Fit { Vector3 computeCentroid(int n, const Vector3 * points); diff --git a/src/nvmath/Half.cpp b/src/nvmath/Half.cpp index 52c4bf4..512b5d3 100644 --- a/src/nvmath/Half.cpp +++ b/src/nvmath/Half.cpp @@ -74,14 +74,8 @@ // #include "Half.h" - -#include "nvcore/Memory.h" - #include -#if NV_CC_GNUC -#include -#endif // Load immediate static inline uint32 _uint32_li( uint32 a ) @@ -495,9 +489,20 @@ nv::half_to_float( uint16 h ) } +#if !NV_OS_IOS //ACStodoIOS some better define to choose this? + +#if NV_CC_GNUC +#if defined(__i386__) || defined(__x86_64__) +#include +#endif +#endif + +#include "nvcore/Memory.h" // NV_ALIGN_16 + static __m128 half_to_float4_SSE2(__m128i h) { #define SSE_CONST4(name, val) static const NV_ALIGN_16 uint name[4] = { (val), (val), (val), (val) } + #define CONST(name) *(const __m128i *)&name SSE_CONST4(mask_nosign, 0x7fff); @@ -541,7 +546,7 @@ static __m128 half_to_float4_SSE2(__m128i h) } -void nv::half_to_float_array(const uint16 * vin, float * vout, int count) { +void nv::half_to_float_array_SSE2(const uint16 * vin, float * vout, int count) { nvDebugCheck((intptr_t(vin) & 15) == 0); nvDebugCheck((intptr_t(vout) & 15) == 0); nvDebugCheck((count & 7) == 0); @@ -562,7 +567,7 @@ void nv::half_to_float_array(const uint16 * vin, float * vout, int count) { } } - +#endif // @@ These tables could be smaller. 
@@ -769,4 +774,4 @@ static inline uint16_t float_to_half_nobranch(uint32_t x) bits |= (x & 0x007fffff) >> shifttable[(x >> 23) & 0x1ff]; return bits; } -#endif \ No newline at end of file +#endif diff --git a/src/nvmath/Half.h b/src/nvmath/Half.h index 962767a..9027618 100644 --- a/src/nvmath/Half.h +++ b/src/nvmath/Half.h @@ -10,7 +10,8 @@ namespace nv { uint16 half_from_float( uint32 f ); // vin,vout must be 16 byte aligned. count must be a multiple of 8. - void half_to_float_array(const uint16 * vin, float * vout, int count); + // implement a non-SSE version if we need it. For now, this naming makes it clear this is only available when SSE2 is + void half_to_float_array_SSE2(const uint16 * vin, float * vout, int count); void half_init_tables(); @@ -40,6 +41,51 @@ namespace nv { return f.f; } + + union Half { + uint16 raw; + struct { + #if NV_BIG_ENDIAN + uint negative:1; + uint biasedexponent:5; + uint mantissa:10; + #else + uint mantissa:10; + uint biasedexponent:5; + uint negative:1; + #endif + } field; + }; + + + inline float TestHalfPrecisionAwayFromZero(float input) + { + Half h; + h.raw = to_half(input); + h.raw += 1; + + float f = to_float(h.raw); + + // Subtract the initial value to find our precision + float delta = f - input; + + return delta; + } + + inline float TestHalfPrecisionTowardsZero(float input) + { + Half h; + h.raw = to_half(input); + h.raw -= 1; + + float f = to_float(h.raw); + + // Subtract the initial value to find our precision + float delta = f - input; + + return -delta; + } + } // nv namespace #endif // NV_MATH_HALF_H diff --git a/src/nvmath/Vector.h b/src/nvmath/Vector.h index ef09b86..231d3b9 100644 --- a/src/nvmath/Vector.h +++ b/src/nvmath/Vector.h @@ -18,7 +18,8 @@ namespace nv Vector2(float x, float y); Vector2(Vector2::Arg v); - template operator T() const { return T(x, y); } + //template explicit Vector2(const T & v) : x(v.x), y(v.y) {} + //template operator T() const { return T(x, y); } const Vector2 & operator=(Vector2::Arg 
v); @@ -50,11 +51,13 @@ namespace nv Vector3(); explicit Vector3(float x); + //explicit Vector3(int x) : x(float(x)), y(float(x)), z(float(x)) {} Vector3(float x, float y, float z); Vector3(Vector2::Arg v, float z); Vector3(Vector3::Arg v); - template operator T() const { return T(x, y, z); } + //template explicit Vector3(const T & v) : x(v.x), y(v.y), z(v.z) {} + //template operator T() const { return T(x, y, z); } const Vector3 & operator=(Vector3::Arg v); @@ -96,7 +99,8 @@ namespace nv Vector4(Vector4::Arg v); // Vector4(const Quaternion & v); - template operator T() const { return T(x, y, z, w); } + //template explicit Vector4(const T & v) : x(v.x), y(v.y), z(v.z), w(v.w) {} + //template operator T() const { return T(x, y, z, w); } const Vector4 & operator=(Vector4::Arg v); @@ -127,4 +131,16 @@ namespace nv } // nv namespace +// If we had these functions, they would be ambiguous, the compiler would not know which one to pick: +//template Vector2 to(const T & v) { return Vector2(v.x, v.y); } +//template Vector3 to(const T & v) { return Vector3(v.x, v.y, v.z); } +//template Vector4 to(const T & v) { return Vector4(v.x, v.y, v.z, v.z); } + +// We could use a cast operator so that we could infer the expected type, but that doesn't work the same way in all compilers and produces horrible error messages. 
+ +// Instead we simply have explicit casts: +template T to(const nv::Vector2 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector2)); return T(v.x, v.y); } +template T to(const nv::Vector3 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector3)); return T(v.x, v.y, v.z); } +template T to(const nv::Vector4 & v) { NV_COMPILER_CHECK(sizeof(T) == sizeof(nv::Vector4)); return T(v.x, v.y, v.z, v.z); } + #endif // NV_MATH_VECTOR_H diff --git a/src/nvmath/Vector.inl b/src/nvmath/Vector.inl index 996f764..6f26262 100644 --- a/src/nvmath/Vector.inl +++ b/src/nvmath/Vector.inl @@ -336,6 +336,11 @@ namespace nv return sqrtf(lengthSquared(v)); } + inline float distance(Vector2::Arg a, Vector2::Arg b) + { + return length(a - b); + } + inline float inverseLength(Vector2::Arg v) { return 1.0f / sqrtf(lengthSquared(v)); @@ -784,6 +789,90 @@ namespace nv return sdbmFloatHash(v.component, 4, h); } + +#if NV_OS_IOS // LLVM is not happy with implicit conversion of immediate constants to float + + //int: + + inline Vector2 scale(Vector2::Arg v, int s) + { + return Vector2(v.x * s, v.y * s); + } + + inline Vector2 operator*(Vector2::Arg v, int s) + { + return scale(v, s); + } + + inline Vector2 operator*(int s, Vector2::Arg v) + { + return scale(v, s); + } + + inline Vector2 operator/(Vector2::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector3 scale(Vector3::Arg v, int s) + { + return Vector3(v.x * s, v.y * s, v.z * s); + } + + inline Vector3 operator*(Vector3::Arg v, int s) + { + return scale(v, s); + } + + inline Vector3 operator*(int s, Vector3::Arg v) + { + return scale(v, s); + } + + inline Vector3 operator/(Vector3::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + inline Vector4 scale(Vector4::Arg v, int s) + { + return Vector4(v.x * s, v.y * s, v.z * s, v.w * s); + } + + inline Vector4 operator*(Vector4::Arg v, int s) + { + return scale(v, s); + } + + inline Vector4 operator*(int s, Vector4::Arg v) + { + return scale(v, s); + } + + inline Vector4 
operator/(Vector4::Arg v, int s) + { + return scale(v, 1.0f/s); + } + + //double: + + inline Vector3 operator*(Vector3::Arg v, double s) + { + return scale(v, (float)s); + } + + inline Vector3 operator*(double s, Vector3::Arg v) + { + return scale(v, (float)s); + } + + inline Vector3 operator/(Vector3::Arg v, double s) + { + return scale(v, 1.f/((float)s)); + } + +#endif //NV_OS_IOS + } // nv namespace #endif // NV_MATH_VECTOR_INL diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h index 91d9c43..4c455e8 100644 --- a/src/nvmath/nvmath.h +++ b/src/nvmath/nvmath.h @@ -132,7 +132,7 @@ namespace nv { #if NV_OS_WIN32 || NV_OS_XBOX return _finite(f) != 0; -#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS return isfinite(f); #elif NV_OS_LINUX return finitef(f); @@ -147,7 +147,7 @@ namespace nv { #if NV_OS_WIN32 || NV_OS_XBOX return _isnan(f) != 0; -#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD +#elif NV_OS_DARWIN || NV_OS_FREEBSD || NV_OS_OPENBSD || NV_OS_ORBIS return isnan(f); #elif NV_OS_LINUX return isnanf(f); @@ -242,21 +242,18 @@ namespace nv // I'm always confused about which quantizer to use. I think we should choose a quantizer based on how the values are expanded later and this is generally using the 'exact endpoints' rule. - // Quantize a [0, 1] full precision float, using exact endpoints. - inline float quantizeFloat(float f, uint bits) { + // Quantize a float in the [0,1] range, using exact end points or uniform bins. + inline float quantizeFloat(float x, uint bits, bool exactEndPoints = true) { nvDebugCheck(bits <= 16); - float scale = float((1 << bits) - 1); - float offset = 0.0f; - return floor(saturate(f) * scale + offset) / scale; - } - // Quantize a [0, 1] full precision float, using uniform bins. 
- /*inline float quantizeFloat(float f, uint bits) { - nvDebugCheck(bits <= 16); - float scale = float(1 << bits); - float offset = 0.5f; - return floor(saturate(f) * scale + offset) / scale; - }*/ + float range = float(1 << bits); + if (exactEndPoints) { + return floorf(x * (range-1) + 0.5f) / (range-1); + } + else { + return (floorf(x * range) + 0.5f) / range; + } + } union Float754 { unsigned int raw; diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h index 0c16f8c..ece44b5 100644 --- a/src/nvthread/Atomic.h +++ b/src/nvthread/Atomic.h @@ -33,9 +33,15 @@ extern "C" #if NV_CC_CLANG && POSH_CPU_STRONGARM // LLVM/Clang do not yet have functioning atomics as of 2.1 // #include - #endif +//ACS: need this if we want to use Apple's atomics. +/* +#if NV_OS_IOS || NV_OS_DARWIN +// for iOS & OSX we use apple's atomics +#include "libkern/OSAtomic.h" +#endif +*/ namespace nv { @@ -72,8 +78,9 @@ namespace nv { nvDebugCheck((intptr_t(&value) & 3) == 0); #if POSH_CPU_X86 || POSH_CPU_X86_64 - *ptr = value; // on x86, stores are Release nvCompilerWriteBarrier(); + *ptr = value; // on x86, stores are Release + //nvCompilerWriteBarrier(); // @@ IC: Where does this barrier go? In nvtt it was after, in Witness before. Not sure which one is right. #elif POSH_CPU_STRONGARM // this is the easiest but slowest way to do this nvCompilerReadWriteBarrier(); @@ -114,17 +121,90 @@ namespace nv { inline uint32 atomicIncrement(uint32 * value) { nvDebugCheck((intptr_t(value) & 3) == 0); - return (uint32)_InterlockedIncrement((long *)value); } inline uint32 atomicDecrement(uint32 * value) { nvDebugCheck((intptr_t(value) & 3) == 0); - return (uint32)_InterlockedDecrement((long *)value); } + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? Does InterlockedCompareExchange have spurious failures? 
+ inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + long result = _InterlockedCompareExchange((long *)value, (long)desired, (long)expected); + return result == (long)expected; + } + + + inline uint32 atomicSwap(uint32 * value, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)_InterlockedExchange((long *)value, (long)desired); + } + +#elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN) + NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); + + //ACS: Use Apple's atomics instead? I don't know if these are better in any way; there are non-barrier versions too. There's no OSAtomicSwap32 tho' + /* + inline uint32 atomicIncrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)OSAtomicIncrement32Barrier((int32_t *)value); + } + inline uint32 atomicDecrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return (uint32)OSAtomicDecrement32Barrier((int32_t *)value); + } + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return OSAtomicCompareAndSwap32Barrier((int32_t)expected, (int32_t)desired, (int32_t *)value); + } + */ + + inline uint32 atomicIncrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + + return __sync_add_and_fetch(value, 1); + } + + inline uint32 atomicDecrement(uint32 * value) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + + return __sync_sub_and_fetch(value, 1); + } + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. 
+ // @@ Is this strong or weak? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_bool_compare_and_swap(value, expected, desired); + } + + inline uint32 atomicSwap(uint32 * value, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + // this is confusingly named, it doesn't actually do a test but always sets + return __sync_lock_test_and_set(value, desired); + } + + #elif NV_CC_CLANG && POSH_CPU_STRONGARM NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long)); @@ -183,15 +263,32 @@ namespace nv { { nvDebugCheck((intptr_t(value) & 3) == 0); - return __sync_fetch_and_add(value, 1); + return __sync_add_and_fetch(value, 1); } inline uint32 atomicDecrement(uint32 * value) { nvDebugCheck((intptr_t(value) & 3) == 0); - return __sync_fetch_and_sub(value, 1); + return __sync_sub_and_fetch(value, 1); } + + // Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'. + // @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated. + // @@ Is this strong or weak? + inline bool atomicCompareAndSwap(uint32 * value, uint32 expected, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + return __sync_bool_compare_and_swap(value, expected, desired); + } + + inline uint32 atomicSwap(uint32 * value, uint32 desired) + { + nvDebugCheck((intptr_t(value) & 3) == 0); + // this is confusingly named, it doesn't actually do a test but always sets + return __sync_lock_test_and_set(value, desired); + } + #else #error "Atomics not implemented." 
diff --git a/src/nvthread/Event.cpp b/src/nvthread/Event.cpp index d0c03b9..98a4bcc 100644 --- a/src/nvthread/Event.cpp +++ b/src/nvthread/Event.cpp @@ -4,7 +4,7 @@ #if NV_OS_WIN32 #include "Win32.h" -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD #include #endif @@ -32,19 +32,20 @@ void Event::wait() { WaitForSingleObject(m->handle, INFINITE); } -#elif NV_OS_UNIX - -#pragma NV_MESSAGE("Implement event using pthreads!") +#elif NV_OS_USE_PTHREAD struct Event::Private { pthread_cond_t pt_cond; pthread_mutex_t pt_mutex; + int count; + int wait_count; }; Event::Event() : m(new Private) { - // pthread equivalent of auto-reset event - pthread_cond_init(&m->pt_cond, NULL); + m->count=0; + m->wait_count=0; pthread_mutex_init(&m->pt_mutex, NULL); + pthread_cond_init(&m->pt_cond, NULL); } Event::~Event() { @@ -53,11 +54,29 @@ Event::~Event() { } void Event::post() { + pthread_mutex_lock(&m->pt_mutex); + + m->count++; + + //ACS: move this after the unlock? + if(m->wait_count>0) { pthread_cond_signal(&m->pt_cond); + } + + pthread_mutex_unlock(&m->pt_mutex); } void Event::wait() { + pthread_mutex_lock(&m->pt_mutex); + + while(m->count==0) { + m->wait_count++; pthread_cond_wait(&m->pt_cond, &m->pt_mutex); + m->wait_count--; + } + m->count--; + + pthread_mutex_unlock(&m->pt_mutex); } #endif // NV_OS_UNIX diff --git a/src/nvthread/Mutex.cpp b/src/nvthread/Mutex.cpp index cb6ebfc..b657c2e 100644 --- a/src/nvthread/Mutex.cpp +++ b/src/nvthread/Mutex.cpp @@ -6,7 +6,7 @@ #include "Win32.h" -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD #include #include // EBUSY @@ -48,7 +48,7 @@ void Mutex::unlock() LeaveCriticalSection(&m->mutex); } -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD struct Mutex::Private { pthread_mutex_t mutex; diff --git a/src/nvthread/ParallelFor.cpp b/src/nvthread/ParallelFor.cpp index 9632414..216c6d2 100644 --- a/src/nvthread/ParallelFor.cpp +++ b/src/nvthread/ParallelFor.cpp @@ -9,12 +9,7 @@ using namespace nv; -// @@ nvthread is only fully implemented in win32. 
-#if NV_OS_WIN32 #define ENABLE_PARALLEL_FOR 1 -#else -#define ENABLE_PARALLEL_FOR 0 -#endif static void worker(void * arg) { ParallelFor * owner = (ParallelFor *)arg; diff --git a/src/nvthread/Thread.cpp b/src/nvthread/Thread.cpp index 441d208..6c16ad8 100644 --- a/src/nvthread/Thread.cpp +++ b/src/nvthread/Thread.cpp @@ -4,7 +4,7 @@ #if NV_OS_WIN32 #include "Win32.h" -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD #include #include // usleep #endif @@ -15,7 +15,7 @@ struct Thread::Private { #if NV_OS_WIN32 HANDLE thread; -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD pthread_t thread; #endif @@ -32,7 +32,7 @@ unsigned long __stdcall threadFunc(void * arg) { return 0; } -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD extern "C" void * threadFunc(void * arg) { Thread::Private * thread = (Thread::Private *)arg; @@ -62,7 +62,7 @@ void Thread::start(ThreadFunc * func, void * arg) p->thread = CreateThread(NULL, 0, threadFunc, p.ptr(), 0, NULL); //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, p.ptr(), 0, NULL); // @@ So that we can call CRT functions... 
nvDebugCheck(p->thread != NULL); -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD int result = pthread_create(&p->thread, NULL, threadFunc, p.ptr()); nvDebugCheck(result == 0); #endif @@ -76,7 +76,7 @@ void Thread::wait() BOOL ok = CloseHandle (p->thread); p->thread = NULL; nvCheck (ok); -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD int result = pthread_join(p->thread, NULL); p->thread = 0; nvDebugCheck(result == 0); @@ -87,7 +87,7 @@ bool Thread::isRunning () const { #if NV_OS_WIN32 return p->thread != NULL; -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD return p->thread != 0; #endif } @@ -101,7 +101,7 @@ bool Thread::isRunning () const { #if NV_OS_WIN32 SwitchToThread(); -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD int result = sched_yield(); nvDebugCheck(result == 0); #endif @@ -111,7 +111,7 @@ bool Thread::isRunning () const { #if NV_OS_WIN32 Sleep(ms); -#elif NV_OS_UNIX +#elif NV_OS_USE_PTHREAD usleep(1000 * ms); #endif } diff --git a/src/nvthread/nvthread.cpp b/src/nvthread/nvthread.cpp index 987e791..9de9a81 100644 --- a/src/nvthread/nvthread.cpp +++ b/src/nvthread/nvthread.cpp @@ -5,24 +5,24 @@ #include "Thread.h" #if NV_OS_WIN32 -# include "Win32.h" +#include "Win32.h" #elif NV_OS_UNIX -# include -# include -# include +#include +#include +#include #elif NV_OS_DARWIN -# import -# import -# import -# import +#import +#import +#import +#import -# include +//#include -# include -# include -# include -# include -# include +#include +#include +#include +#include +#include #endif using namespace nv; diff --git a/src/nvtt/CompressorDXT.cpp b/src/nvtt/BlockCompressor.cpp similarity index 90% rename from src/nvtt/CompressorDXT.cpp rename to src/nvtt/BlockCompressor.cpp index 7faeb49..88ebebb 100644 --- a/src/nvtt/CompressorDXT.cpp +++ b/src/nvtt/BlockCompressor.cpp @@ -22,7 +22,7 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. 
-#include "CompressorDXT.h" +#include "BlockCompressor.h" #include "OutputOptions.h" #include "TaskDispatcher.h" @@ -46,7 +46,7 @@ using namespace nvtt; #include #endif -void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, const float * data, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) { const uint bs = blockSize(); const uint bw = (w + 3) / 4; @@ -113,7 +113,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, c */ -struct FixedBlockCompressorContext +struct ColorBlockCompressorContext { nvtt::AlphaMode alphaMode; uint w, h; @@ -122,13 +122,13 @@ struct FixedBlockCompressorContext uint bw, bh, bs; uint8 * mem; - FixedBlockCompressor * compressor; + ColorBlockCompressor * compressor; }; // Each task compresses one block. 
-void FixedBlockCompressorTask(void * data, int i) +void ColorBlockCompressorTask(void * data, int i) { - FixedBlockCompressorContext * d = (FixedBlockCompressorContext *) data; + ColorBlockCompressorContext * d = (ColorBlockCompressorContext *) data; uint x = i % d->bw; uint y = i / d->bw; @@ -143,11 +143,11 @@ void FixedBlockCompressorTask(void * data, int i) } } -void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) +void ColorBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * data, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions) { nvDebugCheck(d == 1); - FixedBlockCompressorContext context; + ColorBlockCompressorContext context; context.alphaMode = alphaMode; context.w = w; context.h = h; @@ -169,7 +169,7 @@ void FixedBlockCompressor::compress(nvtt::AlphaMode alphaMode, uint w, uint h, u const uint size = context.bs * count; context.mem = new uint8[size]; - dispatcher->dispatch(FixedBlockCompressorTask, &context, count); + dispatcher->dispatch(ColorBlockCompressorTask, &context, count); outputOptions.writeData(context.mem, size); diff --git a/src/nvtt/CompressorDXT.h b/src/nvtt/BlockCompressor.h similarity index 91% rename from src/nvtt/CompressorDXT.h rename to src/nvtt/BlockCompressor.h index 871eb94..cc829ce 100644 --- a/src/nvtt/CompressorDXT.h +++ b/src/nvtt/BlockCompressor.h @@ -22,8 +22,8 @@ // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // OTHER DEALINGS IN THE SOFTWARE. 
-#ifndef NVTT_COMPRESSORDXT_H -#define NVTT_COMPRESSORDXT_H +#ifndef NVTT_BLOCKCOMPRESSOR_H +#define NVTT_BLOCKCOMPRESSOR_H #include "Compressor.h" @@ -33,7 +33,7 @@ namespace nv struct ColorSet; struct ColorBlock; - struct FixedBlockCompressor : public CompressorInterface + struct ColorBlockCompressor : public CompressorInterface { virtual void compress(nvtt::AlphaMode alphaMode, uint w, uint h, uint d, const float * rgba, nvtt::TaskDispatcher * dispatcher, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); @@ -52,4 +52,4 @@ namespace nv } // nv namespace -#endif // NVTT_COMPRESSORDXT_H +#endif // NVTT_BLOCKCOMPRESSOR_H diff --git a/src/nvtt/CompressorDX10.h b/src/nvtt/CompressorDX10.h index 355e642..5be6361 100644 --- a/src/nvtt/CompressorDX10.h +++ b/src/nvtt/CompressorDX10.h @@ -25,20 +25,20 @@ #ifndef NVTT_COMPRESSORDX10_H #define NVTT_COMPRESSORDX10_H -#include "CompressorDXT.h" +#include "BlockCompressor.h" namespace nv { struct ColorBlock; // Fast CPU compressors. - struct FastCompressorBC4 : public FixedBlockCompressor + struct FastCompressorBC4 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; - struct FastCompressorBC5 : public FixedBlockCompressor + struct FastCompressorBC5 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } @@ -46,13 +46,13 @@ namespace nv // Production CPU compressors. 
- struct ProductionCompressorBC4 : public FixedBlockCompressor + struct ProductionCompressorBC4 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; - struct ProductionCompressorBC5 : public FixedBlockCompressor + struct ProductionCompressorBC5 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } diff --git a/src/nvtt/CompressorDX11.h b/src/nvtt/CompressorDX11.h index f665e3f..3dda9ea 100644 --- a/src/nvtt/CompressorDX11.h +++ b/src/nvtt/CompressorDX11.h @@ -24,7 +24,7 @@ #ifndef NVTT_COMPRESSORDX11_H #define NVTT_COMPRESSORDX11_H -#include "CompressorDXT.h" +#include "BlockCompressor.h" namespace nv { diff --git a/src/nvtt/CompressorDX9.cpp b/src/nvtt/CompressorDX9.cpp index f5446a0..c3bee15 100644 --- a/src/nvtt/CompressorDX9.cpp +++ b/src/nvtt/CompressorDX9.cpp @@ -122,11 +122,7 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co if (set.isSingleColor(/*ignoreAlpha*/true)) { - Color32 c; - c.r = uint8(clamp(set.colors[0].x, 0.0f, 1.0f) * 255); - c.g = uint8(clamp(set.colors[0].y, 0.0f, 1.0f) * 255); - c.b = uint8(clamp(set.colors[0].z, 0.0f, 1.0f) * 255); - c.a = 255; + Color32 c = toColor32(set.colors[0]); OptimalCompress::compressDXT1(c, block); } else @@ -202,7 +198,6 @@ void CompressorDXT1_Luma::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha OptimalCompress::compressDXT1_Luma(rgba, block); } - void CompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output) { BlockDXT3 * block = new(output) BlockDXT3; diff --git a/src/nvtt/CompressorDX9.h b/src/nvtt/CompressorDX9.h index 
2a6e6fe..e3e830b 100644 --- a/src/nvtt/CompressorDX9.h +++ b/src/nvtt/CompressorDX9.h @@ -25,38 +25,38 @@ #ifndef NVTT_COMPRESSORDX9_H #define NVTT_COMPRESSORDX9_H -#include "CompressorDXT.h" +#include "BlockCompressor.h" namespace nv { struct ColorBlock; // Fast CPU compressors. - struct FastCompressorDXT1 : public FixedBlockCompressor + struct FastCompressorDXT1 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; - struct FastCompressorDXT1a : public FixedBlockCompressor + struct FastCompressorDXT1a : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; - struct FastCompressorDXT3 : public FixedBlockCompressor + struct FastCompressorDXT3 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; - struct FastCompressorDXT5 : public FixedBlockCompressor + struct FastCompressorDXT5 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; - struct FastCompressorDXT5n : public FixedBlockCompressor + struct FastCompressorDXT5n : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } @@ -71,38 +71,38 @@ namespace nv virtual uint blockSize() const { return 8; } }; #else - struct CompressorDXT1 : public 
FixedBlockCompressor + struct CompressorDXT1 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; #endif - struct CompressorDXT1a : public FixedBlockCompressor + struct CompressorDXT1a : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; - struct CompressorDXT1_Luma : public FixedBlockCompressor + struct CompressorDXT1_Luma : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } }; - struct CompressorDXT3 : public FixedBlockCompressor + struct CompressorDXT3 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; - struct CompressorDXT5 : public FixedBlockCompressor + struct CompressorDXT5 : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } }; - struct CompressorDXT5n : public FixedBlockCompressor + struct CompressorDXT5n : public ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 16; } @@ -137,7 +137,7 @@ namespace nv #endif #if defined(HAVE_STB) - struct StbCompressorDXT1 : public FixedBlockCompressor + struct StbCompressorDXT1 : public 
ColorBlockCompressor { virtual void compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output); virtual uint blockSize() const { return 8; } diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp index 9a52d16..f2b91b6 100644 --- a/src/nvtt/CompressorRGB.cpp +++ b/src/nvtt/CompressorRGB.cpp @@ -349,20 +349,23 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint } else { - Color32 c; - if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) { - c.r = uint8(clamp(r * 255, 0.0f, 255.0f)); - c.g = uint8(clamp(g * 255, 0.0f, 255.0f)); - c.b = uint8(clamp(b * 255, 0.0f, 255.0f)); - c.a = uint8(clamp(a * 255, 0.0f, 255.0f)); - } + // We first convert to 16 bits, then to the target size. @@ If greater than 16 bits, this will truncate and bitexpand. + // @@ Add support for nvtt::PixelType_SignedInt, nvtt::PixelType_SignedNorm, nvtt::PixelType_UnsignedInt + int ir, ig, ib, ia; + if (compressionOptions.pixelType == nvtt::PixelType_UnsignedNorm) { + ir = iround(clamp(r * 65535.0f, 0.0f, 65535.0f)); + ig = iround(clamp(g * 65535.0f, 0.0f, 65535.0f)); + ib = iround(clamp(b * 65535.0f, 0.0f, 65535.0f)); + ia = iround(clamp(a * 65535.0f, 0.0f, 65535.0f)); + } + uint p = 0; - p |= PixelFormat::convert(c.r, 8, rsize) << rshift; - p |= PixelFormat::convert(c.g, 8, gsize) << gshift; - p |= PixelFormat::convert(c.b, 8, bsize) << bshift; - p |= PixelFormat::convert(c.a, 8, asize) << ashift; + p |= PixelFormat::convert(ir, 16, rsize) << rshift; + p |= PixelFormat::convert(ig, 16, gsize) << gshift; + p |= PixelFormat::convert(ib, 16, bsize) << bshift; + p |= PixelFormat::convert(ia, 16, asize) << ashift; stream.putBits(p, bitCount); } diff --git a/src/nvtt/CubeSurface.cpp b/src/nvtt/CubeSurface.cpp index f906472..cb68113 100644 --- a/src/nvtt/CubeSurface.cpp +++ b/src/nvtt/CubeSurface.cpp @@ -429,6 +429,12 @@ void CubeSurface::range(int channel, float * 
minimum_ptr, float * maximum_ptr) c *maximum_ptr = maximum; } +void CubeSurface::clamp(int channel, float low/*= 0.0f*/, float high/*= 1.0f*/) { + for (int f = 0; f < 6; f++) { + m->face[f].clamp(channel, low, high); + } +} + #include "nvmath/SphericalHarmonic.h" @@ -470,13 +476,114 @@ CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const } -// Warp uv coordinate from [-1, 1] to -/*float warp(float u, int size) { - -}*/ +// Convolve filter against this cube. +Vector3 CubeSurface::Private::applyAngularFilter(const Vector3 & filterDir, float coneAngle, float * filterTable, int tableSize) +{ + const float cosineConeAngle = cos(coneAngle); + nvDebugCheck(cosineConeAngle >= 0); + Vector3 color(0); + float sum = 0; + + // Things I have tried to speed this up: + // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere. + // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case. + // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex. + + // What AMD CubeMapGen does: + // - Compute conservative bounds on the primary face, wrap around the adjacent faces. + + + // For each texel of the input cube. + for (uint f = 0; f < 6; f++) { + + // Test face cone agains filter cone. + float cosineFaceAngle = dot(filterDir, faceNormals[f]); + float faceAngle = acosf(cosineFaceAngle); + + if (faceAngle > coneAngle + atanf(sqrtf(2))) { + // Skip face. + continue; + } + + const int L = toI32(edgeLength-1); + int x0 = 0, x1 = L; + int y0 = 0, y1 = L; + +#if 0 + float u0 = -1; + float u1 = 1; + float v0 = -1; + float v1 = 1; + + // @@ Compute uvs. 
+ + // Expand uv coordinates from [-1,1] to [0, edgeLength) + u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; + v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; + u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; + v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; + nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); + nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); + nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); + nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); + + x0 = clamp(ifloor(u0), 0, L); + y0 = clamp(ifloor(v0), 0, L); + x1 = clamp(iceil(u1), 0, L); + y1 = clamp(iceil(v1), 0, L); +#endif + + nvDebugCheck(x1 >= x0); + nvDebugCheck(y1 >= y0); + + if (x1 == x0 || y1 == y0) { + // Skip this face. + continue; + } + + + const Surface & inputFace = face[f]; + const FloatImage * inputImage = inputFace.m->image; + + for (int y = y0; y <= y1; y++) { + bool inside = false; + for (int x = x0; x <= x1; x++) { + + Vector3 dir = texelTable->direction(f, x, y); + float cosineAngle = dot(dir, filterDir); + + if (cosineAngle > cosineConeAngle) { + float solidAngle = texelTable->solidAngle(f, x, y); + //float scale = powf(saturate(cosineAngle), cosinePower); + + int idx = int(saturate(cosineAngle) * (tableSize - 1)); + float scale = filterTable[idx]; // @@ Do bilinear interpolation? + + float contribution = solidAngle * scale; + + sum += contribution; + color.x += contribution * inputImage->pixel(0, x, y, 0); + color.y += contribution * inputImage->pixel(1, x, y, 0); + color.z += contribution * inputImage->pixel(2, x, y, 0); + + inside = true; + } + else if (inside) { + // Filter scale is monotonic, if we have been inside once and we just exit, then we can skip the rest of the row. + // We could do the same thing for the columns and skip entire rows. 
+ break; + } + } + } + } + + color *= (1.0f / sum); + + return color; +} // We want to find the alpha such that: // cos(alpha)^cosinePower = epsilon @@ -491,6 +598,7 @@ CubeSurface CubeSurface::irradianceFilter(int size, EdgeFixup fixupMethod) const // - parallelize. Done. // - use ISPC? + // Convolve filter against this cube. Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, float coneAngle, float cosinePower) { @@ -500,6 +608,15 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, Vector3 color(0); float sum = 0; + // Things I have tried to speed this up: + // - Compute accurate bounds assuming cone axis aligned to plane, result was too small elsewhere. + // - Compute ellipse that results in the cone/plane intersection and compute its bounds. Sometimes intersection is a parabolla, hard to handle that case. + // - Compute the 6 axis aligned planes that bound the cone, clip faces against planes. Resulting plane equations are way too complex. + + // What AMD CubeMapGen does: + // - Compute conservative bounds on the primary face, wrap around the adjacent faces. + + // For each texel of the input cube. for (uint f = 0; f < 6; f++) { @@ -512,163 +629,36 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, continue; } - // @@ We could do a less conservative test and test the face frustum against the cone... - // Or maybe easier: the face quad against the cone. - - // Compute bounding box of cone intersection against face. - // The intersection of the cone with the face is an elipse, we want the extents of that elipse. - // @@ Hmm... we could even rasterize an elipse! Sounds like FUN! - const int L = toI32(edgeLength-1); int x0 = 0, x1 = L; int y0 = 0, y1 = L; - if (false) { - // New approach? +#if 0 + float u0 = -1; + float u1 = 1; + float v0 = -1; + float v1 = 1; - // For each face, we are looking for 4 planes that bound the cone. + // @@ Compute uvs. 
- // All planes go through the origin. - // Plane fully determined by its normal. - // We only care about planes aligned to one axis. So, for the XY face, we have 4 planes: + // Expand uv coordinates from [-1,1] to [0, edgeLength) + u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; + v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; + u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; + v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; + nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); + nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); + nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); + nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); - // Plane goes through origin. - // Plane normal is unit length. - - // Plane must be tangent to cone -> - // angle between plane normal and cone axis is 90 - cone angle & 90 + cone angle - // dot(N, D) == cos(90 - cone angle) - // dot(N, D) == cos(90 + cone angle) - - // Plane must contain face UV axis - - // Find the 4 planes and how they intersect the unit face, which gives us (u0,v0, u1,v1). - - // Expand uv coordinates, clamp to - } - - // @@ Ugh. This is wrong, or only right when filterDir is aligned to one axis. - if (false) { - // uv coordinates corresponding to filterDir. - //float u = dot(filterDir, faceU[f]) / cosineFaceAngle; - //float v = dot(filterDir, faceV[f]) / cosineFaceAngle; - - // Angular coordinates corresponding to filterDir with respect to faceNormal. - float atu = atan2(dot(filterDir, faceU[f]), cosineFaceAngle); - float atv = atan2(dot(filterDir, faceV[f]), cosineFaceAngle); - - // Expand angles and project back to the face plane. 
- float u0 = tan(clamp(atu - coneAngle, -PI/4, PI/4)); - float v0 = tan(clamp(atv - coneAngle, -PI/4, PI/4)); - float u1 = tan(clamp(atu + coneAngle, -PI/4, PI/4)); - float v1 = tan(clamp(atv + coneAngle, -PI/4, PI/4)); - nvDebugCheck(u0 >= -1 && u0 <= 1); - nvDebugCheck(v0 >= -1 && v0 <= 1); - nvDebugCheck(u1 >= -1 && u1 <= 1); - nvDebugCheck(v1 >= -1 && v1 <= 1); - - // Expand uv coordinates from [-1,1] to [0, edgeLength) - u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; - v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; - u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; - v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; - nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); - nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); - nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); - nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); - - x0 = clamp(ifloor(u0), 0, L); - y0 = clamp(ifloor(v0), 0, L); - x1 = clamp(iceil(u1), 0, L); - y1 = clamp(iceil(v1), 0, L); - - nvDebugCheck(x1 >= x0); - nvDebugCheck(y1 >= y0); - } - - // This is elegant and all that, but the problem is that the projection is not always an ellipse, but often a parabola. - // A parabola has infinite bounds, so this approach is not very practical. Ugh. - if (false) { - //nvCheck(cosineFaceAngle >= 0.0f); @@ Not true for wide angles. - - // Focal point in cartessian coordinates: - Vector3 F = Vector3(dot(faceU[f], filterDir), dot(faceV[f], filterDir), cosineFaceAngle); - - // Focal point in polar coordinates: - Vector2 Fp = toPolar(F); - nvCheck(Fp.y >= 0.0f); // top - //nvCheck(Fp.y <= PI/2); // horizon - - // If this is an ellipse: - if (Fp.y + coneAngle < PI/2) { - nvCheck(Fp.y - coneAngle > -PI/2); - - // Major axis endpoints: - Vector2 Fa1 = toPlane(Fp.x, Fp.y - cosineFaceAngle); // near endpoint. - Vector2 Fa2 = toPlane(Fp.x, Fp.y + cosineFaceAngle); // far endpoint. 
- nvCheck(length(Fa1) <= length(Fa2)); - - // Ellipse center: - Vector2 Fc = (Fa1 + Fa2) * 0.5f; - - // Major radius: - float a = 0.5f * length(Fa1 - Fa2); - - // Focal point: - Vector2 F1 = toPlane(Fp.x, Fp.y); - - // If we project Fa1, Fa2, Fc, F1 onto the filter direction, then: - float da1 = dot(Fa1, F.xy()) / fabs(cosineFaceAngle); - float d1 = dot(F1, F.xy()) / fabs(cosineFaceAngle); - float dc = dot(Fc, F.xy()) / fabs(cosineFaceAngle); - float da2 = dot(Fa2, F.xy()) / fabs(cosineFaceAngle); - //nvDebug("%f <= %f <= %f <= %f (%d: %f %f | %f %f)\n", da1, d1, dc, da2, f, F.x, F.y, Fp.y - coneAngle, Fp.y + coneAngle); - //nvCheck(da1 <= d1 && d1 <= dc && dc <= da2); - - // Translate focal point relative to center: - F1 -= Fc; - - // Focal distance: - //float f = length(F1); // @@ Overriding f! - - // Minor radius: - //float b = sqrtf(a*a - f*f); - - // Second order quadric coefficients: - float A = a*a - F1.x * F1.x; - nvCheck(A >= 0); - - float B = a*a - F1.y * F1.y; - nvCheck(B >= 0); - - // Floating point bounds: - float u0 = clamp(Fc.x - sqrtf(B), -1.0f, 1.0f); - float u1 = clamp(Fc.x + sqrtf(B), -1.0f, 1.0f); - float v0 = clamp(Fc.y - sqrtf(A), -1.0f, 1.0f); - float v1 = clamp(Fc.y + sqrtf(A), -1.0f, 1.0f); - - // Expand uv coordinates from [-1,1] to [0, edgeLength) - u0 = (u0 + 1) * edgeLength * 0.5f - 0.5f; - v0 = (v0 + 1) * edgeLength * 0.5f - 0.5f; - u1 = (u1 + 1) * edgeLength * 0.5f - 0.5f; - v1 = (v1 + 1) * edgeLength * 0.5f - 0.5f; - //nvDebugCheck(u0 >= -0.5f && u0 <= edgeLength - 0.5f); - //nvDebugCheck(v0 >= -0.5f && v0 <= edgeLength - 0.5f); - //nvDebugCheck(u1 >= -0.5f && u1 <= edgeLength - 0.5f); - //nvDebugCheck(v1 >= -0.5f && v1 <= edgeLength - 0.5f); - - x0 = clamp(ifloor(u0), 0, L); - y0 = clamp(ifloor(v0), 0, L); - x1 = clamp(iceil(u1), 0, L); - y1 = clamp(iceil(v1), 0, L); - - nvDebugCheck(x1 >= x0); - nvDebugCheck(y1 >= y0); - } - - // @@ What to do with parabolas? 
- } + x0 = clamp(ifloor(u0), 0, L); + y0 = clamp(ifloor(v0), 0, L); + x1 = clamp(iceil(u1), 0, L); + y1 = clamp(iceil(v1), 0, L); +#endif + nvDebugCheck(x1 >= x0); + nvDebugCheck(y1 >= y0); if (x1 == x0 || y1 == y0) { // Skip this face. @@ -714,17 +704,18 @@ Vector3 CubeSurface::Private::applyCosinePowerFilter(const Vector3 & filterDir, #include "nvthread/ParallelFor.h" -struct ApplyCosinePowerFilterContext { +struct ApplyAngularFilterContext { CubeSurface::Private * inputCube; CubeSurface::Private * filteredCube; float coneAngle; - float cosinePower; + float * filterTable; + int tableSize; EdgeFixup fixupMethod; }; -void ApplyCosinePowerFilterTask(void * context, int id) +void ApplyAngularFilterTask(void * context, int id) { - ApplyCosinePowerFilterContext * ctx = (ApplyCosinePowerFilterContext *)context; + ApplyAngularFilterContext * ctx = (ApplyAngularFilterContext *)context; int size = ctx->filteredCube->edgeLength; @@ -739,7 +730,7 @@ void ApplyCosinePowerFilterTask(void * context, int id) const Vector3 filterDir = texelDirection(f, x, y, size, ctx->fixupMethod); // Convolve filter against cube. - Vector3 color = ctx->inputCube->applyCosinePowerFilter(filterDir, ctx->coneAngle, ctx->cosinePower); + Vector3 color = ctx->inputCube->applyAngularFilter(filterDir, ctx->coneAngle, ctx->filterTable, ctx->tableSize); filteredImage->pixel(0, idx) = color.x; filteredImage->pixel(1, idx) = color.y; @@ -749,8 +740,6 @@ void ApplyCosinePowerFilterTask(void * context, int id) CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const { - const uint edgeLength = m->edgeLength; - // Allocate output cube. 
CubeSurface filteredCube; filteredCube.m->allocate(size); @@ -782,14 +771,24 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixu } }*/ - ApplyCosinePowerFilterContext context; + ApplyAngularFilterContext context; context.inputCube = m; context.filteredCube = filteredCube.m; context.coneAngle = coneAngle; - context.cosinePower = cosinePower; context.fixupMethod = fixupMethod; - nv::ParallelFor parallelFor(ApplyCosinePowerFilterTask, &context); + context.tableSize = 512; + context.filterTable = new float[context.tableSize]; + + // @@ Instead of looking up table between [0 - 1] we should probably use [cos(coneAngle), 1] + + for (int i = 0; i < context.tableSize; i++) { + float f = float(i) / (context.tableSize - 1); + context.filterTable[i] = powf(f, cosinePower); + } + + + nv::ParallelFor parallelFor(ApplyAngularFilterTask, &context); parallelFor.run(6 * size * size); // @@ Implement edge averaging. @@ -816,6 +815,72 @@ CubeSurface CubeSurface::cosinePowerFilter(int size, float cosinePower, EdgeFixu } +// Sample cubemap in the given direction. +Vector3 CubeSurface::Private::sample(const Vector3 & dir) +{ + int f = -1; + if (fabs(dir.x) > fabs(dir.y) && fabs(dir.x) > fabs(dir.z)) { + if (dir.x > 0) f = 0; + else f = 1; + } + else if (fabs(dir.y) > fabs(dir.z)) { + if (dir.y > 0) f = 2; + else f = 3; + } + else { + if (dir.z > 0) f = 4; + else f = 5; + } + nvDebugCheck(f != -1); + + // uv coordinates corresponding to filterDir. + float u = dot(dir, faceU[f]); + float v = dot(dir, faceV[f]); + + FloatImage * img = face[f].m->image; + + Vector3 color; + color.x = img->sampleLinearClamp(0, u, v); + color.y = img->sampleLinearClamp(1, u, v); + color.z = img->sampleLinearClamp(2, u, v); + + return color; +} + +// @@ Not tested! +CubeSurface CubeSurface::fastResample(int size, EdgeFixup fixupMethod) const +{ + // Allocate output cube. + CubeSurface resampledCube; + resampledCube.m->allocate(size); + + // For each texel of the output cube. 
+ for (uint f = 0; f < 6; f++) { + nvtt::Surface resampledFace = resampledCube.m->face[f]; + FloatImage * resampledImage = resampledFace.m->image; + + for (uint y = 0; y < uint(size); y++) { + for (uint x = 0; x < uint(size); x++) { + + const Vector3 filterDir = texelDirection(f, x, y, size, fixupMethod); + + Vector3 color = m->sample(filterDir); + + resampledImage->pixel(0, x, y, 0) = color.x; + resampledImage->pixel(1, x, y, 0) = color.y; + resampledImage->pixel(2, x, y, 0) = color.z; + } + } + } + + // @@ Implement edge averaging. Share this code with cosinePowerFilter + if (fixupMethod == EdgeFixup_Average) { + } + + return resampledCube; +} + + void CubeSurface::toLinear(float gamma) { if (isNull()) return; diff --git a/src/nvtt/CubeSurface.h b/src/nvtt/CubeSurface.h index 84df471..b5e3757 100644 --- a/src/nvtt/CubeSurface.h +++ b/src/nvtt/CubeSurface.h @@ -94,8 +94,11 @@ namespace nvtt } // Filtering helpers: + nv::Vector3 applyAngularFilter(const nv::Vector3 & dir, float coneAngle, float * filterTable, int tableSize); nv::Vector3 applyCosinePowerFilter(const nv::Vector3 & dir, float coneAngle, float cosinePower); + nv::Vector3 sample(const nv::Vector3 & dir); + uint edgeLength; Surface face[6]; TexelTable * texelTable; diff --git a/src/nvtt/InputOptions.h b/src/nvtt/InputOptions.h index 953957c..b28c8fb 100644 --- a/src/nvtt/InputOptions.h +++ b/src/nvtt/InputOptions.h @@ -1,82 +1,82 @@ -// Copyright (c) 2009-2011 Ignacio Castano -// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano -// -// Permission is hereby granted, free of charge, to any person -// obtaining a copy of this software and associated documentation -// files (the "Software"), to deal in the Software without -// restriction, including without limitation the rights to use, -// copy, modify, merge, publish, distribute, sublicense, and/or sell -// copies of the Software, and to permit persons to whom the -// Software is furnished to do so, subject to the following -// conditions: -// 
-// The above copyright notice and this permission notice shall be -// included in all copies or substantial portions of the Software. -// -// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES -// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT -// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, -// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING -// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR -// OTHER DEALINGS IN THE SOFTWARE. - -#ifndef NVTT_INPUTOPTIONS_H -#define NVTT_INPUTOPTIONS_H - -#include "nvtt.h" - -#include "nvmath/Vector.h" - - -namespace nvtt -{ - - struct InputOptions::Private - { - Private() : images(NULL) {} - - WrapMode wrapMode; - TextureType textureType; - InputFormat inputFormat; - AlphaMode alphaMode; - - uint width; - uint height; - uint depth; - uint faceCount; - uint mipmapCount; - uint imageCount; - - void ** images; - - // Gamma conversion. - float inputGamma; - float outputGamma; - - // Mipmap generation options. - bool generateMipmaps; - int maxLevel; - MipmapFilter mipmapFilter; - - // Kaiser filter parameters. - float kaiserWidth; - float kaiserAlpha; - float kaiserStretch; - - // Normal map options. - bool isNormalMap; - bool normalizeMipmaps; - bool convertToNormalMap; - nv::Vector4 heightFactors; - nv::Vector4 bumpFrequencyScale; - - // Adjust extents. 
- uint maxExtent; - RoundMode roundMode; - }; - -} // nvtt namespace - -#endif // NVTT_INPUTOPTIONS_H +// Copyright (c) 2009-2011 Ignacio Castano +// Copyright (c) 2007-2009 NVIDIA Corporation -- Ignacio Castano +// +// Permission is hereby granted, free of charge, to any person +// obtaining a copy of this software and associated documentation +// files (the "Software"), to deal in the Software without +// restriction, including without limitation the rights to use, +// copy, modify, merge, publish, distribute, sublicense, and/or sell +// copies of the Software, and to permit persons to whom the +// Software is furnished to do so, subject to the following +// conditions: +// +// The above copyright notice and this permission notice shall be +// included in all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES +// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT +// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, +// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR +// OTHER DEALINGS IN THE SOFTWARE. + +#ifndef NVTT_INPUTOPTIONS_H +#define NVTT_INPUTOPTIONS_H + +#include "nvtt.h" + +#include "nvmath/Vector.h" + + +namespace nvtt +{ + + struct InputOptions::Private + { + Private() : images(NULL) {} + + WrapMode wrapMode; + TextureType textureType; + InputFormat inputFormat; + AlphaMode alphaMode; + + uint width; + uint height; + uint depth; + uint faceCount; + uint mipmapCount; + uint imageCount; + + void ** images; + + // Gamma conversion. + float inputGamma; + float outputGamma; + + // Mipmap generation options. + bool generateMipmaps; + int maxLevel; + MipmapFilter mipmapFilter; + + // Kaiser filter parameters. 
+ float kaiserWidth; + float kaiserAlpha; + float kaiserStretch; + + // Normal map options. + bool isNormalMap; + bool normalizeMipmaps; + bool convertToNormalMap; + nv::Vector4 heightFactors; + nv::Vector4 bumpFrequencyScale; + + // Adjust extents. + uint maxExtent; + RoundMode roundMode; + }; + +} // nvtt namespace + +#endif // NVTT_INPUTOPTIONS_H diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp index bc16696..a0534fa 100644 --- a/src/nvtt/Surface.cpp +++ b/src/nvtt/Surface.cpp @@ -40,6 +40,10 @@ #include #include // memset, memcpy +#if NV_CC_GNUC +#include // exp2f and log2f +#endif + using namespace nv; using namespace nvtt; @@ -101,6 +105,20 @@ namespace }*/ } +bool nv::canMakeNextMipmap(uint w, uint h, uint d, uint min_size) +{ + if (min_size==1u) { + if(w==1u && h==1u && d==1u) { + return false; + } + } + else if (((w <= min_size || h <= min_size) && d == 1u)) { + return false; + } + + return true; +} + uint nv::countMipmaps(uint w) { uint mipmap = 0; @@ -127,6 +145,21 @@ uint nv::countMipmaps(uint w, uint h, uint d) return mipmap + 1; } +uint nv::countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size) +{ + uint mipmap = 0; + + while (canMakeNextMipmap(w, h, d, min_size)) { + w = max(1U, w / 2); + h = max(1U, h / 2); + d = max(1U, d / 2); + mipmap++; + } + + return mipmap + 1; +} + + uint nv::computeImageSize(uint w, uint h, uint d, uint bitCount, uint pitchAlignmentInBytes, Format format) { if (format == Format_RGBA) { @@ -308,10 +341,18 @@ int Surface::countMipmaps() const return ::countMipmaps(m->image->width(), m->image->height(), 1); } +int Surface::countMipmaps(int min_size) const +{ + if (m->image == NULL) return 0; + return ::countMipmapsWithMinSize(m->image->width(), m->image->height(), 1, min_size); +} + float Surface::alphaTestCoverage(float alphaRef/*= 0.5*/) const { if (m->image == NULL) return 0.0f; + alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256); + return m->image->alphaTestCoverage(alphaRef, 3); } @@ -348,7 +389,7 @@ 
float Surface::average(int channel, int alpha_channel/*= -1*/, float gamma /*= 2 // Avoid division by zero. if (denom == 0.0f) return 0.0f; - return sum / denom; + return powf(sum / denom, 1.0f/gamma); } const float * Surface::data() const @@ -356,6 +397,13 @@ const float * Surface::data() const return m->image->channel(0); } +const float * Surface::channel(int i) const +{ + if (i < 0 || i > 3) return NULL; + return m->image->channel(i); +} + + void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const { // We assume it's clear in case we want to accumulate multiple histograms. @@ -378,12 +426,14 @@ void Surface::histogram(int channel, float rangeMin, float rangeMax, int binCoun } } -void Surface::range(int channel, float * rangeMin, float * rangeMax) const +void Surface::range(int channel, float * rangeMin, float * rangeMax, int alpha_channel/*= -1*/, float alpha_ref/*= 0.f*/) const { Vector2 range(FLT_MAX, -FLT_MAX); FloatImage * img = m->image; + if (alpha_channel == -1) { // no alpha channel; just like the original range function + if (m->image != NULL) { float * c = img->channel(channel); @@ -395,6 +445,25 @@ void Surface::range(int channel, float * rangeMin, float * rangeMax) const if (f > range.y) range.y = f; } } + } + else { // use alpha test to ignore some pixels + //note, it's quite possible to get FLT_MAX,-FLT_MAX back if all pixels fail the test + + if (m->image != NULL) + { + const float * c = img->channel(channel); + const float * a = img->channel(alpha_channel); + + const uint count = img->pixelCount(); + for (uint p = 0; p < count; p++) { + if(a[p]>alpha_ref) { + float f = c[p]; + if (f < range.x) range.x = f; + if (f > range.y) range.y = f; + } + } + } + } *rangeMin = range.x; *rangeMax = range.y; @@ -423,16 +492,44 @@ bool Surface::load(const char * fileName, bool * hasAlpha/*= NULL*/) return true; } -bool Surface::save(const char * fileName) const +bool Surface::save(const char * fileName, bool 
hasAlpha/*=0*/, bool hdr/*=0*/) const { - if (m->image != NULL) - { - return ImageIO::saveFloat(fileName, m->image, 0, 4); + if (m->image == NULL) { + return false; } - return false; + if (hdr) { + return ImageIO::saveFloat(fileName, m->image, 0, 4); + } + else { + AutoPtr image(m->image->createImage(0, 4)); + nvCheck(image != NULL); + + if (hasAlpha) { + image->setFormat(Image::Format_ARGB); + } + + return ImageIO::save(fileName, image.ptr()); + } } + +bool Surface::setImage(int w, int h, int d) +{ + detach(); + + if (m->image == NULL) { + m->image = new FloatImage(); + } + m->image->allocate(4, w, h, d); + m->type = (d == 1) ? TextureType_2D : TextureType_3D; + + m->image->clear(); + + return true; +} + + #if 0 //NV_OS_WIN32 #include @@ -449,13 +546,14 @@ static int filter(unsigned int code, struct _EXCEPTION_POINTERS *ep) { } #define TRY __try - #define CATCH __except (filter(GetExceptionCode(), GetExceptionInformation())) -#else + +#else // 0 + #define TRY if (true) #define CATCH else -#endif +#endif bool Surface::setImage(nvtt::InputFormat format, int w, int h, int d, const void * data) { @@ -553,13 +651,13 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r, const uint8 * bsrc = (const uint8 *)b; const uint8 * asrc = (const uint8 *)a; - try { + TRY { for (int i = 0; i < count; i++) rdst[i] = float(rsrc[i]) / 255.0f; for (int i = 0; i < count; i++) gdst[i] = float(gsrc[i]) / 255.0f; for (int i = 0; i < count; i++) bdst[i] = float(bsrc[i]) / 255.0f; for (int i = 0; i < count; i++) adst[i] = float(asrc[i]) / 255.0f; } - catch(...) 
{ + CATCH { return false; } } @@ -570,13 +668,13 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r, const uint16 * bsrc = (const uint16 *)b; const uint16 * asrc = (const uint16 *)a; - try { + TRY { for (int i = 0; i < count; i++) ((uint32 *)rdst)[i] = half_to_float(rsrc[i]); for (int i = 0; i < count; i++) ((uint32 *)gdst)[i] = half_to_float(gsrc[i]); for (int i = 0; i < count; i++) ((uint32 *)bdst)[i] = half_to_float(bsrc[i]); for (int i = 0; i < count; i++) ((uint32 *)adst)[i] = half_to_float(asrc[i]); } - catch(...) { + CATCH { return false; } } @@ -587,13 +685,13 @@ bool Surface::setImage(InputFormat format, int w, int h, int d, const void * r, const float * bsrc = (const float *)b; const float * asrc = (const float *)a; - try { + TRY { memcpy(rdst, rsrc, count * sizeof(float)); memcpy(gdst, gsrc, count * sizeof(float)); memcpy(bdst, bsrc, count * sizeof(float)); memcpy(adst, asrc, count * sizeof(float)); } - catch(...) { + CATCH { return false; } } @@ -624,87 +722,87 @@ bool Surface::setImage2D(Format format, Decoder decoder, int w, int h, const voi const uint8 * ptr = (const uint8 *)data; - try { + TRY { for (int y = 0; y < bh; y++) { for (int x = 0; x < bw; x++) { ColorBlock colors; - if (format == nvtt::Format_BC1) - { - const BlockDXT1 * block = (const BlockDXT1 *)ptr; + if (format == nvtt::Format_BC1) + { + const BlockDXT1 * block = (const BlockDXT1 *)ptr; - if (decoder == Decoder_D3D10) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_D3D9) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC2) - { - const BlockDXT3 * block = (const BlockDXT3 *)ptr; + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if 
(format == nvtt::Format_BC2) + { + const BlockDXT3 * block = (const BlockDXT3 *)ptr; - if (decoder == Decoder_D3D10) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_D3D9) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC3) - { - const BlockDXT5 * block = (const BlockDXT5 *)ptr; + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC3) + { + const BlockDXT5 * block = (const BlockDXT5 *)ptr; - if (decoder == Decoder_D3D10) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_D3D9) { - block->decodeBlock(&colors, false); - } - else if (decoder == Decoder_NV5x) { - block->decodeBlockNV5x(&colors); - } - } - else if (format == nvtt::Format_BC4) - { - const BlockATI1 * block = (const BlockATI1 *)ptr; - block->decodeBlock(&colors, decoder == Decoder_D3D9); + if (decoder == Decoder_D3D10) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_D3D9) { + block->decodeBlock(&colors, false); + } + else if (decoder == Decoder_NV5x) { + block->decodeBlockNV5x(&colors); + } + } + else if (format == nvtt::Format_BC4) + { + const BlockATI1 * block = (const BlockATI1 *)ptr; + block->decodeBlock(&colors, decoder == Decoder_D3D9); + } + else if (format == nvtt::Format_BC5) + { + const BlockATI2 * block = (const BlockATI2 *)ptr; + block->decodeBlock(&colors, decoder == Decoder_D3D9); + } + + for (int yy = 0; yy < 4; yy++) + { + for (int xx = 0; xx < 4; xx++) + { + Color32 c = colors.color(xx, yy); + + if (x * 4 + xx < w && y * 4 + yy < h) + { + m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; + m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f; + 
m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f; + m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f; + } + } + } + + ptr += bs; + } } - else if (format == nvtt::Format_BC5) - { - const BlockATI2 * block = (const BlockATI2 *)ptr; - block->decodeBlock(&colors, decoder == Decoder_D3D9); - } - - for (int yy = 0; yy < 4; yy++) - { - for (int xx = 0; xx < 4; xx++) - { - Color32 c = colors.color(xx, yy); - - if (x * 4 + xx < w && y * 4 + yy < h) - { - m->image->pixel(0, x*4 + xx, y*4 + yy, 0) = float(c.r) * 1.0f/255.0f; - m->image->pixel(1, x*4 + xx, y*4 + yy, 0) = float(c.g) * 1.0f/255.0f; - m->image->pixel(2, x*4 + xx, y*4 + yy, 0) = float(c.b) * 1.0f/255.0f; - m->image->pixel(3, x*4 + xx, y*4 + yy, 0) = float(c.a) * 1.0f/255.0f; - } - } - } - - ptr += bs; - } - } } - catch(...) { + CATCH { return false; } @@ -812,6 +910,43 @@ void Surface::resize(int w, int h, int d, ResizeFilter filter, float filterWidth m->image = img; } +void Surface::resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter) +{ + if (isNull()) return; + + float filterWidth; + float params[2]; + getDefaultFilterWidthAndParams(filter, &filterWidth, params); + + int w = m->image->width(); + int h = m->image->height(); + int d = m->image->depth(); + + getTargetExtent(&w, &h, &d, maxExtent, roundMode, m->type); + + if (m->type == TextureType_2D) + { + nvDebugCheck(d==1); + int md = nv::min(w,h); + w = md; + h = md; + } + else if (m->type == TextureType_Cube) + { + nvDebugCheck(d==1); + nvDebugCheck(w==h); + } + else if (m->type == TextureType_3D) + { + int md = nv::min(nv::min(w,h),d); + w = md; + h = md; + d = md; + } + + resize(w, h, d, filter, filterWidth, params); +} + void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter) { float filterWidth; @@ -834,18 +969,26 @@ void Surface::resize(int maxExtent, RoundMode roundMode, ResizeFilter filter, fl resize(w, h, d, filter, filterWidth, params); } -bool 
Surface::buildNextMipmap(MipmapFilter filter) +bool Surface::canMakeNextMipmap(int min_size /*= 1*/) +{ + if (isNull()) return false; + + return nv::canMakeNextMipmap(width(), height(), depth(), min_size); +} + + +bool Surface::buildNextMipmap(MipmapFilter filter, int min_size /*= 1*/) { float filterWidth; float params[2]; getDefaultFilterWidthAndParams(filter, &filterWidth, params); - return buildNextMipmap(filter, filterWidth, params); + return buildNextMipmap(filter, filterWidth, params, min_size); } -bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params) +bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params, int min_size /*= 1*/) { - if (isNull() || (width() == 1 && height() == 1 && depth() == 1)) { + if (!canMakeNextMipmap(min_size)) { return false; } @@ -907,6 +1050,30 @@ bool Surface::buildNextMipmap(MipmapFilter filter, float filterWidth, const floa return true; } +bool Surface::buildNextMipmapSolidColor(const float * const color_components) +{ + if (isNull() || (width() == 1 && height() == 1 && depth() == 1)) { + return false; + } + + detach(); + + FloatImage * img = new FloatImage(); + const uint w = max(1, m->image->m_width / 2); + const uint h = max(1, m->image->m_height / 2); + img->allocate(m->image->m_componentCount, w, h); + + for(uint c = 0; c < img->m_componentCount; c++) + { + img->clear(c, color_components[c]); + } + + delete m->image; + m->image = img; + + return true; +} + void Surface::canvasSize(int w, int h, int d) { nvDebugCheck(w > 0 && h > 0 && d > 0); @@ -1083,6 +1250,7 @@ void Surface::transform(const float w0[4], const float w1[4], const float w2[4], m->image->transform(0, xform, voffset); } +// R, G, B, A, 1, 0, -1 void Surface::swizzle(int r, int g, int b, int a) { if (isNull()) return; @@ -1113,52 +1281,6 @@ void Surface::clamp(int channel, float low, float high) m->image->clamp(channel, 1, low, high); } -void Surface::packNormal() -{ - if (isNull()) return; 
- - detach(); - - m->image->scaleBias(0, 3, 0.5f, 0.5f); -} - -void Surface::expandNormal() -{ - if (isNull()) return; - - detach(); - - m->image->scaleBias(0, 3, 2.0f, -1.0f); -} - -// Create a Toksvig map for this normal map. -// http://blog.selfshadow.com/2011/07/22/specular-showdown/ -// @@ Assumes this is a normal map expanded in the [-1, 1] range. -Surface Surface::createToksvigMap(float power) const -{ - if (isNull()) return Surface(); - - // @@ TODO - - return Surface(); -} - -// @@ Should I add support for LEAN maps? That requires 5 terms, which would have to be encoded in two textures. -// There's nothing stopping us from having 5 channels in a surface, and then, let the user swizzle them as they wish. -// CLEAN maps are probably more practical, though. -// http://www.cs.umbc.edu/~olano/papers/lean/ -// http://gaim.umbc.edu/2011/07/24/shiny-and-clean/ -// http://gaim.umbc.edu/2011/07/26/on-error/ -NVTT_API Surface Surface::createCleanMap() const -{ - if (isNull()) return Surface(); - - // @@ TODO - - return Surface(); -} - - void Surface::blend(float red, float green, float blue, float alpha, float t) { if (isNull()) return; @@ -1285,13 +1407,10 @@ void Surface::fill(float red, float green, float blue, float alpha) float * a = img->channel(3); const uint count = img->pixelCount(); - for (uint i = 0; i < count; i++) - { - r[i] = red; - g[i] = green; - b[i] = blue; - a[i] = alpha; - } + for (uint i = 0; i < count; i++) r[i] = red; + for (uint i = 0; i < count; i++) g[i] = green; + for (uint i = 0; i < count; i++) b[i] = blue; + for (uint i = 0; i < count; i++) a[i] = alpha; } @@ -1301,6 +1420,8 @@ void Surface::scaleAlphaToCoverage(float coverage, float alphaRef/*= 0.5f*/) detach(); + alphaRef = nv::clamp(alphaRef, 1.0f/256, 255.0f/256); + m->image->scaleAlphaToCoverage(coverage, alphaRef, 3); } @@ -1341,7 +1462,6 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) detach(); threshold = ::clamp(threshold, 1e-6f, 1.0f); - float irange = 
1.0f / range; FloatImage * img = m->image; float * r = img->channel(0); @@ -1360,6 +1480,7 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) r[i] = R / M; g[i] = G / M; b[i] = B / M; + a[i] = (M - threshold) / (1 - threshold); #else @@ -1402,6 +1523,7 @@ void Surface::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/) } } + void Surface::fromRGBM(float range/*= 1*/) { if (isNull()) return; @@ -1425,6 +1547,37 @@ void Surface::fromRGBM(float range/*= 1*/) } } +// This is dumb way to encode luminance only values. +void Surface::toLM(float range/*= 1*/, float threshold/*= 0.25*/) +{ + if (isNull()) return; + + detach(); + + threshold = ::clamp(threshold, 1e-6f, 1.0f); + + FloatImage * img = m->image; + float * r = img->channel(0); + float * g = img->channel(1); + float * b = img->channel(2); + float * a = img->channel(3); + + const uint count = img->pixelCount(); + for (uint i = 0; i < count; i++) { + float R = nv::clamp(r[i], 0.0f, 1.0f); + float G = nv::clamp(g[i], 0.0f, 1.0f); + float B = nv::clamp(b[i], 0.0f, 1.0f); + + float M = max(max(R, G), max(B, threshold)); + + float L = (R + G + B) / 3; + r[i] = L / M; + b[i] = L / M; + g[i] = L / M; + a[i] = (M - threshold) / (1 - threshold); + } +} + static Color32 toRgbe8(float r, float g, float b) { @@ -2147,21 +2300,25 @@ void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither) FloatImage * img = m->image; - float scale, offset; + float scale, offset0, offset1; if (exactEndPoints) { + // floor(x*(range-1) + 0.5) / (range-1) scale = float((1 << bits) - 1); - offset = 0.0f; + offset0 = 0.5f; + offset1 = 0.0f; } else { + // (floor(x*range) + 0.5) / range scale = float(1 << bits); - offset = 0.5f; + offset0 = 0.0f; + offset1 = 0.5f; } if (!dither) { float * c = img->channel(channel); const uint count = img->pixelCount(); for (uint i = 0; i < count; i++) { - c[i] = floorf(c[i] * scale + offset) / scale; + c[i] = saturate((floorf(c[i] * scale + offset0) + offset1) / scale); } } 
else { @@ -2182,7 +2339,7 @@ void Surface::quantize(int channel, int bits, bool exactEndPoints, bool dither) float & f = img->pixel(channel, x, y, 0); // Add error and quantize. - float qf = floorf((f + row0[1+x]) * scale + offset) / scale; + float qf = saturate((floorf((f + row0[1+x]) * scale + offset0) + offset1) / scale); // Compute new error: float diff = f - qf; @@ -2221,9 +2378,6 @@ void Surface::toNormalMap(float sm, float medium, float big, float large) const FloatImage * img = m->image; m->image = nv::createNormalMap(img, (FloatImage::WrapMode)m->wrapMode, filterWeights); -#pragma NV_MESSAGE("TODO: Pack and expand normals explicitly?") - m->image->packNormals(0); - delete img; m->isNormalMap = true; @@ -2246,7 +2400,6 @@ void Surface::transformNormals(NormalTransform xform) detach(); FloatImage * img = m->image; - img->expandNormals(0); const uint count = img->pixelCount(); for (uint i = 0; i < count; i++) { @@ -2308,8 +2461,6 @@ void Surface::transformNormals(NormalTransform xform) y = n.y; z = n.z; } - - img->packNormals(0); } void Surface::reconstructNormals(NormalTransform xform) @@ -2319,7 +2470,6 @@ void Surface::reconstructNormals(NormalTransform xform) detach(); FloatImage * img = m->image; - img->expandNormals(0); const uint count = img->pixelCount(); for (uint i = 0; i < count; i++) { @@ -2357,8 +2507,6 @@ void Surface::reconstructNormals(NormalTransform xform) y = n.y; z = n.z; } - - img->packNormals(0); } void Surface::toCleanNormalMap() @@ -2367,8 +2515,6 @@ void Surface::toCleanNormalMap() detach(); - m->image->expandNormals(0); - const uint count = m->image->pixelCount(); for (uint i = 0; i < count; i++) { float x = m->image->pixel(0, i); @@ -2376,22 +2522,48 @@ void Surface::toCleanNormalMap() m->image->pixel(2, i) = x*x + y*y; } - - m->image->packNormals(0); } // [-1,1] -> [ 0,1] -void Surface::packNormals() { +void Surface::packNormals(float scale/*= 0.5f*/, float bias/*= 0.5f*/) { if (isNull()) return; detach(); - 
m->image->packNormals(0); + m->image->scaleBias(0, 3, scale, bias); } // [ 0,1] -> [-1,1] -void Surface::expandNormals() { +void Surface::expandNormals(float scale/*= 2.0f*/, float bias/*= - 2.0f * 127.0f / 255.0f*/) { if (isNull()) return; detach(); - m->image->expandNormals(0); + m->image->scaleBias(0, 3, scale, bias); +} + + +// Create a Toksvig map for this normal map. +// http://blog.selfshadow.com/2011/07/22/specular-showdown/ +// @@ Assumes this is a normal map expanded in the [-1, 1] range. +Surface Surface::createToksvigMap(float power) const +{ + if (isNull()) return Surface(); + + // @@ TODO + + return Surface(); +} + +// @@ Should I add support for LEAN maps? That requires 5 terms, which would have to be encoded in two textures. +// There's nothing stopping us from having 5 channels in a surface, and then, let the user swizzle them as they wish. +// CLEAN maps are probably more practical, though. +// http://www.cs.umbc.edu/~olano/papers/lean/ +// http://gaim.umbc.edu/2011/07/24/shiny-and-clean/ +// http://gaim.umbc.edu/2011/07/26/on-error/ +NVTT_API Surface Surface::createCleanMap() const +{ + if (isNull()) return Surface(); + + // @@ TODO + + return Surface(); } @@ -2422,7 +2594,7 @@ void Surface::flipZ() m->image->flipZ(); } -Surface Surface::subImage(int x0, int x1, int y0, int y1, int z0, int z1) const +Surface Surface::createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const { Surface s; @@ -2495,9 +2667,6 @@ bool Surface::addChannel(const Surface & srcImage, int srcChannel, int dstChanne dst = m->image; - const uint w = src->width(); - const uint h = src->height(); - float * d = dst->channel(dstChannel); const float * s = src->channel(srcChannel); @@ -2510,6 +2679,38 @@ bool Surface::addChannel(const Surface & srcImage, int srcChannel, int dstChanne } +bool Surface::copy(const Surface & srcImage, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst) +{ + if (xsrc < 0 || ysrc < 0 || zsrc < 0) 
return false; + if (xdst < 0 || ydst < 0 || zdst < 0) return false; + + FloatImage * dst = m->image; + const FloatImage * src = srcImage.m->image; + + if (toU32(xsrc + xsize) > src->width() || toU32(ysrc + ysize) > src->height() || toU32(zsrc + zsize) > src->depth()) return false; + if (toU32(xdst + xsize) > dst->width() || toU32(ydst + ysize) > dst->height() || toU32(zdst + zsize) > dst->depth()) return false; + + detach(); + + // For each channel. + for(int i = 0; i < 4; i++) { + float * d = dst->channel(i); + const float * s = src->channel(i); + + // Copy region from src to dst. + for (int z = 0; z < zsize; z++) { + for (int y = 0; y < ysize; y++) { + for (int x = 0; x < xsize; x++) { + d[dst->index(xdst + x, ydst + y, zdst + z)] = s[src->index(xsrc + x, ysrc + y, zsrc + z)]; + } + } + } + } + + return true; +} + + float nvtt::rmsError(const Surface & reference, const Surface & image) { diff --git a/src/nvtt/Surface.h b/src/nvtt/Surface.h index 6013995..419a0e1 100644 --- a/src/nvtt/Surface.h +++ b/src/nvtt/Surface.h @@ -78,8 +78,10 @@ namespace nvtt } // nvtt namespace namespace nv { + bool canMakeNextMipmap(uint w, uint h, uint d, uint min_size); uint countMipmaps(uint w); uint countMipmaps(uint w, uint h, uint d); + uint countMipmapsWithMinSize(uint w, uint h, uint d, uint min_size); uint computeImageSize(uint w, uint h, uint d, uint bitCount, uint alignmentInBytes, nvtt::Format format); void getTargetExtent(int * w, int * h, int * d, int maxExtent, nvtt::RoundMode roundMode, nvtt::TextureType textureType); } diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h index ce5cb1f..c6c6aec 100644 --- a/src/nvtt/nvtt.h +++ b/src/nvtt/nvtt.h @@ -454,15 +454,18 @@ namespace nvtt NVTT_API AlphaMode alphaMode() const; NVTT_API bool isNormalMap() const; NVTT_API int countMipmaps() const; + NVTT_API int countMipmaps(int min_size) const; NVTT_API float alphaTestCoverage(float alphaRef = 0.5) const; NVTT_API float average(int channel, int alpha_channel = -1, float gamma = 2.2f) 
const; NVTT_API const float * data() const; + NVTT_API const float * channel(int i) const; NVTT_API void histogram(int channel, float rangeMin, float rangeMax, int binCount, int * binPtr) const; - NVTT_API void range(int channel, float * rangeMin, float * rangeMax) const; + NVTT_API void range(int channel, float * rangeMin, float * rangeMax, int alpha_channel = -1, float alpha_ref = 0.f) const; // Texture data. NVTT_API bool load(const char * fileName, bool * hasAlpha = 0); - NVTT_API bool save(const char * fileName) const; + NVTT_API bool save(const char * fileName, bool hasAlpha = 0, bool hdr = 0) const; + NVTT_API bool setImage(int w, int h, int d); NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * data); NVTT_API bool setImage(InputFormat format, int w, int h, int d, const void * r, const void * g, const void * b, const void * a); NVTT_API bool setImage2D(Format format, Decoder decoder, int w, int h, const void * data); @@ -472,9 +475,14 @@ namespace nvtt NVTT_API void resize(int w, int h, int d, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); - NVTT_API bool buildNextMipmap(MipmapFilter filter); - NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0); + NVTT_API void resize_make_square(int maxExtent, RoundMode roundMode, ResizeFilter filter); + + NVTT_API bool buildNextMipmap(MipmapFilter filter, int min_size = 1); + NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0, int min_size = 1); + NVTT_API bool buildNextMipmapSolidColor(const float * const color_components); NVTT_API void canvasSize(int w, int h, int d); + // associated to resizing: + NVTT_API bool canMakeNextMipmap(int min_size = 1); // Color transforms. 
NVTT_API void toLinear(float gamma); @@ -488,17 +496,15 @@ namespace nvtt NVTT_API void swizzle(int r, int g, int b, int a); NVTT_API void scaleBias(int channel, float scale, float bias); NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f); - NVTT_API void packNormal(); - NVTT_API void expandNormal(); NVTT_API void blend(float r, float g, float b, float a, float t); NVTT_API void premultiplyAlpha(); NVTT_API void toGreyScale(float redScale, float greenScale, float blueScale, float alphaScale); NVTT_API void setBorder(float r, float g, float b, float a); NVTT_API void fill(float r, float g, float b, float a); NVTT_API void scaleAlphaToCoverage(float coverage, float alphaRef = 0.5f); - //NVTT_API bool normalizeRange(float * rangeMin, float * rangeMax); NVTT_API void toRGBM(float range = 1.0f, float threshold = 0.0f); NVTT_API void fromRGBM(float range = 1.0f); + NVTT_API void toLM(float range = 1.0f, float threshold = 0.0f); NVTT_API void toRGBE(int mantissaBits, int exponentBits); NVTT_API void fromRGBE(int mantissaBits, int exponentBits); NVTT_API void toYCoCg(); @@ -519,14 +525,14 @@ namespace nvtt NVTT_API void binarize(int channel, float threshold, bool dither); NVTT_API void quantize(int channel, int bits, bool exactEndPoints, bool dither); - // Normal map transforms. @@ All these methods assume packed normals. + // Normal map transforms. 
NVTT_API void toNormalMap(float sm, float medium, float big, float large); NVTT_API void normalizeNormalMap(); NVTT_API void transformNormals(NormalTransform xform); NVTT_API void reconstructNormals(NormalTransform xform); NVTT_API void toCleanNormalMap(); - NVTT_API void packNormals(); // [-1,1] -> [ 0,1] - NVTT_API void expandNormals(); // [ 0,1] -> [-1,1] + NVTT_API void packNormals(float scale = 0.5f, float bias = 0.5f); // [-1,1] -> [ 0,1] + NVTT_API void expandNormals(float scale = 2.0f, float bias = -1.0f); // [ 0,1] -> [-1,1] NVTT_API Surface createToksvigMap(float power) const; NVTT_API Surface createCleanMap() const; @@ -534,7 +540,7 @@ namespace nvtt NVTT_API void flipX(); NVTT_API void flipY(); NVTT_API void flipZ(); - NVTT_API Surface subImage(int x0, int x1, int y0, int y1, int z0, int z1) const; + NVTT_API Surface createSubImage(int x0, int x1, int y0, int y1, int z0, int z1) const; // Copy image data. NVTT_API bool copyChannel(const Surface & srcImage, int srcChannel); @@ -542,6 +548,9 @@ namespace nvtt NVTT_API bool addChannel(const Surface & img, int srcChannel, int dstChannel, float scale); + NVTT_API bool copy(const Surface & src, int xsrc, int ysrc, int zsrc, int xsize, int ysize, int zsize, int xdst, int ydst, int zdst); + + //private: void detach(); @@ -599,12 +608,15 @@ namespace nvtt NVTT_API float average(int channel) const; NVTT_API void range(int channel, float * minimum_ptr, float * maximum_ptr) const; + NVTT_API void clamp(int channel, float low = 0.0f, float high = 1.0f); // Filtering. 
NVTT_API CubeSurface irradianceFilter(int size, EdgeFixup fixupMethod) const; NVTT_API CubeSurface cosinePowerFilter(int size, float cosinePower, EdgeFixup fixupMethod) const; + NVTT_API CubeSurface fastResample(int size, EdgeFixup fixupMethod) const; + /* NVTT_API void resize(int w, int h, ResizeFilter filter); diff --git a/src/nvtt/tests/testsuite.cpp b/src/nvtt/tests/testsuite.cpp index dfd8e3b..184e5cb 100644 --- a/src/nvtt/tests/testsuite.cpp +++ b/src/nvtt/tests/testsuite.cpp @@ -856,7 +856,7 @@ int main(int argc, char *argv[]) outputFileName.stripExtension(); if (set.type == ImageType_HDR) outputFileName.append(".dds"); else outputFileName.append(".tga"); - if (!img_out.save(outputFileName.str())) + if (!img_out.save(outputFileName.str(), set.type == ImageType_RGBA, set.type == ImageType_HDR)) { printf("Error saving file '%s'.\n", outputFileName.str()); } diff --git a/src/nvtt/tools/cmdline.h b/src/nvtt/tools/cmdline.h index 14878ef..7617ae7 100644 --- a/src/nvtt/tools/cmdline.h +++ b/src/nvtt/tools/cmdline.h @@ -58,7 +58,7 @@ struct MyAssertHandler : public nv::AssertHandler { } // Handler method, note that func might be NULL! - virtual int assertion( const char *exp, const char *file, int line, const char *func ) { + virtual int assertion( const char *exp, const char *file, int line, const char *func, const char *msg, va_list arg ) { fprintf(stderr, "Assertion failed: %s\nIn %s:%d\n", exp, file, line); nv::debug::dumpInfo(); exit(1);