Merge changes from The Witness.
This commit is contained in:
parent
3b4fcd0369
commit
04bdc76749
@ -290,11 +290,23 @@ namespace nv
|
|||||||
template <typename T>
|
template <typename T>
|
||||||
NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
|
NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
|
||||||
{
|
{
|
||||||
destroy_range(m_buffer, count, m_size);
|
#if 1 // Simpler, but maybe not as efficient?
|
||||||
|
destroy_range(m_buffer, 0, m_size);
|
||||||
|
|
||||||
setArraySize(count);
|
setArraySize(count);
|
||||||
|
|
||||||
::nv::copy(m_buffer, data, count);
|
construct_range(m_buffer, count, 0, data);
|
||||||
|
#else
|
||||||
|
const uint old_size = m_size;
|
||||||
|
|
||||||
|
destroy_range(m_buffer, count, old_size);
|
||||||
|
|
||||||
|
setArraySize(count);
|
||||||
|
|
||||||
|
copy_range(m_buffer, data, old_size);
|
||||||
|
|
||||||
|
construct_range(m_buffer, count, old_size, data);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
// Assignment operator.
|
// Assignment operator.
|
||||||
|
@ -172,48 +172,53 @@ namespace
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
MINIDUMP_EXCEPTION_INFORMATION ExInfo;
|
MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL;
|
||||||
ExInfo.ThreadId = ::GetCurrentThreadId();
|
MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL;
|
||||||
ExInfo.ExceptionPointers = pExceptionInfo;
|
|
||||||
ExInfo.ClientPointers = NULL;
|
|
||||||
|
|
||||||
MINIDUMP_CALLBACK_INFORMATION callback;
|
if (pExceptionInfo != NULL) {
|
||||||
MINIDUMP_CALLBACK_INFORMATION * callback_pointer = NULL;
|
MINIDUMP_EXCEPTION_INFORMATION ExInfo;
|
||||||
MinidumpCallbackContext context;
|
ExInfo.ThreadId = ::GetCurrentThreadId();
|
||||||
|
ExInfo.ExceptionPointers = pExceptionInfo;
|
||||||
|
ExInfo.ClientPointers = NULL;
|
||||||
|
pExInfo = &ExInfo;
|
||||||
|
|
||||||
// Find a memory region of 256 bytes centered on the
|
MINIDUMP_CALLBACK_INFORMATION callback;
|
||||||
// faulting instruction pointer.
|
MinidumpCallbackContext context;
|
||||||
const ULONG64 instruction_pointer =
|
|
||||||
#if defined(_M_IX86)
|
|
||||||
pExceptionInfo->ContextRecord->Eip;
|
|
||||||
#elif defined(_M_AMD64)
|
|
||||||
pExceptionInfo->ContextRecord->Rip;
|
|
||||||
#else
|
|
||||||
#error Unsupported platform
|
|
||||||
#endif
|
|
||||||
|
|
||||||
MEMORY_BASIC_INFORMATION info;
|
// Find a memory region of 256 bytes centered on the
|
||||||
|
// faulting instruction pointer.
|
||||||
if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
|
const ULONG64 instruction_pointer =
|
||||||
{
|
#if defined(_M_IX86)
|
||||||
// Attempt to get 128 bytes before and after the instruction
|
pExceptionInfo->ContextRecord->Eip;
|
||||||
// pointer, but settle for whatever's available up to the
|
#elif defined(_M_AMD64)
|
||||||
// boundaries of the memory region.
|
pExceptionInfo->ContextRecord->Rip;
|
||||||
const ULONG64 kIPMemorySize = 256;
|
#else
|
||||||
context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
|
#error Unsupported platform
|
||||||
ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
|
#endif
|
||||||
context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
|
|
||||||
context.finished = false;
|
|
||||||
|
|
||||||
callback.CallbackRoutine = miniDumpWriteDumpCallback;
|
MEMORY_BASIC_INFORMATION info;
|
||||||
callback.CallbackParam = reinterpret_cast<void*>(&context);
|
|
||||||
callback_pointer = &callback;
|
if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
|
||||||
|
{
|
||||||
|
// Attempt to get 128 bytes before and after the instruction
|
||||||
|
// pointer, but settle for whatever's available up to the
|
||||||
|
// boundaries of the memory region.
|
||||||
|
const ULONG64 kIPMemorySize = 256;
|
||||||
|
context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
|
||||||
|
ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
|
||||||
|
context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
|
||||||
|
context.finished = false;
|
||||||
|
|
||||||
|
callback.CallbackRoutine = miniDumpWriteDumpCallback;
|
||||||
|
callback.CallbackParam = reinterpret_cast<void*>(&context);
|
||||||
|
pCallback = &callback;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo);
|
MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo);
|
||||||
|
|
||||||
// write the dump
|
// write the dump
|
||||||
BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, &ExInfo, NULL, callback_pointer) != 0;
|
BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0;
|
||||||
CloseHandle(hFile);
|
CloseHandle(hFile);
|
||||||
|
|
||||||
if (ok == FALSE) {
|
if (ok == FALSE) {
|
||||||
@ -402,9 +407,8 @@ namespace
|
|||||||
// Write mini dump and print stack trace.
|
// Write mini dump and print stack trace.
|
||||||
static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo)
|
static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo)
|
||||||
{
|
{
|
||||||
#if USE_SEPARATE_THREAD
|
|
||||||
EnterCriticalSection(&s_handler_critical_section);
|
EnterCriticalSection(&s_handler_critical_section);
|
||||||
|
#if USE_SEPARATE_THREAD
|
||||||
s_requesting_thread_id = GetCurrentThreadId();
|
s_requesting_thread_id = GetCurrentThreadId();
|
||||||
s_exception_info = pExceptionInfo;
|
s_exception_info = pExceptionInfo;
|
||||||
|
|
||||||
@ -418,12 +422,11 @@ namespace
|
|||||||
// Clean up.
|
// Clean up.
|
||||||
s_requesting_thread_id = 0;
|
s_requesting_thread_id = 0;
|
||||||
s_exception_info = NULL;
|
s_exception_info = NULL;
|
||||||
|
|
||||||
LeaveCriticalSection(&s_handler_critical_section);
|
|
||||||
#else
|
#else
|
||||||
// First of all, write mini dump.
|
// First of all, write mini dump.
|
||||||
writeMiniDump(pExceptionInfo);
|
writeMiniDump(pExceptionInfo);
|
||||||
#endif
|
#endif
|
||||||
|
LeaveCriticalSection(&s_handler_critical_section);
|
||||||
|
|
||||||
nvDebug("\nDump file saved.\n");
|
nvDebug("\nDump file saved.\n");
|
||||||
|
|
||||||
@ -454,62 +457,21 @@ namespace
|
|||||||
fclose(fp);
|
fclose(fp);
|
||||||
}
|
}
|
||||||
|
|
||||||
return EXCEPTION_EXECUTE_HANDLER; // Terminate app.
|
// This should terminate the process and set the error exit code.
|
||||||
|
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2);
|
||||||
|
|
||||||
|
return EXCEPTION_EXECUTE_HANDLER; // Terminate app. In case terminate process did not succeed.
|
||||||
}
|
}
|
||||||
|
|
||||||
/*static void handlePureVirtualCall() {
|
static void handlePureVirtualCall() {
|
||||||
// This is an pure virtual function call, not an exception. It's safe to
|
nvDebugBreak();
|
||||||
// play with sprintf here.
|
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
||||||
AutoExceptionHandler auto_exception_handler;
|
}
|
||||||
ExceptionHandler* current_handler = auto_exception_handler.get_handler();
|
|
||||||
|
|
||||||
MDRawAssertionInfo assertion;
|
static void handleInvalidParameter(const wchar_t * expresion, const wchar_t * function, const wchar_t * file, unsigned int line, uintptr_t reserved) {
|
||||||
memset(&assertion, 0, sizeof(assertion));
|
nvDebugBreak();
|
||||||
assertion.type = MD_ASSERTION_INFO_TYPE_PURE_VIRTUAL_CALL;
|
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
||||||
|
}
|
||||||
// Make up an exception record for the current thread and CPU context
|
|
||||||
// to make it possible for the crash processor to classify these
|
|
||||||
// as do regular crashes, and to make it humane for developers to
|
|
||||||
// analyze them.
|
|
||||||
EXCEPTION_RECORD exception_record = {};
|
|
||||||
CONTEXT exception_context = {};
|
|
||||||
EXCEPTION_POINTERS exception_ptrs = { &exception_record, &exception_context };
|
|
||||||
|
|
||||||
::RtlCaptureContext(&exception_context);
|
|
||||||
|
|
||||||
exception_record.ExceptionCode = STATUS_NONCONTINUABLE_EXCEPTION;
|
|
||||||
|
|
||||||
// We store pointers to the the expression and function strings,
|
|
||||||
// and the line as exception parameters to make them easy to
|
|
||||||
// access by the developer on the far side.
|
|
||||||
exception_record.NumberParameters = 3;
|
|
||||||
exception_record.ExceptionInformation[0] = reinterpret_cast<ULONG_PTR>(&assertion.expression);
|
|
||||||
exception_record.ExceptionInformation[1] = reinterpret_cast<ULONG_PTR>(&assertion.file);
|
|
||||||
exception_record.ExceptionInformation[2] = assertion.line;
|
|
||||||
|
|
||||||
bool success = false;
|
|
||||||
// In case of out-of-process dump generation, directly call
|
|
||||||
// WriteMinidumpWithException since there is no separate thread running.
|
|
||||||
|
|
||||||
success = current_handler->WriteMinidumpOnHandlerThread(&exception_ptrs, &assertion);
|
|
||||||
|
|
||||||
if (!success) {
|
|
||||||
if (current_handler->previous_pch_) {
|
|
||||||
// The handler didn't fully handle the exception. Give it to the
|
|
||||||
// previous purecall handler.
|
|
||||||
current_handler->previous_pch_();
|
|
||||||
else {
|
|
||||||
// If there's no previous handler, return and let _purecall handle it.
|
|
||||||
// This will just put up an assertion dialog.
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// The handler either took care of the invalid parameter problem itself,
|
|
||||||
// or passed it on to another handler. "Swallow" it by exiting, paralleling
|
|
||||||
// the behavior of "swallowing" exceptions.
|
|
||||||
exit(0);
|
|
||||||
}*/
|
|
||||||
|
|
||||||
|
|
||||||
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
|
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
|
||||||
@ -755,8 +717,8 @@ namespace
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (ret == NV_ABORT_EXIT) {
|
if (ret == NV_ABORT_EXIT) {
|
||||||
// Exit cleanly.
|
// Exit cleanly.
|
||||||
throw "Assertion failed";
|
exit(EXIT_FAILURE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -788,7 +750,7 @@ namespace
|
|||||||
|
|
||||||
if( ret == NV_ABORT_EXIT ) {
|
if( ret == NV_ABORT_EXIT ) {
|
||||||
// Exit cleanly.
|
// Exit cleanly.
|
||||||
throw "Assertion failed";
|
exit(EXIT_FAILURE + 1);
|
||||||
}
|
}
|
||||||
|
|
||||||
return ret;
|
return ret;
|
||||||
@ -825,7 +787,7 @@ namespace
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
// Exit cleanly.
|
// Exit cleanly.
|
||||||
throw "Assertion failed";
|
exit(EXIT_FAILURE + 1);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -853,6 +815,38 @@ int nvAbort(const char * exp, const char * file, int line, const char * func/*=N
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Abnormal termination. Create mini dump and output call stack.
|
||||||
|
void debug::terminate(int code)
|
||||||
|
{
|
||||||
|
EnterCriticalSection(&s_handler_critical_section);
|
||||||
|
|
||||||
|
writeMiniDump(NULL);
|
||||||
|
|
||||||
|
const int max_stack_size = 64;
|
||||||
|
void * trace[max_stack_size];
|
||||||
|
int size = backtrace(trace, max_stack_size);
|
||||||
|
|
||||||
|
// @@ Use win32's CreateFile?
|
||||||
|
FILE * fp = fileOpen("crash.txt", "wb");
|
||||||
|
if (fp != NULL) {
|
||||||
|
Array<const char *> lines;
|
||||||
|
writeStackTrace(trace, size, 0, lines);
|
||||||
|
|
||||||
|
for (uint i = 0; i < lines.count(); i++) {
|
||||||
|
fputs(lines[i], fp);
|
||||||
|
delete lines[i];
|
||||||
|
}
|
||||||
|
|
||||||
|
// @@ Add more info to crash.txt?
|
||||||
|
|
||||||
|
fclose(fp);
|
||||||
|
}
|
||||||
|
|
||||||
|
LeaveCriticalSection(&s_handler_critical_section);
|
||||||
|
|
||||||
|
exit(code);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/// Shows a message through the message handler.
|
/// Shows a message through the message handler.
|
||||||
void NV_CDECL nvDebugPrint(const char *msg, ...)
|
void NV_CDECL nvDebugPrint(const char *msg, ...)
|
||||||
@ -987,13 +981,11 @@ void debug::enableSigHandler(bool interactive)
|
|||||||
|
|
||||||
s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );
|
s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );
|
||||||
|
|
||||||
/*
|
|
||||||
#if _MSC_VER >= 1400 // MSVC 2005/8
|
#if _MSC_VER >= 1400 // MSVC 2005/8
|
||||||
_set_invalid_parameter_handler(handleInvalidParameter);
|
_set_invalid_parameter_handler(handleInvalidParameter);
|
||||||
#endif // _MSC_VER >= 1400
|
#endif // _MSC_VER >= 1400
|
||||||
|
|
||||||
_set_purecall_handler(handlePureVirtualCall);
|
_set_purecall_handler(handlePureVirtualCall);
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
// SYMOPT_DEFERRED_LOADS makes us not take a ton of time unless we actually log traces
|
// SYMOPT_DEFERRED_LOADS makes us not take a ton of time unless we actually log traces
|
||||||
|
@ -197,6 +197,8 @@ namespace nv
|
|||||||
|
|
||||||
NVCORE_API bool isDebuggerPresent();
|
NVCORE_API bool isDebuggerPresent();
|
||||||
NVCORE_API bool attachToDebugger();
|
NVCORE_API bool attachToDebugger();
|
||||||
|
|
||||||
|
NVCORE_API void terminate(int code);
|
||||||
}
|
}
|
||||||
|
|
||||||
} // nv namespace
|
} // nv namespace
|
||||||
|
@ -207,6 +207,13 @@ namespace nv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
|
||||||
|
for (uint i = old_size; i < new_size; i++) {
|
||||||
|
new(ptr+i) T(src[i]); // placement new
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
|
void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
|
||||||
for (uint i = new_size; i < old_size; i++) {
|
for (uint i = new_size; i < old_size; i++) {
|
||||||
@ -223,7 +230,7 @@ namespace nv
|
|||||||
}
|
}
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
void copy(T * restrict dst, const T * restrict src, uint count) {
|
void copy_range(T * restrict dst, const T * restrict src, uint count) {
|
||||||
for (uint i = 0; i < count; i++) {
|
for (uint i = 0; i < count; i++) {
|
||||||
dst[i] = src[i];
|
dst[i] = src[i];
|
||||||
}
|
}
|
||||||
|
@ -1338,7 +1338,7 @@ void FloatImage::flipZ()
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel) const
|
float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale/*=1*/) const
|
||||||
{
|
{
|
||||||
const uint w = m_width;
|
const uint w = m_width;
|
||||||
const uint h = m_height;
|
const uint h = m_height;
|
||||||
@ -1347,16 +1347,41 @@ float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel) const
|
|||||||
|
|
||||||
const float * alpha = channel(alphaChannel);
|
const float * alpha = channel(alphaChannel);
|
||||||
|
|
||||||
|
#if 0
|
||||||
const uint count = m_pixelCount;
|
const uint count = m_pixelCount;
|
||||||
for (uint i = 0; i < count; i++) {
|
for (uint i = 0; i < count; i++) {
|
||||||
if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt?
|
if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt?
|
||||||
}
|
}
|
||||||
|
|
||||||
return coverage / float(w * h);
|
return coverage / float(w * h);
|
||||||
|
#else
|
||||||
|
const uint n = 8;
|
||||||
|
|
||||||
|
// If we want subsampling:
|
||||||
|
for (uint y = 0; y < h-1; y++) {
|
||||||
|
for (uint x = 0; x < w-1; x++) {
|
||||||
|
|
||||||
|
float alpha00 = nv::saturate(pixel(alphaChannel, x+0, y+0, 0) * alphaScale);
|
||||||
|
float alpha10 = nv::saturate(pixel(alphaChannel, x+1, y+0, 0) * alphaScale);
|
||||||
|
float alpha01 = nv::saturate(pixel(alphaChannel, x+0, y+1, 0) * alphaScale);
|
||||||
|
float alpha11 = nv::saturate(pixel(alphaChannel, x+1, y+1, 0) * alphaScale);
|
||||||
|
|
||||||
|
for (float fy = 0.5f/n; fy < 1.0f; fy++) {
|
||||||
|
for (float fx = 0.5f/n; fx < 1.0f; fx++) {
|
||||||
|
float alpha = alpha00 * (1 - fx) * (1 - fy) + alpha10 * fx * (1 - fy) + alpha01 * (1 - fx) * fy + alpha11 * fx * fy;
|
||||||
|
if (alpha > alphaRef) coverage += 1.0f;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return coverage / float(w * h * n * n);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int alphaChannel)
|
void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int alphaChannel)
|
||||||
{
|
{
|
||||||
|
#if 0
|
||||||
float minAlphaRef = 0.0f;
|
float minAlphaRef = 0.0f;
|
||||||
float maxAlphaRef = 1.0f;
|
float maxAlphaRef = 1.0f;
|
||||||
float midAlphaRef = 0.5f;
|
float midAlphaRef = 0.5f;
|
||||||
@ -1383,8 +1408,35 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int
|
|||||||
// Scale alpha channel.
|
// Scale alpha channel.
|
||||||
scaleBias(alphaChannel, 1, alphaScale, 0.0f);
|
scaleBias(alphaChannel, 1, alphaScale, 0.0f);
|
||||||
clamp(alphaChannel, 1, 0.0f, 1.0f);
|
clamp(alphaChannel, 1, 0.0f, 1.0f);
|
||||||
|
#else
|
||||||
|
float minAlphaScale = 0.0f;
|
||||||
|
float maxAlphaScale = 4.0f;
|
||||||
|
float alphaScale = 1.0f;
|
||||||
|
|
||||||
//float newCoverage = alphaTestCoverage(alphaRef, alphaChannel);
|
// Determine desired scale using a binary search. Hardcoded to 10 steps max.
|
||||||
|
for (int i = 0; i < 10; i++) {
|
||||||
|
float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale);
|
||||||
|
|
||||||
|
if (currentCoverage < desiredCoverage) {
|
||||||
|
minAlphaScale = alphaScale;
|
||||||
|
}
|
||||||
|
else if (currentCoverage > desiredCoverage) {
|
||||||
|
maxAlphaScale = alphaScale;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
alphaScale = (minAlphaScale + maxAlphaScale) * 0.5f;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale alpha channel.
|
||||||
|
scaleBias(alphaChannel, 1, alphaScale, 0.0f);
|
||||||
|
clamp(alphaChannel, 1, 0.0f, 1.0f);
|
||||||
|
#endif
|
||||||
|
#if _DEBUG
|
||||||
|
float newCoverage = alphaTestCoverage(alphaRef, alphaChannel);
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
FloatImage* FloatImage::clone() const
|
FloatImage* FloatImage::clone() const
|
||||||
|
@ -103,7 +103,7 @@ namespace nv
|
|||||||
NVIMAGE_API void flipY();
|
NVIMAGE_API void flipY();
|
||||||
NVIMAGE_API void flipZ();
|
NVIMAGE_API void flipZ();
|
||||||
|
|
||||||
NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel) const;
|
NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const;
|
||||||
NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel);
|
NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel);
|
||||||
|
|
||||||
|
|
||||||
|
@ -76,6 +76,10 @@
|
|||||||
#include "Half.h"
|
#include "Half.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
|
#if NV_CC_GNUC
|
||||||
|
#include <xmmintrin.h>
|
||||||
|
#endif
|
||||||
|
|
||||||
// Load immediate
|
// Load immediate
|
||||||
static inline uint32 _uint32_li( uint32 a )
|
static inline uint32 _uint32_li( uint32 a )
|
||||||
{
|
{
|
||||||
@ -488,10 +492,79 @@ nv::half_to_float( uint16 h )
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
// @@ This code appears to be wrong.
|
static __m128 half_to_float4_SSE2(__m128i h)
|
||||||
|
{
|
||||||
|
#define SSE_CONST4(name, val) static const __declspec(align(16)) uint name[4] = { (val), (val), (val), (val) }
|
||||||
|
#define CONST(name) *(const __m128i *)&name
|
||||||
|
|
||||||
|
SSE_CONST4(mask_nosign, 0x7fff);
|
||||||
|
SSE_CONST4(mask_justsign, 0x8000);
|
||||||
|
SSE_CONST4(mask_shifted_exp, 0x7c00 << 13);
|
||||||
|
SSE_CONST4(expadjust_normal, (127 - 15) << 23);
|
||||||
|
SSE_CONST4(expadjust_infnan, (128 - 16) << 23);
|
||||||
|
SSE_CONST4(expadjust_denorm, 1 << 23);
|
||||||
|
SSE_CONST4(magic_denorm, 113 << 23);
|
||||||
|
|
||||||
|
__m128i mnosign = CONST(mask_nosign);
|
||||||
|
__m128i expmant = _mm_and_si128(mnosign, h);
|
||||||
|
__m128i justsign = _mm_and_si128(h, CONST(mask_justsign));
|
||||||
|
__m128i mshiftexp = CONST(mask_shifted_exp);
|
||||||
|
__m128i eadjust = CONST(expadjust_normal);
|
||||||
|
__m128i shifted = _mm_slli_epi32(expmant, 13);
|
||||||
|
__m128i adjusted = _mm_add_epi32(eadjust, shifted);
|
||||||
|
__m128i justexp = _mm_and_si128(shifted, mshiftexp);
|
||||||
|
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
__m128i b_isinfnan = _mm_cmpeq_epi32(mshiftexp, justexp);
|
||||||
|
__m128i b_isdenorm = _mm_cmpeq_epi32(zero, justexp);
|
||||||
|
|
||||||
|
__m128i adj_infnan = _mm_and_si128(b_isinfnan, CONST(expadjust_infnan));
|
||||||
|
__m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan);
|
||||||
|
|
||||||
|
__m128i adj_den = CONST(expadjust_denorm);
|
||||||
|
__m128i den1 = _mm_add_epi32(adj_den, adjusted2);
|
||||||
|
__m128 den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
|
||||||
|
__m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
|
||||||
|
__m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2));
|
||||||
|
__m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4);
|
||||||
|
__m128i sign = _mm_slli_epi32(justsign, 16);
|
||||||
|
__m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
|
||||||
|
|
||||||
|
// ~21 SSE2 ops.
|
||||||
|
return final;
|
||||||
|
|
||||||
|
#undef SSE_CONST4
|
||||||
|
#undef CONST
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
void nv::half_to_float_array(const uint16 * vin, float * vout, int count) {
|
||||||
|
nvDebugCheck((intptr_t(vin) & 15) == 0);
|
||||||
|
nvDebugCheck((intptr_t(vout) & 15) == 0);
|
||||||
|
nvDebugCheck((count & 7) == 0);
|
||||||
|
|
||||||
|
__m128i zero = _mm_setzero_si128();
|
||||||
|
|
||||||
|
for (int i = 0; i < count; i += 8)
|
||||||
|
{
|
||||||
|
__m128i in = _mm_loadu_si128((const __m128i *)(vin + i));
|
||||||
|
__m128i a = _mm_unpacklo_epi16(in, zero);
|
||||||
|
__m128i b = _mm_unpackhi_epi16(in, zero);
|
||||||
|
|
||||||
|
__m128 outa = half_to_float4_SSE2(a);
|
||||||
|
_mm_storeu_ps((float *)(vout + i), outa);
|
||||||
|
|
||||||
|
__m128 outb = half_to_float4_SSE2(b);
|
||||||
|
_mm_storeu_ps((float *)(vout + i + 4), outb);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
// @@ These tables could be smaller.
|
// @@ These tables could be smaller.
|
||||||
namespace nv {
|
namespace nv {
|
||||||
uint32 mantissa_table[2048];
|
uint32 mantissa_table[2048] = { 0xDEADBEEF };
|
||||||
uint32 exponent_table[64];
|
uint32 exponent_table[64];
|
||||||
uint32 offset_table[64];
|
uint32 offset_table[64];
|
||||||
}
|
}
|
||||||
|
@ -9,6 +9,9 @@ namespace nv {
|
|||||||
uint32 half_to_float( uint16 h );
|
uint32 half_to_float( uint16 h );
|
||||||
uint16 half_from_float( uint32 f );
|
uint16 half_from_float( uint32 f );
|
||||||
|
|
||||||
|
// vin,vout must be 16 byte aligned. count must be a multiple of 8.
|
||||||
|
void half_to_float_array(const uint16 * vin, float * vout, int count);
|
||||||
|
|
||||||
void half_init_tables();
|
void half_init_tables();
|
||||||
|
|
||||||
extern uint32 mantissa_table[2048];
|
extern uint32 mantissa_table[2048];
|
||||||
@ -19,6 +22,7 @@ namespace nv {
|
|||||||
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
|
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
|
||||||
inline uint32 fast_half_to_float(uint16 h)
|
inline uint32 fast_half_to_float(uint16 h)
|
||||||
{
|
{
|
||||||
|
nvDebugCheck(mantissa_table[0] == 0); // Make sure table was initialized.
|
||||||
uint exp = h >> 10;
|
uint exp = h >> 10;
|
||||||
return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp];
|
return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp];
|
||||||
}
|
}
|
||||||
|
@ -62,6 +62,7 @@ namespace nv
|
|||||||
Matrix();
|
Matrix();
|
||||||
explicit Matrix(float f);
|
explicit Matrix(float f);
|
||||||
explicit Matrix(identity_t);
|
explicit Matrix(identity_t);
|
||||||
|
Matrix(const Matrix3 & m);
|
||||||
Matrix(const Matrix & m);
|
Matrix(const Matrix & m);
|
||||||
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
|
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
|
||||||
//explicit Matrix(const float m[]); // m is assumed to contain 16 elements
|
//explicit Matrix(const float m[]); // m is assumed to contain 16 elements
|
||||||
|
@ -250,6 +250,19 @@ namespace nv
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
inline Matrix::Matrix(const Matrix3 & m)
|
||||||
|
{
|
||||||
|
for(int i = 0; i < 3; i++) {
|
||||||
|
for(int j = 0; j < 3; j++) {
|
||||||
|
operator()(i, j) = m.get(i, j);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(int i = 0; i < 4; i++) {
|
||||||
|
operator()(3, i) = 0;
|
||||||
|
operator()(i, 3) = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
|
inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
|
||||||
{
|
{
|
||||||
m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w;
|
m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w;
|
||||||
|
@ -16,7 +16,7 @@ using namespace nv;
|
|||||||
#define ENABLE_PARALLEL_FOR 0
|
#define ENABLE_PARALLEL_FOR 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
void worker(void * arg) {
|
static void worker(void * arg) {
|
||||||
ParallelFor * owner = (ParallelFor *)arg;
|
ParallelFor * owner = (ParallelFor *)arg;
|
||||||
|
|
||||||
while(true) {
|
while(true) {
|
||||||
|
@ -92,8 +92,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
|||||||
{
|
{
|
||||||
int p = order[i];
|
int p = order[i];
|
||||||
#if NVTT_USE_SIMD
|
#if NVTT_USE_SIMD
|
||||||
NV_ALIGN_16 Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
|
NV_ALIGN_16 Vector4 tmp(values[p], 1);
|
||||||
m_weighted[i] = SimdVector(tmp.component);
|
m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]);
|
||||||
m_xxsum += m_weighted[i] * m_weighted[i];
|
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||||
m_xsum += m_weighted[i];
|
m_xsum += m_weighted[i];
|
||||||
#else
|
#else
|
||||||
|
@ -40,6 +40,7 @@
|
|||||||
#include "nvimage/BlockDXT.h"
|
#include "nvimage/BlockDXT.h"
|
||||||
|
|
||||||
#include "nvmath/Vector.inl"
|
#include "nvmath/Vector.inl"
|
||||||
|
#include "nvmath/Color.inl"
|
||||||
|
|
||||||
#include "nvcore/Memory.h"
|
#include "nvcore/Memory.h"
|
||||||
|
|
||||||
@ -111,18 +112,15 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
|
|||||||
QuickCompress::compressDXT5(rgba, block);
|
QuickCompress::compressDXT5(rgba, block);
|
||||||
}
|
}
|
||||||
|
|
||||||
#if 1
|
#if 0
|
||||||
void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
set.setUniformWeights();
|
set.setUniformWeights();
|
||||||
set.createMinimalSet(false);
|
set.createMinimalSet(/*ignoreTransparent*/false);
|
||||||
|
|
||||||
ClusterFit fit;
|
|
||||||
fit.setMetric(compressionOptions.colorWeight);
|
|
||||||
|
|
||||||
BlockDXT1 * block = new(output) BlockDXT1;
|
BlockDXT1 * block = new(output) BlockDXT1;
|
||||||
|
|
||||||
if (set.isSingleColor(true))
|
if (set.isSingleColor(/*ignoreAlpha*/true))
|
||||||
{
|
{
|
||||||
Color32 c;
|
Color32 c;
|
||||||
c.r = uint8(clamp(set.colors[0].x, 0.0f, 1.0f) * 255);
|
c.r = uint8(clamp(set.colors[0].x, 0.0f, 1.0f) * 255);
|
||||||
@ -133,16 +131,19 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
|
ClusterFit fit;
|
||||||
|
fit.setMetric(compressionOptions.colorWeight);
|
||||||
fit.setColourSet(&set);
|
fit.setColourSet(&set);
|
||||||
|
|
||||||
Vector3 start, end;
|
Vector3 start, end;
|
||||||
|
|
||||||
fit.compress4(&start, &end);
|
fit.compress4(&start, &end);
|
||||||
QuickCompress::outputBlock4(set, start, end, block);
|
|
||||||
|
|
||||||
if (fit.compress3(&start, &end)) {
|
if (fit.compress3(&start, &end)) {
|
||||||
QuickCompress::outputBlock3(set, start, end, block);
|
QuickCompress::outputBlock3(set, start, end, block);
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
QuickCompress::outputBlock4(set, start, end, block);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#else
|
#else
|
||||||
@ -219,16 +220,15 @@ void CompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode,
|
|||||||
nvsquish::WeightedClusterFit fit;
|
nvsquish::WeightedClusterFit fit;
|
||||||
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
|
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
|
||||||
|
|
||||||
int flags = 0;
|
int flags = 0;
|
||||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||||
|
|
||||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||||
fit.SetColourSet(&colours, 0);
|
fit.SetColourSet(&colours, 0);
|
||||||
fit.Compress(&block->color);
|
fit.Compress(&block->color);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void CompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
void CompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||||
{
|
{
|
||||||
BlockDXT5 * block = new(output) BlockDXT5;
|
BlockDXT5 * block = new(output) BlockDXT5;
|
||||||
|
@ -64,7 +64,7 @@ namespace nv
|
|||||||
|
|
||||||
|
|
||||||
// Normal CPU compressors.
|
// Normal CPU compressors.
|
||||||
#if 1
|
#if 0
|
||||||
struct CompressorDXT1 : public ColorSetCompressor
|
struct CompressorDXT1 : public ColorSetCompressor
|
||||||
{
|
{
|
||||||
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||||
|
@ -310,7 +310,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
|
|||||||
{
|
{
|
||||||
for (uint y = 0; y < h; y++)
|
for (uint y = 0; y < h; y++)
|
||||||
{
|
{
|
||||||
const float * src = (const float *)data + y * w;
|
const float * src = (const float *)data + (z * h + y) * w;
|
||||||
|
|
||||||
BitStream stream(dst);
|
BitStream stream(dst);
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user