Merge changes from The Witness.
This commit is contained in:
parent
3b4fcd0369
commit
04bdc76749
@ -290,11 +290,23 @@ namespace nv
|
||||
template <typename T>
|
||||
NV_FORCEINLINE void Array<T>::copy(const T * data, uint count)
|
||||
{
|
||||
destroy_range(m_buffer, count, m_size);
|
||||
#if 1 // More simple, but maybe not be as efficient?
|
||||
destroy_range(m_buffer, 0, m_size);
|
||||
|
||||
setArraySize(count);
|
||||
|
||||
::nv::copy(m_buffer, data, count);
|
||||
construct_range(m_buffer, count, 0, data);
|
||||
#else
|
||||
const uint old_size = m_size;
|
||||
|
||||
destroy_range(m_buffer, count, old_size);
|
||||
|
||||
setArraySize(count);
|
||||
|
||||
copy_range(m_buffer, data, old_size);
|
||||
|
||||
construct_range(m_buffer, count, old_size, data);
|
||||
#endif
|
||||
}
|
||||
|
||||
// Assignment operator.
|
||||
|
@ -172,48 +172,53 @@ namespace
|
||||
return false;
|
||||
}
|
||||
|
||||
MINIDUMP_EXCEPTION_INFORMATION ExInfo;
|
||||
ExInfo.ThreadId = ::GetCurrentThreadId();
|
||||
ExInfo.ExceptionPointers = pExceptionInfo;
|
||||
ExInfo.ClientPointers = NULL;
|
||||
MINIDUMP_EXCEPTION_INFORMATION * pExInfo = NULL;
|
||||
MINIDUMP_CALLBACK_INFORMATION * pCallback = NULL;
|
||||
|
||||
MINIDUMP_CALLBACK_INFORMATION callback;
|
||||
MINIDUMP_CALLBACK_INFORMATION * callback_pointer = NULL;
|
||||
MinidumpCallbackContext context;
|
||||
if (pExceptionInfo != NULL) {
|
||||
MINIDUMP_EXCEPTION_INFORMATION ExInfo;
|
||||
ExInfo.ThreadId = ::GetCurrentThreadId();
|
||||
ExInfo.ExceptionPointers = pExceptionInfo;
|
||||
ExInfo.ClientPointers = NULL;
|
||||
pExInfo = &ExInfo;
|
||||
|
||||
// Find a memory region of 256 bytes centered on the
|
||||
// faulting instruction pointer.
|
||||
const ULONG64 instruction_pointer =
|
||||
#if defined(_M_IX86)
|
||||
pExceptionInfo->ContextRecord->Eip;
|
||||
#elif defined(_M_AMD64)
|
||||
pExceptionInfo->ContextRecord->Rip;
|
||||
#else
|
||||
#error Unsupported platform
|
||||
#endif
|
||||
MINIDUMP_CALLBACK_INFORMATION callback;
|
||||
MinidumpCallbackContext context;
|
||||
|
||||
MEMORY_BASIC_INFORMATION info;
|
||||
|
||||
if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
|
||||
{
|
||||
// Attempt to get 128 bytes before and after the instruction
|
||||
// pointer, but settle for whatever's available up to the
|
||||
// boundaries of the memory region.
|
||||
const ULONG64 kIPMemorySize = 256;
|
||||
context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
|
||||
ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
|
||||
context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
|
||||
context.finished = false;
|
||||
// Find a memory region of 256 bytes centered on the
|
||||
// faulting instruction pointer.
|
||||
const ULONG64 instruction_pointer =
|
||||
#if defined(_M_IX86)
|
||||
pExceptionInfo->ContextRecord->Eip;
|
||||
#elif defined(_M_AMD64)
|
||||
pExceptionInfo->ContextRecord->Rip;
|
||||
#else
|
||||
#error Unsupported platform
|
||||
#endif
|
||||
|
||||
callback.CallbackRoutine = miniDumpWriteDumpCallback;
|
||||
callback.CallbackParam = reinterpret_cast<void*>(&context);
|
||||
callback_pointer = &callback;
|
||||
MEMORY_BASIC_INFORMATION info;
|
||||
|
||||
if (VirtualQuery(reinterpret_cast<LPCVOID>(instruction_pointer), &info, sizeof(MEMORY_BASIC_INFORMATION)) != 0 && info.State == MEM_COMMIT)
|
||||
{
|
||||
// Attempt to get 128 bytes before and after the instruction
|
||||
// pointer, but settle for whatever's available up to the
|
||||
// boundaries of the memory region.
|
||||
const ULONG64 kIPMemorySize = 256;
|
||||
context.memory_base = max(reinterpret_cast<ULONG64>(info.BaseAddress), instruction_pointer - (kIPMemorySize / 2));
|
||||
ULONG64 end_of_range = min(instruction_pointer + (kIPMemorySize / 2), reinterpret_cast<ULONG64>(info.BaseAddress) + info.RegionSize);
|
||||
context.memory_size = static_cast<ULONG>(end_of_range - context.memory_base);
|
||||
context.finished = false;
|
||||
|
||||
callback.CallbackRoutine = miniDumpWriteDumpCallback;
|
||||
callback.CallbackParam = reinterpret_cast<void*>(&context);
|
||||
pCallback = &callback;
|
||||
}
|
||||
}
|
||||
|
||||
MINIDUMP_TYPE miniDumpType = (MINIDUMP_TYPE)(MiniDumpNormal|MiniDumpWithHandleData|MiniDumpWithThreadInfo);
|
||||
|
||||
// write the dump
|
||||
BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, &ExInfo, NULL, callback_pointer) != 0;
|
||||
BOOL ok = MiniDumpWriteDump(GetCurrentProcess(), GetCurrentProcessId(), hFile, miniDumpType, pExInfo, NULL, pCallback) != 0;
|
||||
CloseHandle(hFile);
|
||||
|
||||
if (ok == FALSE) {
|
||||
@ -402,9 +407,8 @@ namespace
|
||||
// Write mini dump and print stack trace.
|
||||
static LONG WINAPI handleException(EXCEPTION_POINTERS * pExceptionInfo)
|
||||
{
|
||||
#if USE_SEPARATE_THREAD
|
||||
EnterCriticalSection(&s_handler_critical_section);
|
||||
|
||||
#if USE_SEPARATE_THREAD
|
||||
s_requesting_thread_id = GetCurrentThreadId();
|
||||
s_exception_info = pExceptionInfo;
|
||||
|
||||
@ -418,12 +422,11 @@ namespace
|
||||
// Clean up.
|
||||
s_requesting_thread_id = 0;
|
||||
s_exception_info = NULL;
|
||||
|
||||
LeaveCriticalSection(&s_handler_critical_section);
|
||||
#else
|
||||
// First of all, write mini dump.
|
||||
writeMiniDump(pExceptionInfo);
|
||||
#endif
|
||||
LeaveCriticalSection(&s_handler_critical_section);
|
||||
|
||||
nvDebug("\nDump file saved.\n");
|
||||
|
||||
@ -454,62 +457,21 @@ namespace
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
return EXCEPTION_EXECUTE_HANDLER; // Terminate app.
|
||||
// This should terminate the process and set the error exit code.
|
||||
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 2);
|
||||
|
||||
return EXCEPTION_EXECUTE_HANDLER; // Terminate app. In case terminate process did not succeed.
|
||||
}
|
||||
|
||||
/*static void handlePureVirtualCall() {
|
||||
// This is an pure virtual function call, not an exception. It's safe to
|
||||
// play with sprintf here.
|
||||
AutoExceptionHandler auto_exception_handler;
|
||||
ExceptionHandler* current_handler = auto_exception_handler.get_handler();
|
||||
static void handlePureVirtualCall() {
|
||||
nvDebugBreak();
|
||||
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
||||
}
|
||||
|
||||
MDRawAssertionInfo assertion;
|
||||
memset(&assertion, 0, sizeof(assertion));
|
||||
assertion.type = MD_ASSERTION_INFO_TYPE_PURE_VIRTUAL_CALL;
|
||||
|
||||
// Make up an exception record for the current thread and CPU context
|
||||
// to make it possible for the crash processor to classify these
|
||||
// as do regular crashes, and to make it humane for developers to
|
||||
// analyze them.
|
||||
EXCEPTION_RECORD exception_record = {};
|
||||
CONTEXT exception_context = {};
|
||||
EXCEPTION_POINTERS exception_ptrs = { &exception_record, &exception_context };
|
||||
|
||||
::RtlCaptureContext(&exception_context);
|
||||
|
||||
exception_record.ExceptionCode = STATUS_NONCONTINUABLE_EXCEPTION;
|
||||
|
||||
// We store pointers to the the expression and function strings,
|
||||
// and the line as exception parameters to make them easy to
|
||||
// access by the developer on the far side.
|
||||
exception_record.NumberParameters = 3;
|
||||
exception_record.ExceptionInformation[0] = reinterpret_cast<ULONG_PTR>(&assertion.expression);
|
||||
exception_record.ExceptionInformation[1] = reinterpret_cast<ULONG_PTR>(&assertion.file);
|
||||
exception_record.ExceptionInformation[2] = assertion.line;
|
||||
|
||||
bool success = false;
|
||||
// In case of out-of-process dump generation, directly call
|
||||
// WriteMinidumpWithException since there is no separate thread running.
|
||||
|
||||
success = current_handler->WriteMinidumpOnHandlerThread(&exception_ptrs, &assertion);
|
||||
|
||||
if (!success) {
|
||||
if (current_handler->previous_pch_) {
|
||||
// The handler didn't fully handle the exception. Give it to the
|
||||
// previous purecall handler.
|
||||
current_handler->previous_pch_();
|
||||
else {
|
||||
// If there's no previous handler, return and let _purecall handle it.
|
||||
// This will just put up an assertion dialog.
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// The handler either took care of the invalid parameter problem itself,
|
||||
// or passed it on to another handler. "Swallow" it by exiting, paralleling
|
||||
// the behavior of "swallowing" exceptions.
|
||||
exit(0);
|
||||
}*/
|
||||
static void handleInvalidParameter(const wchar_t * expresion, const wchar_t * function, const wchar_t * file, unsigned int line, uintptr_t reserved) {
|
||||
nvDebugBreak();
|
||||
TerminateProcess(GetCurrentProcess(), EXIT_FAILURE + 8);
|
||||
}
|
||||
|
||||
|
||||
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) // NV_OS_LINUX || NV_OS_DARWIN
|
||||
@ -755,8 +717,8 @@ namespace
|
||||
}
|
||||
|
||||
if (ret == NV_ABORT_EXIT) {
|
||||
// Exit cleanly.
|
||||
throw "Assertion failed";
|
||||
// Exit cleanly.
|
||||
exit(EXIT_FAILURE + 1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -788,7 +750,7 @@ namespace
|
||||
|
||||
if( ret == NV_ABORT_EXIT ) {
|
||||
// Exit cleanly.
|
||||
throw "Assertion failed";
|
||||
exit(EXIT_FAILURE + 1);
|
||||
}
|
||||
|
||||
return ret;
|
||||
@ -825,7 +787,7 @@ namespace
|
||||
#endif
|
||||
|
||||
// Exit cleanly.
|
||||
throw "Assertion failed";
|
||||
exit(EXIT_FAILURE + 1);
|
||||
}
|
||||
};
|
||||
|
||||
@ -853,6 +815,38 @@ int nvAbort(const char * exp, const char * file, int line, const char * func/*=N
|
||||
}
|
||||
}
|
||||
|
||||
// Abnormal termination. Create mini dump and output call stack.
|
||||
void debug::terminate(int code)
|
||||
{
|
||||
EnterCriticalSection(&s_handler_critical_section);
|
||||
|
||||
writeMiniDump(NULL);
|
||||
|
||||
const int max_stack_size = 64;
|
||||
void * trace[max_stack_size];
|
||||
int size = backtrace(trace, max_stack_size);
|
||||
|
||||
// @@ Use win32's CreateFile?
|
||||
FILE * fp = fileOpen("crash.txt", "wb");
|
||||
if (fp != NULL) {
|
||||
Array<const char *> lines;
|
||||
writeStackTrace(trace, size, 0, lines);
|
||||
|
||||
for (uint i = 0; i < lines.count(); i++) {
|
||||
fputs(lines[i], fp);
|
||||
delete lines[i];
|
||||
}
|
||||
|
||||
// @@ Add more info to crash.txt?
|
||||
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
LeaveCriticalSection(&s_handler_critical_section);
|
||||
|
||||
exit(code);
|
||||
}
|
||||
|
||||
|
||||
/// Shows a message through the message handler.
|
||||
void NV_CDECL nvDebugPrint(const char *msg, ...)
|
||||
@ -987,13 +981,11 @@ void debug::enableSigHandler(bool interactive)
|
||||
|
||||
s_old_exception_filter = ::SetUnhandledExceptionFilter( handleException );
|
||||
|
||||
/*
|
||||
#if _MSC_VER >= 1400 // MSVC 2005/8
|
||||
_set_invalid_parameter_handler(handleInvalidParameter);
|
||||
#endif // _MSC_VER >= 1400
|
||||
|
||||
_set_purecall_handler(handlePureVirtualCall);
|
||||
*/
|
||||
|
||||
|
||||
// SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
|
||||
|
@ -197,6 +197,8 @@ namespace nv
|
||||
|
||||
NVCORE_API bool isDebuggerPresent();
|
||||
NVCORE_API bool attachToDebugger();
|
||||
|
||||
NVCORE_API void terminate(int code);
|
||||
}
|
||||
|
||||
} // nv namespace
|
||||
|
@ -207,6 +207,13 @@ namespace nv
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void construct_range(T * restrict ptr, uint new_size, uint old_size, const T * src) {
|
||||
for (uint i = old_size; i < new_size; i++) {
|
||||
new(ptr+i) T(src[i]); // placement new
|
||||
}
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void destroy_range(T * restrict ptr, uint new_size, uint old_size) {
|
||||
for (uint i = new_size; i < old_size; i++) {
|
||||
@ -223,7 +230,7 @@ namespace nv
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
void copy(T * restrict dst, const T * restrict src, uint count) {
|
||||
void copy_range(T * restrict dst, const T * restrict src, uint count) {
|
||||
for (uint i = 0; i < count; i++) {
|
||||
dst[i] = src[i];
|
||||
}
|
||||
|
@ -1338,7 +1338,7 @@ void FloatImage::flipZ()
|
||||
|
||||
|
||||
|
||||
float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel) const
|
||||
float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale/*=1*/) const
|
||||
{
|
||||
const uint w = m_width;
|
||||
const uint h = m_height;
|
||||
@ -1347,16 +1347,41 @@ float FloatImage::alphaTestCoverage(float alphaRef, int alphaChannel) const
|
||||
|
||||
const float * alpha = channel(alphaChannel);
|
||||
|
||||
#if 0
|
||||
const uint count = m_pixelCount;
|
||||
for (uint i = 0; i < count; i++) {
|
||||
if (alpha[i] > alphaRef) coverage += 1.0f; // @@ gt or lt?
|
||||
}
|
||||
|
||||
|
||||
return coverage / float(w * h);
|
||||
#else
|
||||
const uint n = 8;
|
||||
|
||||
// If we want subsampling:
|
||||
for (uint y = 0; y < h-1; y++) {
|
||||
for (uint x = 0; x < w-1; x++) {
|
||||
|
||||
float alpha00 = nv::saturate(pixel(alphaChannel, x+0, y+0, 0) * alphaScale);
|
||||
float alpha10 = nv::saturate(pixel(alphaChannel, x+1, y+0, 0) * alphaScale);
|
||||
float alpha01 = nv::saturate(pixel(alphaChannel, x+0, y+1, 0) * alphaScale);
|
||||
float alpha11 = nv::saturate(pixel(alphaChannel, x+1, y+1, 0) * alphaScale);
|
||||
|
||||
for (float fy = 0.5f/n; fy < 1.0f; fy++) {
|
||||
for (float fx = 0.5f/n; fx < 1.0f; fx++) {
|
||||
float alpha = alpha00 * (1 - fx) * (1 - fy) + alpha10 * fx * (1 - fy) + alpha01 * (1 - fx) * fy + alpha11 * fx * fy;
|
||||
if (alpha > alphaRef) coverage += 1.0f;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return coverage / float(w * h * n * n);
|
||||
#endif
|
||||
}
|
||||
|
||||
void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int alphaChannel)
|
||||
{
|
||||
#if 0
|
||||
float minAlphaRef = 0.0f;
|
||||
float maxAlphaRef = 1.0f;
|
||||
float midAlphaRef = 0.5f;
|
||||
@ -1383,8 +1408,35 @@ void FloatImage::scaleAlphaToCoverage(float desiredCoverage, float alphaRef, int
|
||||
// Scale alpha channel.
|
||||
scaleBias(alphaChannel, 1, alphaScale, 0.0f);
|
||||
clamp(alphaChannel, 1, 0.0f, 1.0f);
|
||||
#else
|
||||
float minAlphaScale = 0.0f;
|
||||
float maxAlphaScale = 4.0f;
|
||||
float alphaScale = 1.0f;
|
||||
|
||||
//float newCoverage = alphaTestCoverage(alphaRef, alphaChannel);
|
||||
// Determine desired scale using a binary search. Hardcoded to 10 steps max.
|
||||
for (int i = 0; i < 10; i++) {
|
||||
float currentCoverage = alphaTestCoverage(alphaRef, alphaChannel, alphaScale);
|
||||
|
||||
if (currentCoverage < desiredCoverage) {
|
||||
minAlphaScale = alphaScale;
|
||||
}
|
||||
else if (currentCoverage > desiredCoverage) {
|
||||
maxAlphaScale = alphaScale;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
|
||||
alphaScale = (minAlphaScale + maxAlphaScale) * 0.5f;
|
||||
}
|
||||
|
||||
// Scale alpha channel.
|
||||
scaleBias(alphaChannel, 1, alphaScale, 0.0f);
|
||||
clamp(alphaChannel, 1, 0.0f, 1.0f);
|
||||
#endif
|
||||
#if _DEBUG
|
||||
float newCoverage = alphaTestCoverage(alphaRef, alphaChannel);
|
||||
#endif
|
||||
}
|
||||
|
||||
FloatImage* FloatImage::clone() const
|
||||
|
@ -103,7 +103,7 @@ namespace nv
|
||||
NVIMAGE_API void flipY();
|
||||
NVIMAGE_API void flipZ();
|
||||
|
||||
NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel) const;
|
||||
NVIMAGE_API float alphaTestCoverage(float alphaRef, int alphaChannel, float alphaScale = 1.0f) const;
|
||||
NVIMAGE_API void scaleAlphaToCoverage(float coverage, float alphaRef, int alphaChannel);
|
||||
|
||||
|
||||
|
@ -76,6 +76,10 @@
|
||||
#include "Half.h"
|
||||
#include <stdio.h>
|
||||
|
||||
#if NV_CC_GNUC
|
||||
#include <xmmintrin.h>
|
||||
#endif
|
||||
|
||||
// Load immediate
|
||||
static inline uint32 _uint32_li( uint32 a )
|
||||
{
|
||||
@ -488,10 +492,79 @@ nv::half_to_float( uint16 h )
|
||||
}
|
||||
|
||||
|
||||
// @@ This code appears to be wrong.
|
||||
static __m128 half_to_float4_SSE2(__m128i h)
|
||||
{
|
||||
#define SSE_CONST4(name, val) static const __declspec(align(16)) uint name[4] = { (val), (val), (val), (val) }
|
||||
#define CONST(name) *(const __m128i *)&name
|
||||
|
||||
SSE_CONST4(mask_nosign, 0x7fff);
|
||||
SSE_CONST4(mask_justsign, 0x8000);
|
||||
SSE_CONST4(mask_shifted_exp, 0x7c00 << 13);
|
||||
SSE_CONST4(expadjust_normal, (127 - 15) << 23);
|
||||
SSE_CONST4(expadjust_infnan, (128 - 16) << 23);
|
||||
SSE_CONST4(expadjust_denorm, 1 << 23);
|
||||
SSE_CONST4(magic_denorm, 113 << 23);
|
||||
|
||||
__m128i mnosign = CONST(mask_nosign);
|
||||
__m128i expmant = _mm_and_si128(mnosign, h);
|
||||
__m128i justsign = _mm_and_si128(h, CONST(mask_justsign));
|
||||
__m128i mshiftexp = CONST(mask_shifted_exp);
|
||||
__m128i eadjust = CONST(expadjust_normal);
|
||||
__m128i shifted = _mm_slli_epi32(expmant, 13);
|
||||
__m128i adjusted = _mm_add_epi32(eadjust, shifted);
|
||||
__m128i justexp = _mm_and_si128(shifted, mshiftexp);
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
__m128i b_isinfnan = _mm_cmpeq_epi32(mshiftexp, justexp);
|
||||
__m128i b_isdenorm = _mm_cmpeq_epi32(zero, justexp);
|
||||
|
||||
__m128i adj_infnan = _mm_and_si128(b_isinfnan, CONST(expadjust_infnan));
|
||||
__m128i adjusted2 = _mm_add_epi32(adjusted, adj_infnan);
|
||||
|
||||
__m128i adj_den = CONST(expadjust_denorm);
|
||||
__m128i den1 = _mm_add_epi32(adj_den, adjusted2);
|
||||
__m128 den2 = _mm_sub_ps(_mm_castsi128_ps(den1), *(const __m128 *)&magic_denorm);
|
||||
__m128 adjusted3 = _mm_and_ps(den2, _mm_castsi128_ps(b_isdenorm));
|
||||
__m128 adjusted4 = _mm_andnot_ps(_mm_castsi128_ps(b_isdenorm), _mm_castsi128_ps(adjusted2));
|
||||
__m128 adjusted5 = _mm_or_ps(adjusted3, adjusted4);
|
||||
__m128i sign = _mm_slli_epi32(justsign, 16);
|
||||
__m128 final = _mm_or_ps(adjusted5, _mm_castsi128_ps(sign));
|
||||
|
||||
// ~21 SSE2 ops.
|
||||
return final;
|
||||
|
||||
#undef SSE_CONST4
|
||||
#undef CONST
|
||||
}
|
||||
|
||||
|
||||
void nv::half_to_float_array(const uint16 * vin, float * vout, int count) {
|
||||
nvDebugCheck((intptr_t(vin) & 15) == 0);
|
||||
nvDebugCheck((intptr_t(vout) & 15) == 0);
|
||||
nvDebugCheck((count & 7) == 0);
|
||||
|
||||
__m128i zero = _mm_setzero_si128();
|
||||
|
||||
for (int i = 0; i < count; i += 8)
|
||||
{
|
||||
__m128i in = _mm_loadu_si128((const __m128i *)(vin + i));
|
||||
__m128i a = _mm_unpacklo_epi16(in, zero);
|
||||
__m128i b = _mm_unpackhi_epi16(in, zero);
|
||||
|
||||
__m128 outa = half_to_float4_SSE2(a);
|
||||
_mm_storeu_ps((float *)(vout + i), outa);
|
||||
|
||||
__m128 outb = half_to_float4_SSE2(b);
|
||||
_mm_storeu_ps((float *)(vout + i + 4), outb);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
// @@ These tables could be smaller.
|
||||
namespace nv {
|
||||
uint32 mantissa_table[2048];
|
||||
uint32 mantissa_table[2048] = { 0xDEADBEEF };
|
||||
uint32 exponent_table[64];
|
||||
uint32 offset_table[64];
|
||||
}
|
||||
|
@ -9,6 +9,9 @@ namespace nv {
|
||||
uint32 half_to_float( uint16 h );
|
||||
uint16 half_from_float( uint32 f );
|
||||
|
||||
// vin,vout must be 16 byte aligned. count must be a multiple of 8.
|
||||
void half_to_float_array(const uint16 * vin, float * vout, int count);
|
||||
|
||||
void half_init_tables();
|
||||
|
||||
extern uint32 mantissa_table[2048];
|
||||
@ -19,6 +22,7 @@ namespace nv {
|
||||
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
|
||||
// Table-driven half -> float conversion; returns the bit pattern of the float.
// The top six bits of `h` select the exponent and offset entries, the low ten
// bits index into the mantissa table.
inline uint32 fast_half_to_float(uint16 h)
{
    nvDebugCheck(mantissa_table[0] == 0);   // Make sure table was initialized.

    const uint upper_bits = h >> 10;
    const uint lower_bits = h & 0x3ff;

    const uint32 mantissa = mantissa_table[offset_table[upper_bits] + lower_bits];
    return mantissa + exponent_table[upper_bits];
}
|
||||
|
@ -62,6 +62,7 @@ namespace nv
|
||||
Matrix();
|
||||
explicit Matrix(float f);
|
||||
explicit Matrix(identity_t);
|
||||
Matrix(const Matrix3 & m);
|
||||
Matrix(const Matrix & m);
|
||||
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
|
||||
//explicit Matrix(const float m[]); // m is assumed to contain 16 elements
|
||||
|
@ -250,6 +250,19 @@ namespace nv
|
||||
}
|
||||
}
|
||||
|
||||
// Builds a 4x4 matrix from a 3x3 one: elements (i, j) with i, j < 3 are taken
// from `m`; every element whose first or second index is 3 is set to zero.
// NOTE(review): element (3, 3) also ends up 0, not 1 - confirm that this is
// what callers expect when using the result as a homogeneous transform.
inline Matrix::Matrix(const Matrix3 & m)
{
    // Copy the 3x3 block.
    for (int cell = 0; cell < 9; cell++) {
        const int i = cell / 3;
        const int j = cell % 3;
        operator()(i, j) = m.get(i, j);
    }

    // Clear the remaining elements (those with an index of 3).
    int k = 0;
    while (k < 4) {
        operator()(3, k) = 0;
        operator()(k, 3) = 0;
        k++;
    }
}
|
||||
|
||||
inline Matrix::Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3)
|
||||
{
|
||||
m_data[ 0] = v0.x; m_data[ 1] = v0.y; m_data[ 2] = v0.z; m_data[ 3] = v0.w;
|
||||
|
@ -16,7 +16,7 @@ using namespace nv;
|
||||
#define ENABLE_PARALLEL_FOR 0
|
||||
#endif
|
||||
|
||||
void worker(void * arg) {
|
||||
static void worker(void * arg) {
|
||||
ParallelFor * owner = (ParallelFor *)arg;
|
||||
|
||||
while(true) {
|
||||
|
@ -92,8 +92,8 @@ void ClusterFit::setColourSet(const ColorSet * set)
|
||||
{
|
||||
int p = order[i];
|
||||
#if NVTT_USE_SIMD
|
||||
NV_ALIGN_16 Vector4 tmp(values[p] * set->weights[p], set->weights[p]);
|
||||
m_weighted[i] = SimdVector(tmp.component);
|
||||
NV_ALIGN_16 Vector4 tmp(values[p], 1);
|
||||
m_weighted[i] = SimdVector(tmp.component) * SimdVector(set->weights[p]);
|
||||
m_xxsum += m_weighted[i] * m_weighted[i];
|
||||
m_xsum += m_weighted[i];
|
||||
#else
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include "nvimage/BlockDXT.h"
|
||||
|
||||
#include "nvmath/Vector.inl"
|
||||
#include "nvmath/Color.inl"
|
||||
|
||||
#include "nvcore/Memory.h"
|
||||
|
||||
@ -111,18 +112,15 @@ void FastCompressorDXT5n::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alpha
|
||||
QuickCompress::compressDXT5(rgba, block);
|
||||
}
|
||||
|
||||
#if 1
|
||||
#if 0
|
||||
void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
set.setUniformWeights();
|
||||
set.createMinimalSet(false);
|
||||
|
||||
ClusterFit fit;
|
||||
fit.setMetric(compressionOptions.colorWeight);
|
||||
set.createMinimalSet(/*ignoreTransparent*/false);
|
||||
|
||||
BlockDXT1 * block = new(output) BlockDXT1;
|
||||
|
||||
if (set.isSingleColor(true))
|
||||
if (set.isSingleColor(/*ignoreAlpha*/true))
|
||||
{
|
||||
Color32 c;
|
||||
c.r = uint8(clamp(set.colors[0].x, 0.0f, 1.0f) * 255);
|
||||
@ -133,16 +131,19 @@ void CompressorDXT1::compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, co
|
||||
}
|
||||
else
|
||||
{
|
||||
ClusterFit fit;
|
||||
fit.setMetric(compressionOptions.colorWeight);
|
||||
fit.setColourSet(&set);
|
||||
|
||||
Vector3 start, end;
|
||||
|
||||
fit.compress4(&start, &end);
|
||||
QuickCompress::outputBlock4(set, start, end, block);
|
||||
|
||||
if (fit.compress3(&start, &end)) {
|
||||
QuickCompress::outputBlock3(set, start, end, block);
|
||||
}
|
||||
else {
|
||||
QuickCompress::outputBlock4(set, start, end, block);
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
@ -219,16 +220,15 @@ void CompressorDXT3::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode,
|
||||
nvsquish::WeightedClusterFit fit;
|
||||
fit.SetMetric(compressionOptions.colorWeight.x, compressionOptions.colorWeight.y, compressionOptions.colorWeight.z);
|
||||
|
||||
int flags = 0;
|
||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||
int flags = 0;
|
||||
if (alphaMode == nvtt::AlphaMode_Transparency) flags |= nvsquish::kWeightColourByAlpha;
|
||||
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block->color);
|
||||
nvsquish::ColourSet colours((uint8 *)rgba.colors(), flags);
|
||||
fit.SetColourSet(&colours, 0);
|
||||
fit.Compress(&block->color);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void CompressorDXT5::compressBlock(ColorBlock & rgba, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
BlockDXT5 * block = new(output) BlockDXT5;
|
||||
|
@ -64,7 +64,7 @@ namespace nv
|
||||
|
||||
|
||||
// Normal CPU compressors.
|
||||
#if 1
|
||||
#if 0
|
||||
struct CompressorDXT1 : public ColorSetCompressor
|
||||
{
|
||||
virtual void compressBlock(ColorSet & set, nvtt::AlphaMode alphaMode, const nvtt::CompressionOptions::Private & compressionOptions, void * output);
|
||||
|
@ -310,7 +310,7 @@ void PixelFormatConverter::compress(nvtt::AlphaMode /*alphaMode*/, uint w, uint
|
||||
{
|
||||
for (uint y = 0; y < h; y++)
|
||||
{
|
||||
const float * src = (const float *)data + y * w;
|
||||
const float * src = (const float *)data + (z * h + y) * w;
|
||||
|
||||
BitStream stream(dst);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user