Merge changes from the witness.

This commit is contained in:
castano
2011-09-27 17:48:46 +00:00
parent 9c0658edca
commit 3c0ab2d3f3
47 changed files with 1811 additions and 186 deletions

View File

@ -0,0 +1,346 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="nvthread"
ProjectGUID="{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}"
RootNamespace="nvthread"
Keyword="Win32Proj"
TargetFrameworkVersion="131072"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
InlineFunctionExpansion="0"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="0"
OmitFramePointers="true"
EnableFiberSafeOptimizations="true"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="false"
EnableEnhancedInstructionSet="2"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
WholeProgramOptimization="true"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="false"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="..\..\..\src\nvthread\Atomic.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Event.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Event.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Mutex.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Mutex.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\nvthread.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\nvthread.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ParallelFor.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ParallelFor.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Thread.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Thread.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ThreadPool.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ThreadPool.h"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -4,6 +4,7 @@ Microsoft Visual Studio Solution File, Format Version 10.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38} {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669} {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -88,6 +89,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "imperativeapi", "imperative
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcproj", "{C33787E3-5564-4834-9FE3-A9020455A669}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcproj", "{C33787E3-5564-4834-9FE3-A9020455A669}"
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthread.vcproj", "{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms
@ -457,6 +460,28 @@ Global
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.Build.0 = Release|Win32 {C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.Build.0 = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.ActiveCfg = Release|x64 {C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.ActiveCfg = Release|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.Build.0 = Release|x64 {C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Win32.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.ActiveCfg = Debug|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.Build.0 = Debug|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Win32.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.ActiveCfg = Release|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.Build.0 = Release|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.Build.0 = Release|x64
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@ -6,6 +6,7 @@ INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/stb)
SUBDIRS(nvcore) SUBDIRS(nvcore)
SUBDIRS(nvmath) SUBDIRS(nvmath)
SUBDIRS(nvimage) SUBDIRS(nvimage)
SUBDIRS(nvthread)
SUBDIRS(nvtt) SUBDIRS(nvtt)
# OpenGL # OpenGL

View File

@ -78,8 +78,8 @@ namespace nv
} }
template <typename T> template <typename T>
bool find(const T & element, const T * restrict ptr, uint count, uint * index) { bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
for (uint i = 0; i < count; i++) { for (uint i = begin; i < end; i++) {
if (ptr[i] == element) { if (ptr[i] == element) {
if (index != NULL) *index = i; if (index != NULL) *index = i;
return true; return true;
@ -257,15 +257,15 @@ namespace nv
} }
/// Return true if element found. /// Return true if element found.
NV_FORCEINLINE bool find(const T & element, uint * index) const NV_FORCEINLINE bool find(const T & element, uint * indexPtr) const
{ {
return find(element, 0, m_size, index); return find(element, 0, m_size, indexPtr);
} }
/// Return true if element found within the given range. /// Return true if element found within the given range.
NV_FORCEINLINE bool find(const T & element, uint first, uint count, uint * index) const NV_FORCEINLINE bool find(const T & element, uint begin, uint end, uint * indexPtr) const
{ {
return ::nv::find(element, m_buffer + first, count, index); return ::nv::find(element, m_buffer, begin, end, indexPtr);
} }
/// Remove the element at the given index. This is an expensive operation! /// Remove the element at the given index. This is an expensive operation!

View File

@ -448,19 +448,6 @@ namespace
/** Win32 assert handler. */ /** Win32 assert handler. */
struct Win32AssertHandler : public AssertHandler struct Win32AssertHandler : public AssertHandler
{ {
// Code from Daniel Vogel.
static bool isDebuggerPresent()
{
HINSTANCE kernel32 = GetModuleHandle("kernel32.dll");
if (kernel32) {
FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
return true;
}
}
return false;
}
// Flush the message queue. This is necessary for the message box to show up. // Flush the message queue. This is necessary for the message box to show up.
static void flushMessageQueue() static void flushMessageQueue()
{ {
@ -487,7 +474,7 @@ namespace
nvDebug( error_string.str() ); nvDebug( error_string.str() );
} }
if (isDebuggerPresent()) { if (debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG; return NV_ABORT_DEBUG;
} }
@ -522,15 +509,6 @@ namespace
/** Xbox360 assert handler. */ /** Xbox360 assert handler. */
struct Xbox360AssertHandler : public AssertHandler struct Xbox360AssertHandler : public AssertHandler
{ {
static bool isDebuggerPresent()
{
#ifdef _DEBUG
return DmIsDebuggerPresent() == TRUE;
#else
return false;
#endif
}
// Assert handler method. // Assert handler method.
virtual int assertion( const char * exp, const char * file, int line, const char * func/*=NULL*/ ) virtual int assertion( const char * exp, const char * file, int line, const char * func/*=NULL*/ )
{ {
@ -546,7 +524,7 @@ namespace
nvDebug( error_string.str() ); nvDebug( error_string.str() );
} }
if (isDebuggerPresent()) { if (debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG; return NV_ABORT_DEBUG;
} }
@ -563,26 +541,6 @@ namespace
/** Unix assert handler. */ /** Unix assert handler. */
struct UnixAssertHandler : public AssertHandler struct UnixAssertHandler : public AssertHandler
{ {
bool isDebuggerPresent()
{
#if NV_OS_DARWIN
int mib[4];
struct kinfo_proc info;
size_t size;
mib[0] = CTL_KERN;
mib[1] = KERN_PROC;
mib[2] = KERN_PROC_PID;
mib[3] = getpid();
size = sizeof(info);
info.kp_proc.p_flag = 0;
sysctl(mib,4,&info,&size,NULL,0);
return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
#else
// if ppid != sid, some process spawned our app, probably a debugger.
return getsid(getpid()) != getppid();
#endif
}
// Assert handler method. // Assert handler method.
virtual int assertion(const char * exp, const char * file, int line, const char * func) virtual int assertion(const char * exp, const char * file, int line, const char * func)
{ {
@ -594,7 +552,7 @@ namespace
} }
#if _DEBUG #if _DEBUG
if (isDebuggerPresent()) { if (debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG; return NV_ABORT_DEBUG;
} }
#endif #endif
@ -702,7 +660,10 @@ void debug::enableSigHandler()
// SYMOPT_DEFERRED_LOADS makes us not take a ton of time unless we actually log traces // SYMOPT_DEFERRED_LOADS makes us not take a ton of time unless we actually log traces
SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME); SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);
SymInitialize(GetCurrentProcess(), NULL, TRUE); if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
DWORD error = GetLastError();
nvDebug("SymInitialize returned error : %d\n", error);
}
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H) #elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
@ -743,3 +704,38 @@ void debug::disableSigHandler()
#endif #endif
} }
/// Report whether a debugger appears to be attached to the current process.
///
/// The detection strategy is selected at compile time:
///  - Win32: looks up IsDebuggerPresent in kernel32.dll at run time and calls it.
///  - Xbox: asks DmIsDebuggerPresent in _DEBUG builds and answers false otherwise.
///  - Darwin: fetches the process info through sysctl and tests the P_TRACED flag.
///  - Anything else: compares the session id with the parent pid. This is only a
///    heuristic and can misreport in either direction.
///
/// @return true when a debugger was detected, false otherwise.
bool debug::isDebuggerPresent()
{
#if NV_OS_WIN32
    // The entry point is resolved dynamically rather than linked directly.
    // The ANSI variant is requested explicitly because the module name is a narrow
    // string literal; the TCHAR macro would not compile when UNICODE is defined.
    HINSTANCE kernel32 = GetModuleHandleA("kernel32.dll");
    if (kernel32) {
        FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
        if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
            return true;
        }
    }
    return false;
#elif NV_OS_XBOX
#ifdef _DEBUG
    return DmIsDebuggerPresent() == TRUE;
#else
    return false;
#endif
#elif NV_OS_DARWIN
    int mib[4];
    struct kinfo_proc info;
    size_t size;
    mib[0] = CTL_KERN;
    mib[1] = KERN_PROC;
    mib[2] = KERN_PROC_PID;
    mib[3] = getpid();
    size = sizeof(info);
    // The sysctl result is not checked; clearing the flag first makes a failed
    // query read as "not traced".
    info.kp_proc.p_flag = 0;
    sysctl(mib,4,&info,&size,NULL,0);
    return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
#else
    // if ppid != sid, some process spawned our app, probably a debugger.
    return getsid(getpid()) != getppid();
#endif
}

View File

@ -10,6 +10,9 @@
# include <stdarg.h> // va_list # include <stdarg.h> // va_list
#endif #endif
// Make sure we are using our assert.
#undef assert
#define NV_ABORT_DEBUG 1 #define NV_ABORT_DEBUG 1
#define NV_ABORT_IGNORE 2 #define NV_ABORT_IGNORE 2
#define NV_ABORT_EXIT 3 #define NV_ABORT_EXIT 3
@ -116,12 +119,6 @@
#endif #endif
#if __cplusplus > 199711L
#define nvStaticCheck(x) static_assert(x)
#else
#define nvStaticCheck(x) typedef char NV_DO_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
#endif
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL); NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL);
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2))); NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
@ -166,6 +163,8 @@ namespace nv
NVCORE_API void enableSigHandler(); NVCORE_API void enableSigHandler();
NVCORE_API void disableSigHandler(); NVCORE_API void disableSigHandler();
NVCORE_API bool isDebuggerPresent();
} }
} // nv namespace } // nv namespace

View File

@ -2,7 +2,7 @@
#error "Do not include this file directly." #error "Do not include this file directly."
#endif #endif
//#include <stdint.h> // uint8_t, int8_t, ... #include <stdint.h> // uint8_t, int8_t, ... uintptr_t
#include <cstddef> // operator new, size_t, NULL #include <cstddef> // operator new, size_t, NULL
// Function linkage // Function linkage
@ -67,4 +67,4 @@ typedef int64_t int64;
// Aliases // Aliases
typedef uint32 uint; typedef uint32 uint;
*/ */

View File

@ -12,10 +12,10 @@
#include <new> // new and delete #include <new> // new and delete
#if NV_CC_GNUC #if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16))) # define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
#else #else
# define NV_ALIGN_16 __declspec(align(16)) # define NV_ALIGN_16 __declspec(align(16))
#endif #endif
@ -43,15 +43,15 @@ extern "C" {
namespace nv { namespace nv {
// C++ helpers. // C++ helpers.
template <typename T> T * malloc(size_t count) { template <typename T> NV_FORCEINLINE T * malloc(size_t count) {
return (T *)::malloc(sizeof(T) * count); return (T *)::malloc(sizeof(T) * count);
} }
template <typename T> T * realloc(T * ptr, size_t count) { template <typename T> NV_FORCEINLINE T * realloc(T * ptr, size_t count) {
return (T *)::realloc(ptr, sizeof(T) * count); return (T *)::realloc(ptr, sizeof(T) * count);
} }
template <typename T> void free(const T * ptr) { template <typename T> NV_FORCEINLINE void free(const T * ptr) {
::free((void *)ptr); ::free((void *)ptr);
} }

View File

@ -72,7 +72,7 @@ namespace nv
#if NV_OS_WIN32 #if NV_OS_WIN32
return _ftell_nolock(m_fp); return _ftell_nolock(m_fp);
#else #else
return ftell(m_fp); return (uint)ftell(m_fp);
#endif #endif
} }
@ -85,9 +85,9 @@ namespace nv
uint end = _ftell_nolock(m_fp); uint end = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, pos, SEEK_SET); _fseek_nolock(m_fp, pos, SEEK_SET);
#else #else
uint pos = ftell(m_fp); uint pos = (uint)ftell(m_fp);
fseek(m_fp, 0, SEEK_END); fseek(m_fp, 0, SEEK_END);
uint end = ftell(m_fp); uint end = (uint)ftell(m_fp);
fseek(m_fp, pos, SEEK_SET); fseek(m_fp, pos, SEEK_SET);
#endif #endif
return end; return end;

View File

@ -189,7 +189,7 @@ StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
} }
/** Preallocate space. */ /** Preallocate space. */
StringBuilder::StringBuilder( int size_hint ) : m_size(size_hint) StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint)
{ {
nvDebugCheck(m_size > 0); nvDebugCheck(m_size > 0);
m_str = strAlloc(m_size); m_str = strAlloc(m_size);
@ -203,9 +203,15 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
} }
/** Copy string. */ /** Copy string. */
StringBuilder::StringBuilder( const char * s, int extra_size_hint/*=0*/ ) : m_size(0), m_str(NULL) StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
{ {
copy(s, extra_size_hint); copy(s);
}
/** Copy string. */
StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL)
{
copy(s, len);
} }
/** Delete the string. */ /** Delete the string. */
@ -396,15 +402,25 @@ StringBuilder & StringBuilder::reserve( uint size_hint )
/** Copy a string safely. */ /** Copy a string safely. */
StringBuilder & StringBuilder::copy( const char * s, int extra_size/*=0*/ ) StringBuilder & StringBuilder::copy(const char * s)
{ {
nvCheck( s != NULL ); nvCheck( s != NULL );
const uint str_size = uint(strlen( s )) + 1; const uint str_size = uint(strlen( s )) + 1;
reserve(str_size + extra_size); reserve(str_size);
memcpy(m_str, s, str_size); memcpy(m_str, s, str_size);
return *this; return *this;
} }
/** Copy at most @a len characters of a string. The result is always null terminated.
 *
 * @param s    Source characters, must not be NULL.
 * @param len  Number of characters to take from @a s, not counting the terminator.
 * @return     This builder, so that calls can be chained.
 */
StringBuilder & StringBuilder::copy(const char * s, uint len)
{
    nvCheck( s != NULL );
    const uint str_size = len + 1; // Room for the characters plus the terminator.
    reserve(str_size);
    strCpy(m_str, str_size, s, len);
    // strCpy is defined elsewhere and it cannot be seen from here whether it terminates
    // the destination when s is longer than len, so terminate explicitly. The index is
    // inside the str_size bytes that were just handed to strCpy as the buffer size.
    m_str[len] = '\0';
    return *this;
}
/** Copy an StringBuilder. */ /** Copy an StringBuilder. */
StringBuilder & StringBuilder::copy( const StringBuilder & s ) StringBuilder & StringBuilder::copy( const StringBuilder & s )

View File

@ -59,9 +59,10 @@ namespace nv
public: public:
StringBuilder(); StringBuilder();
explicit StringBuilder( int size_hint ); explicit StringBuilder( uint size_hint );
StringBuilder( const char * str, int extra_size_hint = 0); StringBuilder(const char * str);
StringBuilder( const StringBuilder & ); StringBuilder(const char * str, uint len);
StringBuilder(const StringBuilder & other);
~StringBuilder(); ~StringBuilder();
@ -75,9 +76,10 @@ namespace nv
StringBuilder & number( int i, int base = 10 ); StringBuilder & number( int i, int base = 10 );
StringBuilder & number( uint i, int base = 10 ); StringBuilder & number( uint i, int base = 10 );
StringBuilder & reserve( uint size_hint ); StringBuilder & reserve(uint size_hint);
StringBuilder & copy( const char * str, int extra_size/*=0*/ ); StringBuilder & copy(const char * str);
StringBuilder & copy( const StringBuilder & str ); StringBuilder & copy(const char * str, uint len);
StringBuilder & copy(const StringBuilder & str);
StringBuilder & toLower(); StringBuilder & toLower();
StringBuilder & toUpper(); StringBuilder & toUpper();
@ -145,7 +147,7 @@ namespace nv
public: public:
Path() : StringBuilder() {} Path() : StringBuilder() {}
explicit Path(int size_hint) : StringBuilder(size_hint) {} explicit Path(int size_hint) : StringBuilder(size_hint) {}
Path(const char * str, int extra_size_hint = 0) : StringBuilder(str, extra_size_hint) {} Path(const char * str) : StringBuilder(str) {}
Path(const Path & path) : StringBuilder(path) {} Path(const Path & path) : StringBuilder(path) {}
const char * fileName() const; const char * fileName() const;

View File

@ -7,9 +7,76 @@
#include "nvcore.h" #include "nvcore.h"
#include "Debug.h" // nvDebugCheck #include "Debug.h" // nvDebugCheck
// Just in case. Grrr.
#undef min
#undef max
namespace nv namespace nv
{ {
// Sign reinterpretation helpers. Each overload maps an integer to the type of the same
// width and opposite signedness, so a call site cannot change the width by accident the
// way a hand written cast can. Idea from CB:
// http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html

// Unsigned to signed, same width.
inline int8 asSigned(uint8 x)
{
    return static_cast<int8>(x);
}
inline int16 asSigned(uint16 x)
{
    return static_cast<int16>(x);
}
inline int32 asSigned(uint32 x)
{
    return static_cast<int32>(x);
}
inline int64 asSigned(uint64 x)
{
    return static_cast<int64>(x);
}

// Signed to unsigned, same width.
inline uint8 asUnsigned(int8 x)
{
    return static_cast<uint8>(x);
}
inline uint16 asUnsigned(int16 x)
{
    return static_cast<uint16>(x);
}
inline uint32 asUnsigned(int32 x)
{
    return static_cast<uint32>(x);
}
inline uint64 asUnsigned(int64 x)
{
    return static_cast<uint64>(x);
}
/*
template <typename T> inline int8 toI8(T x) {
nvDebugCheck(x <= INT8_MAX);
nvDebugCheck(x >= INT8_MIN);
int8 y = (int8) x;
nvDebugCheck(x == (T)y);
return y;
}
template <typename T> inline uint8 toU8(T x) {
nvDebugCheck(x <= UINT8_MAX);
nvDebugCheck(x >= 0);
return (uint8) x;
}
template <typename T> inline int16 toI16(T x) {
nvDebugCheck(x <= INT16_MAX);
nvDebugCheck(x >= INT16_MIN);
return (int16) x;
}
template <typename T> inline uint16 toU16(T x) {
nvDebugCheck(x <= UINT16_MAX);
nvDebugCheck(x >= 0);
return (uint16) x;
}
template <typename T> inline int32 toI32(T x) {
nvDebugCheck(x <= INT32_MAX);
nvDebugCheck(x >= INT32_MIN);
return (int32) x;
}
template <typename T> inline uint32 toU32(T x) {
nvDebugCheck(x <= UINT32_MAX);
nvDebugCheck(x >= 0);
return (uint32) x;
}
template <typename T> inline int64 toI64(T x) {
nvDebugCheck(x <= INT64_MAX);
nvDebugCheck(x >= INT64_MIN);
return (int64) x;
}
template <typename T> inline uint64 toU64(T x) {
nvDebugCheck(x <= UINT64_MAX);
nvDebugCheck(x >= 0);
return (uint64) x;
}
*/
/// Swap two values. /// Swap two values.
template <typename T> template <typename T>
inline void swap(T & a, T & b) inline void swap(T & a, T & b)

View File

@ -4,9 +4,6 @@
#ifndef NV_CORE_H #ifndef NV_CORE_H
#define NV_CORE_H #define NV_CORE_H
// cmake config
#include <nvconfig.h>
// Function linkage // Function linkage
#if NVCORE_SHARED #if NVCORE_SHARED
#ifdef NVCORE_EXPORTS #ifdef NVCORE_EXPORTS
@ -91,7 +88,11 @@
// @@ NV_CC_MSVC7 // @@ NV_CC_MSVC7
// @@ NV_CC_MSVC8 // @@ NV_CC_MSVC8
#if defined POSH_COMPILER_GCC #if defined POSH_COMPILER_CLANG
# define NV_CC_CLANG 1
# define NV_CC_GNUC 1 // Clang is compatible with GCC; NV_CC_GNUC is the macro the GCC code paths test.
# define NV_CC_GCC 1 // Old spelling, kept so that anything already testing it keeps working.
# define NV_CC_STRING "clang"
#elif defined POSH_COMPILER_GCC
# define NV_CC_GNUC 1 # define NV_CC_GNUC 1
# define NV_CC_STRING "gcc" # define NV_CC_STRING "gcc"
#elif defined POSH_COMPILER_MSVC #elif defined POSH_COMPILER_MSVC
@ -108,6 +109,18 @@
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING #define NV_ENDIAN_STRING POSH_ENDIAN_STRING
// Define the right printf prefix for size_t arguments:
// When POSH_64BIT_POINTER is set, the 64 bit integer prefix provided by posh is reused;
// otherwise the macro expands to nothing.
// NOTE(review): this presumes size_t is as wide as a pointer and that an empty prefix
// suits size_t on the remaining targets - confirm.
#if POSH_64BIT_POINTER
# define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
#else
# define NV_SIZET_PRINTF_PREFIX
#endif
// cmake config
#include "nvconfig.h"
// Type definitions: // Type definitions:
typedef posh_u8_t uint8; typedef posh_u8_t uint8;
typedef posh_i8_t int8; typedef posh_i8_t int8;
@ -144,6 +157,8 @@ typedef uint32 uint;
private: \ private: \
void *operator new(size_t size); \ void *operator new(size_t size); \
void *operator new[](size_t size); void *operator new[](size_t size);
//static void *operator new(size_t size); \
//static void *operator new[](size_t size);
// String concatenation macros. // String concatenation macros.
#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2) #define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
@ -153,6 +168,25 @@ typedef uint32 uint;
#define NV_STRING2(x) #x #define NV_STRING2(x) #x
#define NV_STRING(x) NV_STRING2(x) #define NV_STRING(x) NV_STRING2(x)
// Compile time assertion: nvStaticCheck(x) breaks the build when the constant
// expression x is false.
#if __cplusplus > 199711L
// The message argument of static_assert is only optional from C++17 on, so always pass
// one; the stringized condition doubles as the diagnostic text.
#define nvStaticCheck(x) static_assert(x, #x)
#else
// Older compilers: declare an array typedef whose size is negative when the condition is
// false. A size of zero is not enough, some compilers accept zero length arrays as an
// extension and would let a failed check through.
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x) ? 1 : -1]
#endif
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
// Make sure type definitions are fine: every fixed width integer typedef must have
// exactly the size its name promises, the 64 bit ones included.
NV_COMPILER_CHECK(sizeof(int8) == 1);
NV_COMPILER_CHECK(sizeof(uint8) == 1);
NV_COMPILER_CHECK(sizeof(int16) == 2);
NV_COMPILER_CHECK(sizeof(uint16) == 2);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int64) == 8);
NV_COMPILER_CHECK(sizeof(uint64) == 8);
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0])) #define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 1 #if 1
@ -180,6 +214,7 @@ typedef uint32 uint;
// Null index. @@ Move this somewhere else... it's only used by nvmesh. // Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0); //const unsigned int NIL = unsigned int(~0);
//#define NIL uint(~0)
// Null pointer. // Null pointer.
#ifndef NULL #ifndef NULL

View File

@ -1418,7 +1418,7 @@ uint DirectDrawSurface::mipmapSize(uint mipmap) const
{ {
nvDebugCheck((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE)); nvDebugCheck((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE));
uint pitch = computeBytePitch(w, header.pf.bitcount, 8); // Asuming 8 bit alignment, which is the same D3DX expects. uint pitch = computeBytePitch(w, header.pf.bitcount, 1); // Asuming 1 byte alignment, which is the same D3DX expects.
return pitch * h * d; return pitch * h * d;
} }

View File

@ -181,7 +181,7 @@ void FloatImage::normalize(uint baseComponent)
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
Vector3 normal(xChannel[i], yChannel[i], zChannel[i]); Vector3 normal(xChannel[i], yChannel[i], zChannel[i]);
normal = normalizeSafe(normal, Vector3(zero), 0.0f); normal = normalizeSafe(normal, Vector3(0), 0.0f);
xChannel[i] = normal.x; xChannel[i] = normal.x;
yChannel[i] = normal.y; yChannel[i] = normal.y;

View File

@ -56,6 +56,7 @@ namespace nv
//@{ //@{
NVIMAGE_API void clear(float f = 0.0f); NVIMAGE_API void clear(float f = 0.0f);
NVIMAGE_API void clear(uint component, float f = 0.0f); NVIMAGE_API void clear(uint component, float f = 0.0f);
NVIMAGE_API void copyChannel(uint src, uint dst);
NVIMAGE_API void normalize(uint base_component); NVIMAGE_API void normalize(uint base_component);
@ -113,8 +114,6 @@ namespace nv
uint pixelCount() const { return m_pixelCount; } uint pixelCount() const { return m_pixelCount; }
// @@ It would make sense to swap the order of the arguments so that 'c' is always first.
/** @name Pixel access. */ /** @name Pixel access. */
//@{ //@{
const float * channel(uint c) const; const float * channel(uint c) const;

View File

@ -70,14 +70,14 @@ namespace nv
inline const Color32 & Image::pixel(uint x, uint y) const inline const Color32 & Image::pixel(uint x, uint y) const
{ {
nvDebugCheck(x < width() && y < height()); nvDebugCheck(x < m_width && y < m_height);
return pixel(y * width() + x); return pixel(y * m_width + x);
} }
inline Color32 & Image::pixel(uint x, uint y) inline Color32 & Image::pixel(uint x, uint y)
{ {
nvDebugCheck(x < width() && y < height()); nvDebugCheck(x < m_width && y < m_height);
return pixel(y * width() + x); return pixel(y * m_width + x);
} }
} // nv namespace } // nv namespace

View File

@ -215,7 +215,7 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName)
StdInputStream stream(fileName); StdInputStream stream(fileName);
if (stream.isError()) { if (stream.isError()) {
return false; return NULL;
} }
return loadFloat(fileName, stream); return loadFloat(fileName, stream);
@ -324,9 +324,9 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount) bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount)
{ {
#if !defined(HAVE_FREEIMAGE)
const char * extension = Path::extension(fileName); const char * extension = Path::extension(fileName);
#if !defined(HAVE_FREEIMAGE)
#if defined(HAVE_OPENEXR) #if defined(HAVE_OPENEXR)
if (strCaseCmp(extension, ".exr") == 0) { if (strCaseCmp(extension, ".exr") == 0) {
return saveFloatEXR(fileName, fimage, baseComponent, componentCount); return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
@ -711,7 +711,7 @@ Image * nv::ImageIO::loadTGA(Stream & s)
case TGA_TYPE_INDEXED: case TGA_TYPE_INDEXED:
if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) { if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) {
nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" ); nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" );
return false; return NULL;
} }
pal = true; pal = true;
break; break;
@ -732,7 +732,7 @@ Image * nv::ImageIO::loadTGA(Stream & s)
default: default:
nvDebug( "*** loadTGA: Error, unsupported image type.\n" ); nvDebug( "*** loadTGA: Error, unsupported image type.\n" );
return false; return NULL;
} }
const uint pixel_size = (tga.pixel_size/8); const uint pixel_size = (tga.pixel_size/8);
@ -1369,7 +1369,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
// Read the entire file. // Read the entire file.
Array<uint8> byte_array; Array<uint8> byte_array;
byte_array.resize(s.size()); byte_array.resize(s.size());
s.serialize(byte_array.mutableBuffer(), s.size()); s.serialize(byte_array.buffer(), s.size());
jpeg_decompress_struct cinfo; jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr; jpeg_error_mgr jerr;

View File

@ -487,46 +487,126 @@ nv::half_to_float( uint16 h )
return (f_result); return (f_result);
} }
uint32
nv::fast_half_to_float( uint16 h )
{
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
const uint32 h_m_mask = _uint32_li( 0x000003ff );
const uint32 h_s_mask = _uint32_li( 0x00008000 );
const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 );
const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d );
const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 );
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
const uint32 f_m_mask = _uint32_li( 0x007fffff );
const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e );
const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
const uint32 f_e_pos = _uint32_li( 0x00000017 );
const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff );
const uint32 h_e = _uint32_and( h, h_e_mask );
const uint32 h_m = _uint32_and( h, h_m_mask );
const uint32 h_s = _uint32_and( h, h_s_mask );
const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
const uint32 h_m_nlz = _uint32_cntlz( h_m );
const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset );
const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset );
const uint32 f_em = _uint32_or( f_e, f_m );
const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa );
const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask );
const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
const uint32 f_em_nan = _uint32_or( f_e_mask, f_m );
const uint32 is_e_eqz_msb = _uint32_dec( h_e );
const uint32 is_m_nez_msb = _uint32_neg( h_m );
const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
const uint32 is_zero = _uint32_ext( is_zero_msb );
const uint32 f_zero_result = _uint32_andc( f_em, is_zero );
const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
const uint32 f_result = _uint32_or( f_s, f_denorm_result );
return (f_result); // @@ These tables could be smaller.
static uint32 mantissa_table[2048];
static uint32 exponent_table[64];
static uint32 offset_table[64];
void nv::half_init_tables()
{
// Init mantissa table.
mantissa_table[0] = 0;
for (int i = 1; i < 1024; i++) {
uint m = i << 13;
uint e = 0;
while ((m & 0x00800000) == 0) {
e -= 0x00800000;
m <<= 1;
}
m &= ~0x00800000;
e += 0x38800000;
mantissa_table[i] = m | e;
}
for (int i = 1024; i < 2048; i++) {
mantissa_table[i] = 0x38000000 + ((i - 1024) << 13);
}
// Init exponent table.
exponent_table[0] = 0;
for (int i = 1; i < 31; i++) {
exponent_table[i] = (i << 23);
}
exponent_table[31] = 0x47800000;
exponent_table[32] = 0x80000000;
for (int i = 33; i < 63; i++) {
exponent_table[i] = 0x80000000 + ((i - 32) << 23);
}
exponent_table[63] = 0xC7800000;
// Init offset table.
offset_table[0] = 0;
for (int i = 1; i < 32; i++) {
offset_table[i] = 1024;
}
offset_table[32] = 0;
for (int i = 33; i < 64; i++) {
offset_table[i] = 1024;
}
/*for (int i = 0; i < 64; i++) {
offset_table[i] = ((i & 31) != 0) * 1024;
}*/
} }
// Table-driven half -> float conversion, after:
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
// Combines entries of mantissa_table, offset_table and exponent_table; those
// tables are filled in by nv::half_init_tables().
uint32 nv::fast_half_to_float(uint16 h)
{
    // Upper six bits (sign + exponent) pick the exponent/offset entries,
    // lower ten bits (mantissa) index into the mantissa table.
    const uint upperBits = h >> 10;
    const uint mantissaBits = h & 0x3ff;
    const uint mantissaIndex = offset_table[upperBits] + mantissaBits;
    return mantissa_table[mantissaIndex] + exponent_table[upperBits];
}
#if 0
// Inaccurate conversion suggested at the ffmpeg mailing list:
// http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html
uint32 nv::fast_half_to_float(uint16 v)
{
if (v & 0x8000) return 0;
uint exp = v >> 10;
if (!exp) return (v>>9)&1;
if (exp >= 15) return 0xffff;
v <<= 6;
return (v+(1<<16)) >> (15-exp);
}
#endif
#if 0
// Some more from a gamedev thread:
// http://www.devmaster.net/forums/showthread.php?t=10924
// I believe it does not handle specials either.
// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
__asm
{
movaps xmm1, xmm0 // Input in xmm0
movaps xmm2, xmm0
andps xmm0, half_sign
andps xmm1, half_exponent
andps xmm2, half_mantissa
paddd xmm1, half_bias_offset
pslld xmm0, 16
pslld xmm1, 13
pslld xmm2, 13
orps xmm1, xmm2
orps xmm0, xmm1 // Result in xmm0
}
#endif

View File

@ -9,8 +9,9 @@ namespace nv {
uint32 half_to_float( uint16 h ); uint32 half_to_float( uint16 h );
uint16 half_from_float( uint32 f ); uint16 half_from_float( uint32 f );
// Does not handle NaN or infinity. void half_init_tables();
uint32 fast_half_to_float( uint16 h );
uint32 fast_half_to_float(uint16 h);
inline uint16 to_half(float c) { inline uint16 to_half(float c) {
union { float f; uint32 u; } f; union { float f; uint32 u; } f;

View File

@ -9,15 +9,14 @@
namespace nv namespace nv
{ {
enum zero_t { zero };
enum identity_t { identity }; enum identity_t { identity };
class NVMATH_CLASS Matrix3 class NVMATH_CLASS Matrix3
{ {
public: public:
Matrix3(); Matrix3();
Matrix3(zero_t); explicit Matrix3(float f);
Matrix3(identity_t); explicit Matrix3(identity_t);
Matrix3(const Matrix3 & m); Matrix3(const Matrix3 & m);
Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2); Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
@ -41,10 +40,10 @@ namespace nv
inline Matrix3::Matrix3() {} inline Matrix3::Matrix3() {}
inline Matrix3::Matrix3(zero_t) inline Matrix3::Matrix3(float f)
{ {
for(int i = 0; i < 9; i++) { for(int i = 0; i < 9; i++) {
m_data[i] = 0.0f; m_data[i] = f;
} }
} }
@ -204,11 +203,11 @@ namespace nv
typedef Matrix const & Arg; typedef Matrix const & Arg;
Matrix(); Matrix();
Matrix(zero_t); explicit Matrix(float f);
Matrix(identity_t); explicit Matrix(identity_t);
Matrix(const Matrix & m); Matrix(const Matrix & m);
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3); Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
Matrix(const scalar m[]); // m is assumed to contain 16 elements //explicit Matrix(const scalar m[]); // m is assumed to contain 16 elements
scalar data(uint idx) const; scalar data(uint idx) const;
scalar & data(uint idx); scalar & data(uint idx);
@ -237,7 +236,7 @@ namespace nv
{ {
} }
inline Matrix::Matrix(zero_t) inline Matrix::Matrix(float f)
{ {
for(int i = 0; i < 16; i++) { for(int i = 0; i < 16; i++) {
m_data[i] = 0.0f; m_data[i] = 0.0f;
@ -268,12 +267,12 @@ namespace nv
m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w; m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
} }
inline Matrix::Matrix(const scalar m[]) /*inline Matrix::Matrix(const scalar m[])
{ {
for(int i = 0; i < 16; i++) { for(int i = 0; i < 16; i++) {
m_data[i] = m[i]; m_data[i] = m[i];
} }
} }*/
// Accessors // Accessors
@ -456,7 +455,7 @@ namespace nv
/// Get frustum matrix. /// Get frustum matrix.
inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar) inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar)
{ {
Matrix m(zero); Matrix m(0.0f);
scalar doubleznear = 2.0f * zNear; scalar doubleznear = 2.0f * zNear;
scalar one_deltax = 1.0f / (xmax - xmin); scalar one_deltax = 1.0f / (xmax - xmin);
@ -477,7 +476,7 @@ namespace nv
/// Get infinite frustum matrix. /// Get infinite frustum matrix.
inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear) inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear)
{ {
Matrix m(zero); Matrix m(0.0f);
scalar doubleznear = 2.0f * zNear; scalar doubleznear = 2.0f * zNear;
scalar one_deltax = 1.0f / (xmax - xmin); scalar one_deltax = 1.0f / (xmax - xmin);

View File

@ -100,6 +100,7 @@ namespace nv
explicit Vector4(scalar x); explicit Vector4(scalar x);
Vector4(scalar x, scalar y, scalar z, scalar w); Vector4(scalar x, scalar y, scalar z, scalar w);
Vector4(Vector2::Arg v, scalar z, scalar w); Vector4(Vector2::Arg v, scalar z, scalar w);
Vector4(Vector2::Arg v, Vector2::Arg u);
Vector4(Vector3::Arg v, scalar w); Vector4(Vector3::Arg v, scalar w);
Vector4(Vector4::Arg v); Vector4(Vector4::Arg v);
// Vector4(const Quaternion & v); // Vector4(const Quaternion & v);
@ -107,6 +108,7 @@ namespace nv
const Vector4 & operator=(Vector4::Arg v); const Vector4 & operator=(Vector4::Arg v);
Vector2 xy() const; Vector2 xy() const;
Vector2 zw() const;
Vector3 xyz() const; Vector3 xyz() const;
const scalar * ptr() const; const scalar * ptr() const;
@ -290,6 +292,7 @@ namespace nv
inline Vector4::Vector4(scalar f) : x(f), y(f), z(f), w(f) {} inline Vector4::Vector4(scalar f) : x(f), y(f), z(f), w(f) {}
inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : x(x), y(y), z(z), w(w) {} inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : x(x), y(y), z(z), w(w) {}
inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : x(v.x), y(v.y), z(z), w(w) {} inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : x(v.x), y(v.y), z(z), w(w) {}
inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
inline Vector4::Vector4(Vector3::Arg v, scalar w) : x(v.x), y(v.y), z(v.z), w(w) {} inline Vector4::Vector4(Vector3::Arg v, scalar w) : x(v.x), y(v.y), z(v.z), w(w) {}
inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {} inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
@ -307,6 +310,11 @@ namespace nv
return Vector2(x, y); return Vector2(x, y);
} }
// Returns the z and w components packed into a Vector2.
inline Vector2 Vector4::zw() const
{
return Vector2(z, w);
}
inline Vector3 Vector4::xyz() const inline Vector3 Vector4::xyz() const
{ {
return Vector3(x, y, z); return Vector3(x, y, z);
@ -469,6 +477,14 @@ namespace nv
return scale(v, 1.0f / l); return scale(v, 1.0f / l);
} }
// Branchless normalization from Andy Firth; no error checking is performed.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
// A tiny positive bias is added to the length so the divisor is never zero.
inline Vector2 normalizeFast(Vector2::Arg v)
{
    const float tinyBias = 1.0e-037f;
    const float biasedLength = length(v) + tinyBias;
    return scale(v, 1.0f / biasedLength);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{ {
@ -498,6 +514,14 @@ namespace nv
return vf; return vf;
} }
// 2D cross product of the edges (a - c) and (b - c).
// Note that the value is NOT halved: it is twice the signed area of the
// triangle (a, b, c), and its sign depends on the winding of the vertices.
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
{
    const Vector2 edgeA = a - c;
    const Vector2 edgeB = b - c;
    return (edgeA.x * edgeB.y - edgeA.y * edgeB.x);
}
// Vector3 // Vector3
@ -570,10 +594,10 @@ namespace nv
return scale(v, 1.0f/s); return scale(v, 1.0f/s);
} }
inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s) /*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s)
{ {
return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s); return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
} }*/
inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t) inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t)
{ {
@ -624,6 +648,15 @@ namespace nv
return scale(v, 1.0f / l); return scale(v, 1.0f / l);
} }
// Branchless normalization from Andy Firth; no error checking is performed.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
// A tiny positive bias is added to the length so the divisor is never zero.
inline Vector3 normalizeFast(Vector3::Arg v)
{
    const float tinyBias = 1.0e-037f;
    const float biasedLength = length(v) + tinyBias;
    return scale(v, 1.0f / biasedLength);
}
inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON) inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
{ {
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon); return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
@ -762,6 +795,15 @@ namespace nv
return scale(v, 1.0f / l); return scale(v, 1.0f / l);
} }
// Branchless normalization from Andy Firth; no error checking is performed.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
// A tiny positive bias is added to the length so the divisor is never zero.
inline Vector4 normalizeFast(Vector4::Arg v)
{
    const float tinyBias = 1.0e-037f;
    const float biasedLength = length(v) + tinyBias;
    return scale(v, 1.0f / biasedLength);
}
inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON) inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
{ {
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon); return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);

View File

@ -4,8 +4,9 @@
#ifndef NV_MATH_H #ifndef NV_MATH_H
#define NV_MATH_H #define NV_MATH_H
#include <nvcore/nvcore.h> #include "nvcore/nvcore.h"
#include <nvcore/Debug.h> #include "nvcore/Debug.h"
#include "nvcore/Utils.h" // clamp
#include <math.h> #include <math.h>
#include <limits.h> // INT_MAX #include <limits.h> // INT_MAX
@ -194,7 +195,7 @@ namespace nv
return f - floor(f); return f - floor(f);
} }
inline float fround(float f) inline float fround(float f) // @@ rename floatRound
{ {
// @@ Do something better. // @@ Do something better.
return float(iround(f)); return float(iround(f));
@ -210,6 +211,29 @@ namespace nv
} }
} }
// Clamps f to the [0, 1] range.
inline float saturate(float f) {
return clamp(f, 0.0f, 1.0f);
}
// Linear ramp: maps x from [edge0, edge1] to [0, 1] and clamps values outside that interval.
// NOTE(review): edge0 == edge1 makes the divisor zero; there is no guard for that case here.
inline float linearstep(float edge0, float edge1, float x) {
// Scale, bias and saturate x to 0..1 range
return saturate((x - edge0) / (edge1 - edge0));
}
// Smooth ramp between edge0 and edge1: the clamped linear ramp t is fed
// through the cubic 3t^2 - 2t^3.
inline float smoothstep(float edge0, float edge1, float x) {
    // Normalized, clamped position of x between the two edges.
    const float t = linearstep(edge0, edge1, x);
    return t*t*(3 - 2*t);
}
// Returns 1 for positive values, -1 for negative values and 0 otherwise
// (both zeros, and NaN since it compares false against everything).
inline int sign(float a)
{
    const int isPositive = (a > 0.0f) ? 1 : 0;
    const int isNegative = (a < 0.0f) ? 1 : 0;
    return isPositive - isNegative;
}
} // nv } // nv
#endif // NV_MATH_H #endif // NV_MATH_H

View File

@ -0,0 +1,26 @@
PROJECT(nvthreads)

# Sources of the threading library.
# NOTE(review): confirm that every file named here exists in this directory.
SET(THREADS_SRCS
    nvthreads.h
    Mutex.h Mutex.cpp
    SpinWaiter.h SpinWaiter.cpp
    Thread.h Thread.cpp
    ThreadLocalStorage.h ThreadLocalStorage.cpp)

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# targets
# The library header selects DLL_EXPORT/DLL_IMPORT from NVTHREAD_EXPORTS and
# NVTHREAD_SHARED (no trailing 'S'), so those are the names that must be
# defined. The previous NVTHREADS_EXPORTS spelling is kept for compatibility.
ADD_DEFINITIONS(-DNVTHREAD_EXPORTS)
ADD_DEFINITIONS(-DNVTHREADS_EXPORTS)

IF(NVTHREADS_SHARED)
    # Without this define the header expands its linkage macros to nothing
    # and no symbols are exported from the shared library.
    ADD_DEFINITIONS(-DNVTHREAD_SHARED=1)
    ADD_LIBRARY(nvthreads SHARED ${THREADS_SRCS})
ELSE(NVTHREADS_SHARED)
    ADD_LIBRARY(nvthreads ${THREADS_SRCS})
ENDIF(NVTHREADS_SHARED)

TARGET_LINK_LIBRARIES(nvthreads ${LIBS} nvcore)

INSTALL(TARGETS nvthreads
    RUNTIME DESTINATION bin
    LIBRARY DESTINATION lib
    ARCHIVE DESTINATION lib/static)

52
src/nvthread/Event.cpp Normal file
View File

@ -0,0 +1,52 @@
// This code is in the public domain -- castano@gmail.com
#include "Event.h"
#if NV_OS_WIN32
#include "Win32.h"
#elif NV_OS_UNIX
#include <pthread.h>
#endif
using namespace nv;
#if NV_OS_WIN32
// Win32 implementation: the event wraps a Win32 event object handle.
struct Event::Private {
HANDLE handle;
};
// Creates an unnamed, auto-reset event (bManualReset = FALSE) that starts
// non-signaled (bInitialState = FALSE).
// NOTE(review): the handle returned by CreateEvent is not checked for failure.
Event::Event() : m(new Private) {
m->handle = CreateEvent(NULL, FALSE, FALSE, NULL);
}
// Releases the event handle.
Event::~Event() {
CloseHandle(m->handle);
}
// Signals the event.
void Event::post() {
SetEvent(m->handle);
}
// Blocks, without timeout, until the event is signaled.
void Event::wait() {
WaitForSingleObject(m->handle, INFINITE);
}
// Signals each of the `count` events in the array, in index order.
/*static*/ void Event::post(Event * events, uint count) {
    uint index = 0;
    while (index < count) {
        events[index].post();
        ++index;
    }
}
// Waits on each of the `count` events in the array, one after another,
// returning once all of them have been signaled.
/*static*/ void Event::wait(Event * events, uint count) {
    // @@ Use wait for multiple objects?
    uint index = 0;
    while (index < count) {
        events[index].wait();
        ++index;
    }
}
#elif NV_OS_UNIX
// @@
#endif

34
src/nvthread/Event.h Normal file
View File

@ -0,0 +1,34 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_EVENT_H
#define NV_THREAD_EVENT_H
#include "nvthread.h"
#include "nvcore/Ptr.h"
namespace nv
{
// Signalable event with a platform-specific implementation hidden behind Private.
// This is intended to be used by a single waiter thread.
class NVTHREAD_CLASS Event
{
NV_FORBID_COPY(Event);
public:
Event();
~Event();
// Signals the event.
void post();
void wait(); // Wait resets the event.
// Array helpers: apply post() / wait() to each of the `count` events.
static void post(Event * events, uint count);
static void wait(Event * events, uint count);
private:
// Opaque platform-specific state, defined in the implementation file.
struct Private;
AutoPtr<Private> m;
};
} // nv namespace
#endif // NV_THREAD_EVENT_H

89
src/nvthread/Mutex.cpp Normal file
View File

@ -0,0 +1,89 @@
// This code is in the public domain -- castano@gmail.com
#include "Mutex.h"
#if NV_OS_WIN32
#include "Win32.h"
#elif NV_OS_UNIX
#include <pthread.h>
#include <errno.h> // EBUSY
#endif // NV_OS
using namespace nv;
#if NV_OS_WIN32
// Win32 implementation: the mutex is a CRITICAL_SECTION.
struct Mutex::Private {
CRITICAL_SECTION mutex;
};
// Initializes the critical section.
Mutex::Mutex () : m(new Private)
{
InitializeCriticalSection(&m->mutex);
}
// Destroys the critical section.
Mutex::~Mutex ()
{
DeleteCriticalSection(&m->mutex);
}
// Blocks until the critical section has been entered.
void Mutex::lock()
{
EnterCriticalSection(&m->mutex);
}
// Attempts to enter the critical section without blocking; returns true on success.
bool Mutex::tryLock()
{
return TryEnterCriticalSection(&m->mutex) != 0;
}
// Leaves the critical section.
void Mutex::unlock()
{
LeaveCriticalSection(&m->mutex);
}
#elif NV_OS_UNIX
// pthread implementation: the mutex is a pthread_mutex_t with default attributes.
// Return codes are only verified through nvDebugCheck.
struct Mutex::Private {
pthread_mutex_t mutex;
};
// Initializes the mutex with default attributes (NULL attribute pointer).
Mutex::Mutex () : m(new Private)
{
int result = pthread_mutex_init(&m->mutex , NULL);
nvDebugCheck(result == 0);
}
// Destroys the mutex.
Mutex::~Mutex ()
{
int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0);
}
// Blocks until the mutex has been acquired.
void Mutex::lock()
{
int result = pthread_mutex_lock(&m->mutex);
nvDebugCheck(result == 0);
}
// Attempts to acquire the mutex without blocking; returns true on success.
// EBUSY (already locked) is the only expected failure code.
bool Mutex::tryLock()
{
int result = pthread_mutex_trylock(&m->mutex);
nvDebugCheck(result == 0 || result == EBUSY);
return result == 0;
}
// Releases the mutex.
void Mutex::unlock()
{
int result = pthread_mutex_unlock(&m->mutex);
nvDebugCheck(result == 0);
}
#endif // NV_OS

47
src/nvthread/Mutex.h Normal file
View File

@ -0,0 +1,47 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_MUTEX_H
#define NV_THREAD_MUTEX_H
#include "nvthread.h"
#include "nvcore/Ptr.h"
namespace nv
{
// Mutual exclusion lock with a platform-specific implementation hidden behind Private.
class NVTHREAD_CLASS Mutex
{
NV_FORBID_COPY(Mutex);
public:
Mutex ();
~Mutex ();
// Blocks until the mutex is acquired.
void lock();
// Non-blocking acquire; returns true if the mutex was obtained.
bool tryLock();
// Releases the mutex.
void unlock();
private:
// Opaque platform-specific state, defined in the implementation file.
struct Private;
AutoPtr<Private> m;
};
// Templated lock that can be used with any mutex.
// M only needs to provide lock() and unlock(): the mutex is locked in the
// constructor and unlocked in the destructor, so it is held for exactly the
// lifetime of the Lock object.
template <class M>
class Lock
{
NV_FORBID_COPY(Lock);
public:
Lock (M & m) : m_mutex (m) { m_mutex.lock(); }
~Lock () { m_mutex.unlock(); }
private:
// Referenced (not owned) mutex; it must outlive the Lock.
M & m_mutex;
};
} // nv namespace
#endif // NV_THREAD_MUTEX_H

View File

@ -0,0 +1,61 @@
// This code is in the public domain -- Ignacio Casta<74>o <castano@gmail.com>
#include "ParallelFor.h"
#include "Thread.h"
#include "Atomic.h"
#include "ThreadPool.h"
using namespace nv;
#define ENABLE_PARALLEL_FOR 1
void worker(void * arg) {
ParallelFor * owner = (ParallelFor *)arg;
while(true) {
// Consume one element at a time. @@ Might be more efficient to have custom grain.
uint i = atomicIncrement(&owner->idx);
if (i > owner->count) {
break;
}
owner->task(owner->context, i - 1);
}
}
// Stores the task and its context and, when ENABLE_PARALLEL_FOR is set,
// acquires the shared thread pool for the lifetime of this object.
// count and idx are left unset here; run() writes them before use.
ParallelFor::ParallelFor(ForTask * task, void * context) : task(task), context(context) {
#if ENABLE_PARALLEL_FOR
pool = ThreadPool::acquire();
#endif
}
// Hands the thread pool back (ThreadPool::release waits for the pool to be idle first).
ParallelFor::~ParallelFor() {
#if ENABLE_PARALLEL_FOR
ThreadPool::release(pool);
#endif
}
// Invokes task(context, i) for every i in [0, count).
// With ENABLE_PARALLEL_FOR the iterations are distributed over the pool's
// worker threads and this call blocks until all of them are done; otherwise
// the iterations run sequentially on the calling thread.
void ParallelFor::run(uint count) {
#if ENABLE_PARALLEL_FOR
storeRelease(&this->count, count);
// Init atomic counter to zero.
storeRelease(&idx, 0);
// Start threads.
pool->start(worker, this);
// Wait for all threads to complete.
pool->wait();
// Every worker leaves its loop only after the counter has passed count.
nvDebugCheck(idx >= count);
#else
// NOTE(review): `int i` is compared against the unsigned `count` here.
for (int i = 0; i < count; i++) {
task(context, i);
}
#endif
}

View File

@ -0,0 +1,38 @@
// This code is in the public domain -- Ignacio Casta<74>o <castano@gmail.com>
#pragma once
#ifndef NV_THREAD_PARALLELFOR_H
#define NV_THREAD_PARALLELFOR_H
#include "nvthread.h"
//#include "Atomic.h" // atomic<uint>
namespace nv
{
class Thread;
class ThreadPool;
typedef void ForTask(void * context, int id);
// Runs a ForTask over a range of indices, using the shared ThreadPool.
struct ParallelFor {
// Binds the task and the context pointer passed to every invocation.
ParallelFor(ForTask * task, void * context);
~ParallelFor();
// Invokes the task for ids 0 .. count-1.
void run(uint count);
// Invariant:
ForTask * task;
void * context;
ThreadPool * pool;
//uint workerCount; // @@ Move to thread pool.
//Thread * workers;
// State:
// Number of iterations of the current run.
uint count;
// Shared iteration counter, advanced atomically by the workers.
/*atomic<uint>*/ uint idx;
};
} // nv namespace
#endif // NV_THREAD_PARALLELFOR_H

136
src/nvthread/Thread.cpp Normal file
View File

@ -0,0 +1,136 @@
// This code is in the public domain -- castano@gmail.com
#include "Thread.h"
#if NV_OS_WIN32
#include "Win32.h"
#elif NV_OS_UNIX
#include <pthread.h>
#include <unistd.h> // usleep
#endif
using namespace nv;
// Platform-specific thread state: the native thread handle/id.
struct Thread::Private
{
#if NV_OS_WIN32
HANDLE thread;
#elif NV_OS_UNIX
pthread_t thread;
#endif
// NOTE(review): these two members are not referenced by the code visible in
// this file; start() and threadFunc use the public Thread::func / Thread::arg.
ThreadFunc * func;
void * arg;
};
// Native thread entry points. `arg` is the Thread object passed by
// Thread::start; the trampoline calls the user function with the user argument.
#if NV_OS_WIN32
unsigned long __stdcall threadFunc(void * arg) {
Thread * thread = (Thread *)arg;
thread->func(thread->arg);
return 0;
}
#elif NV_OS_UNIX
extern "C" void * threadFunc(void * arg) {
Thread * thread = (Thread *)arg;
thread->func(thread->arg);
// Terminates the thread with a null exit value; there is no return statement after this call.
pthread_exit(0);
}
#endif
// Creates a thread object with no native thread attached (handle cleared).
Thread::Thread() : p(new Private)
{
p->thread = 0;
}
// The handle is only cleared by wait(), so a started thread is expected to
// have been waited on before the object is destroyed (checked in debug builds).
Thread::~Thread()
{
nvDebugCheck(p->thread == 0);
}
// Starts a native thread that runs func(arg).
// func and arg are stored in the public members first because the entry
// point receives `this` and reads them from there.
// Creation failures are only detected through nvDebugCheck.
void Thread::start(ThreadFunc * func, void * arg)
{
this->func = func;
this->arg = arg;
#if NV_OS_WIN32
p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL);
//p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions...
nvDebugCheck(p->thread != NULL);
#elif NV_OS_UNIX
int result = pthread_create(&p->thread, NULL, threadFunc, this);
nvDebugCheck(result == 0);
#endif
}
// Blocks until the thread has finished, then releases/clears the native
// handle so that the object reads as "not running" again.
void Thread::wait()
{
#if NV_OS_WIN32
DWORD status = WaitForSingleObject (p->thread, INFINITE);
nvCheck (status == WAIT_OBJECT_0);
BOOL ok = CloseHandle (p->thread);
p->thread = NULL;
nvCheck (ok);
#elif NV_OS_UNIX
int result = pthread_join(p->thread, NULL);
p->thread = 0;
nvDebugCheck(result == 0);
#endif
}
// Returns true while a native handle is attached. The handle is set by
// start() and cleared only by wait(), so this keeps returning true after the
// thread function has returned, until wait() is called.
bool Thread::isRunning () const
{
#if NV_OS_WIN32
return p->thread != NULL;
#elif NV_OS_UNIX
return p->thread != 0;
#endif
}
// Busy-waits for `count` loop iterations without yielding the processor.
/*static*/ void Thread::spinWait(uint count)
{
    // The counter is volatile so that every iteration is an observable access;
    // a plain empty loop has no side effects and may be removed entirely by
    // the optimizer, turning the spin into a no-op.
    for (volatile uint i = 0; i < count; i = i + 1) {}
}
// Gives up the remainder of the calling thread's time slice so that another
// ready thread can run.
/*static*/ void Thread::yield()
{
#if NV_OS_WIN32
SwitchToThread();
#elif NV_OS_UNIX
int result = sched_yield();
nvDebugCheck(result == 0);
#endif
}
// Suspends the calling thread for at least `ms` milliseconds.
/*static*/ void Thread::sleep(uint ms)
{
#if NV_OS_WIN32
    Sleep(ms);
#elif NV_OS_UNIX
    // usleep() is allowed to fail with EINVAL for intervals of one second or
    // more, and `1000 * ms` wraps around for large values of ms. Sleep in
    // sub-second slices so that any duration is honoured.
    const uint maxSliceMs = 999;
    while (ms > maxSliceMs) {
        usleep(1000 * maxSliceMs);
        ms -= maxSliceMs;
    }
    usleep(1000 * ms);
#endif
}
// Waits for each of the `count` threads in the array, one after another.
// The disabled block below is an alternative Win32 path based on
// WaitForMultipleObjects.
// NOTE(review): as written, the disabled code reads threads->p->thread (always
// the first element) instead of threads[i], and never closes the handles.
/*static*/ void Thread::wait(Thread * threads, uint count)
{
/*#if NV_OS_WIN32
// @@ Is there any advantage in doing this?
nvDebugCheck(count < MAXIMUM_WAIT_OBJECTS);
HANDLE * handles = new HANDLE[count];
for (uint i = 0; i < count; i++) {
handles[i] = threads->p->thread;
}
DWORD result = WaitForMultipleObjects(count, handles, TRUE, INFINITE);
delete [] handles;
#else*/
for (uint i = 0; i < count; i++) {
threads[i].wait();
}
//#endif
}

46
src/nvthread/Thread.h Normal file
View File

@ -0,0 +1,46 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_THREAD_H
#define NV_THREAD_THREAD_H
#include "nvthread.h"
#include "nvcore/Ptr.h"
namespace nv
{
typedef void ThreadFunc(void * arg);
// Thin wrapper over a native thread (Win32 or pthread implementation).
class NVTHREAD_CLASS Thread
{
NV_FORBID_COPY(Thread);
public:
Thread();
~Thread();
// Starts a native thread running func(arg).
void start(ThreadFunc * func, void * arg);
// Blocks until the thread has finished.
void wait();
// True between start() and wait().
bool isRunning() const;
// Helpers acting on the calling thread.
static void spinWait(uint count);
static void yield();
static void sleep(uint ms);
// Waits for every thread in the array.
static void wait(Thread * threads, uint count);
private:
// Opaque platform-specific state, defined in the implementation file.
struct Private;
AutoPtr<Private> p;
public:
// Function and argument of the running thread; public because the native
// entry point (a free function) reads them.
ThreadFunc * func;
void * arg;
};
} // nv namespace
#endif // NV_THREAD_THREAD_H

121
src/nvthread/ThreadPool.cpp Normal file
View File

@ -0,0 +1,121 @@
// This code is in the public domain -- castano@gmail.com
#include "ThreadPool.h"
#include "Mutex.h"
#include "Thread.h"
// Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it.
#define PROTECT_THREAD_POOL 1
using namespace nv;
#if PROTECT_THREAD_POOL
Mutex s_pool_mutex;
#endif
AutoPtr<ThreadPool> s_pool;
// Returns the shared thread pool, creating it on first use.
// With PROTECT_THREAD_POOL the pool mutex is locked here and stays locked
// until the matching release(), so only one user holds the pool at a time.
/*static*/ ThreadPool * ThreadPool::acquire()
{
#if PROTECT_THREAD_POOL
s_pool_mutex.lock(); // @@ If same thread tries to lock twice, this should assert.
#endif
if (s_pool == NULL) {
// The constructor stores `this` into s_pool itself.
ThreadPool * p = new ThreadPool;
nvDebugCheck(s_pool == p);
}
return s_pool.ptr();
}
// Returns the pool obtained from acquire(): waits for outstanding work and
// then unlocks the pool mutex taken in acquire(). The pool itself is kept alive.
/*static*/ void ThreadPool::release(ThreadPool * pool)
{
nvDebugCheck(pool == s_pool);
// Make sure the threads of the pool are idle.
s_pool->wait();
#if PROTECT_THREAD_POOL
s_pool_mutex.unlock();
#endif
}
/*static*/ void ThreadPool::workerFunc(void * arg) {
uint i = (uint)arg;
while(true)
{
s_pool->startEvents[i].wait();
if (s_pool->func == NULL) {
return; // @@ should we post finish event anyway?
}
s_pool->func(s_pool->arg);
s_pool->finishEvents[i].post();
}
}
// Creates one worker thread (with its start/finish event pair) per hardware
// thread. The workers block on their start event until start() is called.
ThreadPool::ThreadPool()
{
    s_pool = this; // Worker threads need this to be initialized before they start.

    // Put the pool in a well-defined idle state before any worker exists.
    func = NULL;
    arg = NULL;
    allIdle = true;

    workerCount = nv::hardwareThreadCount();
    workers = new Thread[workerCount];
    startEvents = new Event[workerCount];
    finishEvents = new Event[workerCount];

    for (uint i = 0; i < workerCount; i++) {
        // The worker index travels in the void* argument; widen it through
        // size_t so the integer-to-pointer cast has a matching size on 64-bit.
        workers[i].start(workerFunc, (void *)(size_t)i);
    }
}
// Stops and joins all workers, then frees the worker and event arrays.
ThreadPool::~ThreadPool()
{
// Set threads to terminate.
// (A NULL function makes every worker return from its loop.)
start(NULL, NULL);
// Wait until threads actually exit.
Thread::wait(workers, workerCount);
delete [] workers;
delete [] startEvents;
delete [] finishEvents;
}
// Runs func(arg) once on every worker thread. Returns right after waking the
// workers; use wait() to block until they are done.
void ThreadPool::start(ThreadFunc * func, void * arg)
{
// Wait until threads are idle.
wait();
// Set our desired function.
this->func = func;
this->arg = arg;
allIdle = false;
// Resume threads.
Event::post(startEvents, workerCount);
}
// Blocks until every worker has signaled completion of the work started by
// the last start() call. Returns immediately if the pool is already idle.
void ThreadPool::wait()
{
    if (allIdle) {
        return;
    }

    // Wait for threads to complete.
    Event::wait(finishEvents, workerCount);
    allIdle = true;
}

49
src/nvthread/ThreadPool.h Normal file
View File

@ -0,0 +1,49 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_THREADPOOL_H
#define NV_THREAD_THREADPOOL_H
#include "nvthread.h"
#include "Event.h"
#include "Thread.h"
namespace nv {
class Thread;
class Event;
// Pool of worker threads that all run the same function on request.
// Obtained through acquire() and handed back with release().
class ThreadPool {
NV_FORBID_COPY(ThreadPool);
public:
// Access to the shared pool instance.
static ThreadPool * acquire();
static void release(ThreadPool *);
ThreadPool();
~ThreadPool();
// Runs func(arg) once on every worker; does not wait for completion.
void start(ThreadFunc * func, void * arg);
// Blocks until the work started by start() has completed.
void wait();
private:
// Entry point of the worker threads.
static void workerFunc(void * arg);
uint workerCount;
Thread * workers;
// One start/finish event pair per worker.
Event * startEvents;
Event * finishEvents;
// Non-zero when no work is outstanding.
uint allIdle;
// Current function:
ThreadFunc * func;
void * arg;
};
} // namespace nv
#endif // NV_THREAD_THREADPOOL_H

9
src/nvthread/Win32.h Normal file
View File

@ -0,0 +1,9 @@
// This code is in the public domain -- castano@gmail.com
// Never include this from a header file.
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#define _WIN32_WINNT 0x0400 // for SwitchToThread, TryEnterCriticalSection
#include <windows.h>
//#include <process.h> // for _beginthreadex

51
src/nvthread/nvthread.cpp Normal file
View File

@ -0,0 +1,51 @@
#include "nvthread.h"
#include "Thread.h"
#if NV_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#elif NV_OS_LINUX
#include <unistd.h> // sysconf
#elif NV_OS_DARWIN || NV_OS_FREEBSD
#include <sys/types.h>
#include <sys/sysctl.h> // sysctl, CTL_HW, HW_NCPU
#endif
using namespace nv;
// Find the number of cores in the system.
// Based on: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
// @@ Distinguish between logical and physical cores?
// Returns the number of hardware threads reported by the operating system.
// Falls back to 1 on unknown platforms or when the system query fails, so the
// result is never 0.
uint nv::hardwareThreadCount() {
#if NV_OS_WIN32
    SYSTEM_INFO sysinfo;
    GetSystemInfo( &sysinfo );
    return sysinfo.dwNumberOfProcessors;
#elif NV_OS_XBOX
    return 3; // or 6?
#elif NV_OS_LINUX // Linux, Solaris, & AIX
    // sysconf reports errors as -1; converting that straight to uint would
    // yield a huge thread count.
    const long onlineCount = sysconf(_SC_NPROCESSORS_ONLN);
    if (onlineCount < 1) {
        return 1; // Assume single core.
    }
    return (uint)onlineCount;
#elif NV_OS_DARWIN || NV_OS_FREEBSD
    // Initialized so that a failed sysctl call cannot leave it indeterminate.
    int numCPU = 0;
    int mib[4];
    size_t len = sizeof(numCPU);

    // set the mib for hw.ncpu
    mib[0] = CTL_HW;
    mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU;

    // get the number of CPUs from the system
    if (sysctl(mib, 2, &numCPU, &len, NULL, 0) != 0 || numCPU < 1) {
        mib[1] = HW_NCPU;
        numCPU = 0;
        len = sizeof(numCPU);
        if (sysctl(mib, 2, &numCPU, &len, NULL, 0) != 0 || numCPU < 1) {
            return 1; // Assume single core.
        }
    }

    return numCPU;
#else
    return 1; // Assume single core.
#endif
}

83
src/nvthread/nvthread.h Normal file
View File

@ -0,0 +1,83 @@
// This code is in the public domain -- castanyo@yahoo.es
#pragma once
#ifndef NV_THREAD_H
#define NV_THREAD_H
#include "nvcore/nvcore.h"
// Function linkage
#if NVTHREAD_SHARED
#ifdef NVTHREAD_EXPORTS
#define NVTHREAD_API DLL_EXPORT
#define NVTHREAD_CLASS DLL_EXPORT_CLASS
#else
#define NVTHREAD_API DLL_IMPORT
#define NVTHREAD_CLASS DLL_IMPORT
#endif
#else // NVTHREAD_SHARED
#define NVTHREAD_API
#define NVTHREAD_CLASS
#endif // NVTHREAD_SHARED
// Compiler barriers.
// See: http://en.wikipedia.org/wiki/Memory_ordering
#if NV_CC_MSVC
#include <intrin.h>
#pragma intrinsic(_WriteBarrier)
#define nvCompilerWriteBarrier _WriteBarrier
#pragma intrinsic(_ReadWriteBarrier)
#define nvCompilerReadWriteBarrier _ReadWriteBarrier
#if _MSC_VER >= 1400 // ReadBarrier is VC2005
#pragma intrinsic(_ReadBarrier)
#define nvCompilerReadBarrier _ReadBarrier
#else
#define nvCompilerReadBarrier _ReadWriteBarrier
#endif
#elif NV_CC_GNUC
#define nvCompilerReadWriteBarrier() asm volatile("" ::: "memory");
#define nvCompilerWriteBarrier nvCompilerReadWriteBarrier
#define nvCompilerReadBarrier nvCompilerReadWriteBarrier
#endif // NV_CC_MSVC
// @@ Memory barriers / fences.
// @@ Atomics.
/* Wrap this up:
#define YieldProcessor() __asm { rep nop }
#define YieldProcessor _mm_pause
#define YieldProcessor __yield
BOOL WINAPI SwitchToThread(void);
*/
namespace nv
{
// Number of hardware threads reported by the system.
// Reentrant.
uint hardwareThreadCount();
// Not thread-safe. Use from main thread only.
// NOTE(review): only declared here; confirm where these three functions are defined.
void initWorkers();
void shutWorkers();
void setWorkerFunction(void * func);
} // nv namespace
#endif // NV_THREAD_H

View File

@ -37,7 +37,7 @@ using namespace nv;
using namespace nvtt; using namespace nvtt;
void CompressorBC6::compressBlock(Tile & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{ {
NV_UNUSED(alphaMode); // ZOH does not support alpha. NV_UNUSED(alphaMode); // ZOH does not support alpha.
@ -56,7 +56,7 @@ void CompressorBC6::compressBlock(Tile & tile, AlphaMode alphaMode, const Compre
} }
void CompressorBC7::compressBlock(Tile & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output) void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{ {
// @@ TODO // @@ TODO
} }

View File

@ -481,10 +481,10 @@ void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY); err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4); int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size); outputOptions.outputHandler->writeData(rect.pBits, size);
} }
err = surface->UnlockRect(); err = surface->UnlockRect();
} }

View File

@ -110,7 +110,7 @@ namespace
{ {
nvDebugCheck(alignment >= 1); nvDebugCheck(alignment >= 1);
flush(); flush();
int remainder = (size_t)ptr % alignment; int remainder = (int)((uintptr_t)ptr % alignment);
if (remainder != 0) { if (remainder != 0) {
putBits(0, (alignment - remainder) * 8); putBits(0, (alignment - remainder) * 8);
} }

View File

@ -349,6 +349,8 @@ bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int
compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions); compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions);
} }
outputOptions.endImage();
return true; return true;
} }

View File

@ -135,6 +135,11 @@ bool OutputOptions::Private::writeData(const void * data, int size) const
return outputHandler == NULL || outputHandler->writeData(data, size); return outputHandler == NULL || outputHandler->writeData(data, size);
} }
// Forwards the end-of-image notification to the installed output handler.
// Does nothing when no output handler is set.
void OutputOptions::Private::endImage() const
{
    if (outputHandler == NULL) {
        return;
    }
    outputHandler->endImage();
}
void OutputOptions::Private::error(Error e) const void OutputOptions::Private::error(Error e) const
{ {
if (errorHandler != NULL) errorHandler->error(e); if (errorHandler != NULL) errorHandler->error(e);

View File

@ -52,6 +52,11 @@ namespace nvtt
return true; return true;
} }
// End-of-image notification. This handler has nothing to do at that point,
// so the override is intentionally empty.
virtual void endImage()
{
    // ignore.
}
nv::StdOutputStream stream; nv::StdOutputStream stream;
}; };
@ -72,6 +77,7 @@ namespace nvtt
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const; void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
bool writeData(const void * data, int size) const; bool writeData(const void * data, int size) const;
void endImage() const;
void error(Error e) const; void error(Error e) const;
}; };

View File

@ -18,8 +18,8 @@
// http://msdn.microsoft.com/en-us/library/dd504870.aspx // http://msdn.microsoft.com/en-us/library/dd504870.aspx
#if NV_OS_WIN32 && _MSC_VER >= 1600 #if NV_OS_WIN32 && _MSC_VER >= 1600
#define HAVE_PPL 1 #define HAVE_PPL 1
//#include <array> #include <array>
#include <ppl.h> //#include <ppl.h>
#endif #endif
// Intel Thread Building Blocks (TBB). // Intel Thread Building Blocks (TBB).
@ -28,6 +28,8 @@
#include <tbb/parallel_for.h> #include <tbb/parallel_for.h>
#endif #endif
#include "nvthread/ParallelFor.h"
namespace nvtt { namespace nvtt {
@ -40,6 +42,15 @@ namespace nvtt {
} }
}; };
struct ParallelTaskDispatcher : public TaskDispatcher
{
virtual void dispatch(Task * task, void * context, int count) {
nv::ParallelFor parallelFor(task, context);
parallelFor.run(count); // @@ Add support for custom grain.
}
};
#if defined(HAVE_OPENMP) #if defined(HAVE_OPENMP)
struct OpenMPTaskDispatcher : public TaskDispatcher struct OpenMPTaskDispatcher : public TaskDispatcher
@ -81,9 +92,24 @@ namespace nvtt {
#if defined(HAVE_PPL) #if defined(HAVE_PPL)
// Minimal iterator-like wrapper around an int index: dereferencing yields the
// current index, and ++ / -- move it by one.
class CountingIterator
{
public:
    // Starts at index 0.
    CountingIterator() : i(0) {}

    // A copy must keep the position of the source. Resetting to 0 here would
    // make every by-value copy (for example an end iterator passed to an
    // algorithm) collapse back to the start of the range.
    CountingIterator(const CountingIterator & rhs) : i(rhs.i) {}

    // Starts at index x.
    explicit CountingIterator(int x) : i(x) {}

    // Current index.
    const int & operator*() const { return i; }

    // Pre-increment / pre-decrement; both return *this.
    CountingIterator & operator++() { ++i; return *this; }
    CountingIterator & operator--() { --i; return *this; }

private:
    int i;
};
struct TaskFunctor { struct TaskFunctor {
TaskFunctor(Task * task, void * context) : task(task), context(context) {} TaskFunctor(Task * task, void * context) : task(task), context(context) {}
void operator()(int n) const { void operator()(int & n) const {
task(context, n); task(context, n);
} }
Task * task; Task * task;
@ -95,12 +121,16 @@ namespace nvtt {
{ {
virtual void dispatch(Task * task, void * context, int count) virtual void dispatch(Task * task, void * context, int count)
{ {
CountingIterator begin(0);
CountingIterator end((int)count);
TaskFunctor func(task, context); TaskFunctor func(task, context);
Concurrency::parallel_for(0, count, func);
std::for_each(begin, end, func);
//parallel_for_each(begin, end, func);
} }
}; };
#endif // HAVE_PPL #endif
#if defined(HAVE_TBB) #if defined(HAVE_TBB)
@ -132,7 +162,8 @@ namespace nvtt {
#elif defined(HAVE_GCD) #elif defined(HAVE_GCD)
typedef AppleTaskDispatcher ConcurrentTaskDispatcher; typedef AppleTaskDispatcher ConcurrentTaskDispatcher;
#else #else
typedef SequentialTaskDispatcher ConcurrentTaskDispatcher; //typedef SequentialTaskDispatcher ConcurrentTaskDispatcher;
typedef ParallelTaskDispatcher ConcurrentTaskDispatcher;
#endif #endif
} // namespace nvtt } // namespace nvtt

View File

@ -615,7 +615,7 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
block->decodeBlock(&colors, false); block->decodeBlock(&colors, false);
} }
else if (decoder == Decoder_NV5x) { else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors); block->decodeBlockNV5x(&colors);
} }
} }
else if (format == nvtt::Format_BC3) else if (format == nvtt::Format_BC3)
@ -629,19 +629,19 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
block->decodeBlock(&colors, false); block->decodeBlock(&colors, false);
} }
else if (decoder == Decoder_NV5x) { else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors); block->decodeBlockNV5x(&colors);
} }
} }
else if (format == nvtt::Format_BC4) else if (format == nvtt::Format_BC4)
{ {
const BlockATI1 * block = (const BlockATI1 *)ptr; const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9); block->decodeBlock(&colors, decoder == Decoder_D3D9);
} }
else if (format == nvtt::Format_BC5) else if (format == nvtt::Format_BC5)
{ {
const BlockATI2 * block = (const BlockATI2 *)ptr; const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9); block->decodeBlock(&colors, decoder == Decoder_D3D9);
} }
for (int yy = 0; yy < 4; yy++) for (int yy = 0; yy < 4; yy++)
{ {
@ -864,6 +864,42 @@ bool TexImage::buildNextMipmap(MipmapFilter filter, float filterWidth, const flo
return true; return true;
} }
// Changes the image extents to w x h x d without resampling: a new cleared
// 4-channel image is allocated and the region common to the old and the new
// extents is copied over pixel by pixel. Does nothing when there is no image
// or when the extents already match.
void TexImage::canvasSize(int w, int h, int d)
{
    nvDebugCheck(w > 0 && h > 0 && d > 0);

    if (m->image == NULL) {
        return;
    }
    if (w == m->image->width() && h == m->image->height() && d == m->image->depth()) {
        return;
    }

    detach();

    // Read the image pointer only after detach().
    // NOTE(review): presumably detach() may replace m (and m->image) with a
    // private copy; reading afterwards is correct either way - confirm.
    FloatImage * img = m->image;

    FloatImage * new_img = new FloatImage;
    new_img->allocate(4, w, h, d);
    new_img->clear();

    // Extents of the region that exists in both the old and the new image.
    // Kept separate from w/h/d so the requested size is still available below.
    const int copyW = min(uint(w), img->width());
    const int copyH = min(uint(h), img->height());
    const int copyD = min(uint(d), img->depth());

    for (int z = 0; z < copyD; z++) {
        for (int y = 0; y < copyH; y++) {
            for (int x = 0; x < copyW; x++) {
                new_img->pixel(0, x, y, z) = img->pixel(0, x, y, z);
                new_img->pixel(1, x, y, z) = img->pixel(1, x, y, z);
                new_img->pixel(2, x, y, z) = img->pixel(2, x, y, z);
                new_img->pixel(3, x, y, z) = img->pixel(3, x, y, z);
            }
        }
    }

    delete m->image;
    m->image = new_img;

    // The type must follow the depth of the new image (the requested d), not
    // the clamped copy depth; otherwise growing a depth-1 image would leave a
    // multi-slice image tagged as 2D.
    m->type = (d == 1) ? TextureType_2D : TextureType_3D;
}
// Color transforms. // Color transforms.
void TexImage::toLinear(float gamma) void TexImage::toLinear(float gamma)
{ {
@ -885,6 +921,66 @@ void TexImage::toGamma(float gamma)
m->image->toGamma(0, 3, gamma); m->image->toGamma(0, 3, gamma);
} }
// Maps a linear value to sRGB-encoded [0, 1]:
//  - values <= 0 give 0,
//  - values up to 0.0031308 use the linear segment 12.92 * f,
//  - values up to 1 use the power segment 1.055 * f^0.41666 - 0.055,
//  - everything else (including NaN, which fails every comparison) gives 1.
static float toSrgb(float f) {
    if (f <= 0.0f) {
        return 0.0f;
    }
    if (f <= 0.0031308f) {
        return 12.92f * f;
    }
    if (f <= 1.0f) {
        return (powf(f, 0.41666f) * 1.055f) - 0.055f;
    }
    return 1.0f;
}
// Applies the file-local toSrgb(float) curve to the first three channels
// (0, 1, 2) of every pixel, in place. Channel 3 is left untouched.
// Does nothing when there is no image.
void TexImage::toSrgb()
{
    if (m->image == NULL) return;

    detach();

    // Fetch the image only after detach(), so the writes below go to the
    // image this object currently holds rather than to a pointer captured
    // beforehand.
    // NOTE(review): presumably detach() may replace m->image with a private
    // copy when the data is shared - confirm against detach().
    FloatImage * img = m->image;

    const uint count = img->pixelCount();
    for (uint j = 0; j < count; j++)
    {
        float & r = img->pixel(0, j);
        float & g = img->pixel(1, j);
        float & b = img->pixel(2, j);

        r = ::toSrgb(r);
        g = ::toSrgb(g);
        b = ::toSrgb(b);
    }
}
// Piecewise-linear encoding curve with four segments:
//   f <  0      -> 0
//   f <  1/16   -> 4 * f
//   f <  1/8    -> 0.25  + 2   * (f - 1/16)
//   f <  1/2    -> 0.375 + 1   * (f - 1/8)
//   f <  1      -> 0.75  + 0.5 * (f - 1/2)
//   otherwise   -> 1 (also reached by NaN, which fails every comparison)
static float toXenonSrgb(float f) {
    if (f < 0.0f) {
        return 0.0f;
    }
    if (f < (1.0f/16.0f)) {
        return 4.0f * f;
    }
    if (f < (1.0f/8.0f)) {
        return 0.25f + 2.0f * (f - 0.0625f);
    }
    if (f < 0.5f) {
        return 0.375f + 1.0f * (f - 0.125f);
    }
    if (f < 1.0f) {
        return 0.75f + 0.5f * (f - 0.50f);
    }
    return 1.0f;
}
// Applies the file-local toXenonSrgb(float) curve to the first three channels
// (0, 1, 2) of every pixel, in place. Channel 3 is left untouched.
// Does nothing when there is no image.
void TexImage::toXenonSrgb()
{
    if (m->image == NULL) return;

    detach();

    // Fetch the image only after detach(), so the writes below go to the
    // image this object currently holds rather than to a pointer captured
    // beforehand.
    // NOTE(review): presumably detach() may replace m->image with a private
    // copy when the data is shared - confirm against detach().
    FloatImage * img = m->image;

    const uint count = img->pixelCount();
    for (uint j = 0; j < count; j++)
    {
        float & r = img->pixel(0, j);
        float & g = img->pixel(1, j);
        float & b = img->pixel(2, j);

        r = ::toXenonSrgb(r);
        g = ::toXenonSrgb(g);
        b = ::toXenonSrgb(b);
    }
}
void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]) void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4])
{ {
if (m->image == NULL) return; if (m->image == NULL) return;
@ -1140,9 +1236,9 @@ void TexImage::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
const uint count = img->pixelCount(); const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) { for (uint i = 0; i < count; i++) {
float R = nv::clamp(r[i] * irange, 0.0f, 1.0f); float R = nv::clamp(r[i], 0.0f, 1.0f);
float G = nv::clamp(g[i] * irange, 0.0f, 1.0f); float G = nv::clamp(g[i], 0.0f, 1.0f);
float B = nv::clamp(b[i] * irange, 0.0f, 1.0f); float B = nv::clamp(b[i], 0.0f, 1.0f);
#if 1 #if 1
float M = max(max(R, G), max(B, threshold)); float M = max(max(R, G), max(B, threshold));

View File

@ -294,6 +294,9 @@ namespace nvtt
/// Output data. Compressed data is output as soon as it's generated to minimize memory allocations. /// Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
virtual bool writeData(const void * data, int size) = 0; virtual bool writeData(const void * data, int size) = 0;
/// Indicate the end of the compressed image.
virtual void endImage() = 0;
}; };
/// Error codes. /// Error codes.
@ -440,10 +443,13 @@ namespace nvtt
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0); NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API bool buildNextMipmap(MipmapFilter filter); NVTT_API bool buildNextMipmap(MipmapFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0); NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0);
NVTT_API void canvasSize(int w, int h, int d);
// Color transforms. // Color transforms.
NVTT_API void toLinear(float gamma); NVTT_API void toLinear(float gamma);
NVTT_API void toGamma(float gamma); NVTT_API void toGamma(float gamma);
NVTT_API void toSrgb();
NVTT_API void toXenonSrgb();
NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]); NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
NVTT_API void swizzle(int r, int g, int b, int a); NVTT_API void swizzle(int r, int g, int b, int a);
NVTT_API void scaleBias(int channel, float scale, float bias); NVTT_API void scaleBias(int channel, float scale, float bias);

View File

@ -56,6 +56,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
// ignore. // ignore.
} }
// End-of-image notification from the output handler interface; this handler
// takes no action on it.
virtual void endImage()
{
    // Ignore.
}
// Output data. // Output data.
virtual bool writeData(const void * data, int size) virtual bool writeData(const void * data, int size)
{ {