Merge changes from the witness.
This commit is contained in:
parent
9c0658edca
commit
3c0ab2d3f3
346
project/vc9/nvthread/nvthread.vcproj
Normal file
346
project/vc9/nvthread/nvthread.vcproj
Normal file
@ -0,0 +1,346 @@
|
||||
<?xml version="1.0" encoding="Windows-1252"?>
|
||||
<VisualStudioProject
|
||||
ProjectType="Visual C++"
|
||||
Version="9.00"
|
||||
Name="nvthread"
|
||||
ProjectGUID="{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}"
|
||||
RootNamespace="nvthread"
|
||||
Keyword="Win32Proj"
|
||||
TargetFrameworkVersion="131072"
|
||||
>
|
||||
<Platforms>
|
||||
<Platform
|
||||
Name="Win32"
|
||||
/>
|
||||
<Platform
|
||||
Name="x64"
|
||||
/>
|
||||
</Platforms>
|
||||
<ToolFiles>
|
||||
</ToolFiles>
|
||||
<Configurations>
|
||||
<Configuration
|
||||
Name="Debug|Win32"
|
||||
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
|
||||
CharacterSet="2"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories=""
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="4"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Debug|x64"
|
||||
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
|
||||
CharacterSet="0"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TargetEnvironment="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="0"
|
||||
AdditionalIncludeDirectories=""
|
||||
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||
MinimalRebuild="true"
|
||||
BasicRuntimeChecks="3"
|
||||
RuntimeLibrary="3"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|Win32"
|
||||
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
|
||||
CharacterSet="2"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="3"
|
||||
InlineFunctionExpansion="0"
|
||||
EnableIntrinsicFunctions="true"
|
||||
FavorSizeOrSpeed="0"
|
||||
OmitFramePointers="true"
|
||||
EnableFiberSafeOptimizations="true"
|
||||
AdditionalIncludeDirectories=""
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||
StringPooling="true"
|
||||
RuntimeLibrary="2"
|
||||
EnableFunctionLevelLinking="false"
|
||||
EnableEnhancedInstructionSet="2"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
<Configuration
|
||||
Name="Release|x64"
|
||||
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
|
||||
ConfigurationType="4"
|
||||
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
|
||||
CharacterSet="0"
|
||||
WholeProgramOptimization="1"
|
||||
>
|
||||
<Tool
|
||||
Name="VCPreBuildEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCustomBuildTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXMLDataGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCWebServiceProxyGeneratorTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCMIDLTool"
|
||||
TargetEnvironment="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
Optimization="3"
|
||||
EnableIntrinsicFunctions="true"
|
||||
OmitFramePointers="true"
|
||||
WholeProgramOptimization="true"
|
||||
AdditionalIncludeDirectories=""
|
||||
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
|
||||
StringPooling="true"
|
||||
RuntimeLibrary="2"
|
||||
EnableFunctionLevelLinking="false"
|
||||
UsePrecompiledHeader="0"
|
||||
WarningLevel="3"
|
||||
DebugInformationFormat="3"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCManagedResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCResourceCompilerTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPreLinkEventTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCLibrarianTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCALinkTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCXDCMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCBscMakeTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCFxCopTool"
|
||||
/>
|
||||
<Tool
|
||||
Name="VCPostBuildEventTool"
|
||||
/>
|
||||
</Configuration>
|
||||
</Configurations>
|
||||
<References>
|
||||
</References>
|
||||
<Files>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Atomic.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Event.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Event.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Mutex.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Mutex.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\nvthread.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\nvthread.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\ParallelFor.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\ParallelFor.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Thread.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\Thread.h"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\ThreadPool.cpp"
|
||||
>
|
||||
</File>
|
||||
<File
|
||||
RelativePath="..\..\..\src\nvthread\ThreadPool.h"
|
||||
>
|
||||
</File>
|
||||
</Files>
|
||||
<Globals>
|
||||
</Globals>
|
||||
</VisualStudioProject>
|
@ -4,6 +4,7 @@ Microsoft Visual Studio Solution File, Format Version 10.00
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
|
||||
ProjectSection(ProjectDependencies) = postProject
|
||||
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB}
|
||||
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
|
||||
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
|
||||
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
|
||||
@ -88,6 +89,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "imperativeapi", "imperative
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcproj", "{C33787E3-5564-4834-9FE3-A9020455A669}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthread.vcproj", "{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms
|
||||
@ -457,6 +460,28 @@ Global
|
||||
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.Build.0 = Release|Win32
|
||||
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.ActiveCfg = Release|x64
|
||||
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.Build.0 = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Win32.ActiveCfg = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.Build.0 = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.Build.0 = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.Build.0 = Debug|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Win32.ActiveCfg = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.ActiveCfg = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.Build.0 = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.ActiveCfg = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.Build.0 = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.Build.0 = Release|Win32
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.ActiveCfg = Release|x64
|
||||
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
|
@ -6,6 +6,7 @@ INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/stb)
|
||||
SUBDIRS(nvcore)
|
||||
SUBDIRS(nvmath)
|
||||
SUBDIRS(nvimage)
|
||||
SUBDIRS(nvthread)
|
||||
SUBDIRS(nvtt)
|
||||
|
||||
# OpenGL
|
||||
|
@ -78,8 +78,8 @@ namespace nv
|
||||
}
|
||||
|
||||
template <typename T>
|
||||
bool find(const T & element, const T * restrict ptr, uint count, uint * index) {
|
||||
for (uint i = 0; i < count; i++) {
|
||||
bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
|
||||
for (uint i = begin; i < end; i++) {
|
||||
if (ptr[i] == element) {
|
||||
if (index != NULL) *index = i;
|
||||
return true;
|
||||
@ -257,15 +257,15 @@ namespace nv
|
||||
}
|
||||
|
||||
/// Return true if element found.
|
||||
NV_FORCEINLINE bool find(const T & element, uint * index) const
|
||||
NV_FORCEINLINE bool find(const T & element, uint * indexPtr) const
|
||||
{
|
||||
return find(element, 0, m_size, index);
|
||||
return find(element, 0, m_size, indexPtr);
|
||||
}
|
||||
|
||||
/// Return true if element found within the given range.
|
||||
NV_FORCEINLINE bool find(const T & element, uint first, uint count, uint * index) const
|
||||
NV_FORCEINLINE bool find(const T & element, uint begin, uint end, uint * indexPtr) const
|
||||
{
|
||||
return ::nv::find(element, m_buffer + first, count, index);
|
||||
return ::nv::find(element, m_buffer, begin, end, indexPtr);
|
||||
}
|
||||
|
||||
/// Remove the element at the given index. This is an expensive operation!
|
||||
|
@ -448,19 +448,6 @@ namespace
|
||||
/** Win32 assert handler. */
|
||||
struct Win32AssertHandler : public AssertHandler
|
||||
{
|
||||
// Code from Daniel Vogel.
|
||||
static bool isDebuggerPresent()
|
||||
{
|
||||
HINSTANCE kernel32 = GetModuleHandle("kernel32.dll");
|
||||
if (kernel32) {
|
||||
FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
|
||||
if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Flush the message queue. This is necessary for the message box to show up.
|
||||
static void flushMessageQueue()
|
||||
{
|
||||
@ -487,7 +474,7 @@ namespace
|
||||
nvDebug( error_string.str() );
|
||||
}
|
||||
|
||||
if (isDebuggerPresent()) {
|
||||
if (debug::isDebuggerPresent()) {
|
||||
return NV_ABORT_DEBUG;
|
||||
}
|
||||
|
||||
@ -522,15 +509,6 @@ namespace
|
||||
/** Xbox360 assert handler. */
|
||||
struct Xbox360AssertHandler : public AssertHandler
|
||||
{
|
||||
static bool isDebuggerPresent()
|
||||
{
|
||||
#ifdef _DEBUG
|
||||
return DmIsDebuggerPresent() == TRUE;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Assert handler method.
|
||||
virtual int assertion( const char * exp, const char * file, int line, const char * func/*=NULL*/ )
|
||||
{
|
||||
@ -546,7 +524,7 @@ namespace
|
||||
nvDebug( error_string.str() );
|
||||
}
|
||||
|
||||
if (isDebuggerPresent()) {
|
||||
if (debug::isDebuggerPresent()) {
|
||||
return NV_ABORT_DEBUG;
|
||||
}
|
||||
|
||||
@ -563,26 +541,6 @@ namespace
|
||||
/** Unix assert handler. */
|
||||
struct UnixAssertHandler : public AssertHandler
|
||||
{
|
||||
bool isDebuggerPresent()
|
||||
{
|
||||
#if NV_OS_DARWIN
|
||||
int mib[4];
|
||||
struct kinfo_proc info;
|
||||
size_t size;
|
||||
mib[0] = CTL_KERN;
|
||||
mib[1] = KERN_PROC;
|
||||
mib[2] = KERN_PROC_PID;
|
||||
mib[3] = getpid();
|
||||
size = sizeof(info);
|
||||
info.kp_proc.p_flag = 0;
|
||||
sysctl(mib,4,&info,&size,NULL,0);
|
||||
return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
|
||||
#else
|
||||
// if ppid != sid, some process spawned our app, probably a debugger.
|
||||
return getsid(getpid()) != getppid();
|
||||
#endif
|
||||
}
|
||||
|
||||
// Assert handler method.
|
||||
virtual int assertion(const char * exp, const char * file, int line, const char * func)
|
||||
{
|
||||
@ -594,7 +552,7 @@ namespace
|
||||
}
|
||||
|
||||
#if _DEBUG
|
||||
if (isDebuggerPresent()) {
|
||||
if (debug::isDebuggerPresent()) {
|
||||
return NV_ABORT_DEBUG;
|
||||
}
|
||||
#endif
|
||||
@ -702,7 +660,10 @@ void debug::enableSigHandler()
|
||||
// SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
|
||||
SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);
|
||||
|
||||
SymInitialize(GetCurrentProcess(), NULL, TRUE);
|
||||
if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
|
||||
DWORD error = GetLastError();
|
||||
nvDebug("SymInitialize returned error : %d\n", error);
|
||||
}
|
||||
|
||||
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
|
||||
|
||||
@ -743,3 +704,38 @@ void debug::disableSigHandler()
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
bool debug::isDebuggerPresent()
|
||||
{
|
||||
#if NV_OS_WIN32
|
||||
HINSTANCE kernel32 = GetModuleHandle("kernel32.dll");
|
||||
if (kernel32) {
|
||||
FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
|
||||
if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
#elif NV_OS_XBOX
|
||||
#ifdef _DEBUG
|
||||
return DmIsDebuggerPresent() == TRUE;
|
||||
#else
|
||||
return false;
|
||||
#endif
|
||||
#elif NV_OS_DARWIN
|
||||
int mib[4];
|
||||
struct kinfo_proc info;
|
||||
size_t size;
|
||||
mib[0] = CTL_KERN;
|
||||
mib[1] = KERN_PROC;
|
||||
mib[2] = KERN_PROC_PID;
|
||||
mib[3] = getpid();
|
||||
size = sizeof(info);
|
||||
info.kp_proc.p_flag = 0;
|
||||
sysctl(mib,4,&info,&size,NULL,0);
|
||||
return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
|
||||
#else
|
||||
// if ppid != sid, some process spawned our app, probably a debugger.
|
||||
return getsid(getpid()) != getppid();
|
||||
#endif
|
||||
}
|
||||
|
@ -10,6 +10,9 @@
|
||||
# include <stdarg.h> // va_list
|
||||
#endif
|
||||
|
||||
// Make sure we are using our assert.
|
||||
#undef assert
|
||||
|
||||
#define NV_ABORT_DEBUG 1
|
||||
#define NV_ABORT_IGNORE 2
|
||||
#define NV_ABORT_EXIT 3
|
||||
@ -116,12 +119,6 @@
|
||||
#endif
|
||||
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
#define nvStaticCheck(x) static_assert(x)
|
||||
#else
|
||||
#define nvStaticCheck(x) typedef char NV_DO_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
|
||||
#endif
|
||||
|
||||
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL);
|
||||
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
|
||||
|
||||
@ -166,6 +163,8 @@ namespace nv
|
||||
|
||||
NVCORE_API void enableSigHandler();
|
||||
NVCORE_API void disableSigHandler();
|
||||
|
||||
NVCORE_API bool isDebuggerPresent();
|
||||
}
|
||||
|
||||
} // nv namespace
|
||||
|
@ -2,7 +2,7 @@
|
||||
#error "Do not include this file directly."
|
||||
#endif
|
||||
|
||||
//#include <stdint.h> // uint8_t, int8_t, ...
|
||||
#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
|
||||
#include <cstddef> // operator new, size_t, NULL
|
||||
|
||||
// Function linkage
|
||||
@ -67,4 +67,4 @@ typedef int64_t int64;
|
||||
|
||||
// Aliases
|
||||
typedef uint32 uint;
|
||||
*/
|
||||
*/
|
||||
|
@ -12,10 +12,10 @@
|
||||
#include <new> // new and delete
|
||||
|
||||
|
||||
#if NV_CC_GNUC
|
||||
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||
#else
|
||||
# define NV_ALIGN_16 __declspec(align(16))
|
||||
#if NV_CC_GNUC
|
||||
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
|
||||
#else
|
||||
# define NV_ALIGN_16 __declspec(align(16))
|
||||
#endif
|
||||
|
||||
|
||||
@ -43,15 +43,15 @@ extern "C" {
|
||||
namespace nv {
|
||||
|
||||
// C++ helpers.
|
||||
template <typename T> T * malloc(size_t count) {
|
||||
template <typename T> NV_FORCEINLINE T * malloc(size_t count) {
|
||||
return (T *)::malloc(sizeof(T) * count);
|
||||
}
|
||||
|
||||
template <typename T> T * realloc(T * ptr, size_t count) {
|
||||
template <typename T> NV_FORCEINLINE T * realloc(T * ptr, size_t count) {
|
||||
return (T *)::realloc(ptr, sizeof(T) * count);
|
||||
}
|
||||
|
||||
template <typename T> void free(const T * ptr) {
|
||||
template <typename T> NV_FORCEINLINE void free(const T * ptr) {
|
||||
::free((void *)ptr);
|
||||
}
|
||||
|
||||
|
@ -72,7 +72,7 @@ namespace nv
|
||||
#if NV_OS_WIN32
|
||||
return _ftell_nolock(m_fp);
|
||||
#else
|
||||
return ftell(m_fp);
|
||||
return (uint)ftell(m_fp);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -85,9 +85,9 @@ namespace nv
|
||||
uint end = _ftell_nolock(m_fp);
|
||||
_fseek_nolock(m_fp, pos, SEEK_SET);
|
||||
#else
|
||||
uint pos = ftell(m_fp);
|
||||
uint pos = (uint)ftell(m_fp);
|
||||
fseek(m_fp, 0, SEEK_END);
|
||||
uint end = ftell(m_fp);
|
||||
uint end = (uint)ftell(m_fp);
|
||||
fseek(m_fp, pos, SEEK_SET);
|
||||
#endif
|
||||
return end;
|
||||
|
@ -189,7 +189,7 @@ StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
|
||||
}
|
||||
|
||||
/** Preallocate space. */
|
||||
StringBuilder::StringBuilder( int size_hint ) : m_size(size_hint)
|
||||
StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint)
|
||||
{
|
||||
nvDebugCheck(m_size > 0);
|
||||
m_str = strAlloc(m_size);
|
||||
@ -203,9 +203,15 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
|
||||
}
|
||||
|
||||
/** Copy string. */
|
||||
StringBuilder::StringBuilder( const char * s, int extra_size_hint/*=0*/ ) : m_size(0), m_str(NULL)
|
||||
StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
|
||||
{
|
||||
copy(s, extra_size_hint);
|
||||
copy(s);
|
||||
}
|
||||
|
||||
/** Copy string. */
|
||||
StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL)
|
||||
{
|
||||
copy(s, len);
|
||||
}
|
||||
|
||||
/** Delete the string. */
|
||||
@ -396,15 +402,25 @@ StringBuilder & StringBuilder::reserve( uint size_hint )
|
||||
|
||||
|
||||
/** Copy a string safely. */
|
||||
StringBuilder & StringBuilder::copy( const char * s, int extra_size/*=0*/ )
|
||||
StringBuilder & StringBuilder::copy(const char * s)
|
||||
{
|
||||
nvCheck( s != NULL );
|
||||
const uint str_size = uint(strlen( s )) + 1;
|
||||
reserve(str_size + extra_size);
|
||||
reserve(str_size);
|
||||
memcpy(m_str, s, str_size);
|
||||
return *this;
|
||||
}
|
||||
|
||||
/** Copy a string safely. */
|
||||
StringBuilder & StringBuilder::copy(const char * s, uint len)
|
||||
{
|
||||
nvCheck( s != NULL );
|
||||
const uint str_size = len + 1;
|
||||
reserve(str_size);
|
||||
strCpy(m_str, str_size, s, len);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
||||
/** Copy an StringBuilder. */
|
||||
StringBuilder & StringBuilder::copy( const StringBuilder & s )
|
||||
|
@ -59,9 +59,10 @@ namespace nv
|
||||
public:
|
||||
|
||||
StringBuilder();
|
||||
explicit StringBuilder( int size_hint );
|
||||
StringBuilder( const char * str, int extra_size_hint = 0);
|
||||
StringBuilder( const StringBuilder & );
|
||||
explicit StringBuilder( uint size_hint );
|
||||
StringBuilder(const char * str);
|
||||
StringBuilder(const char * str, uint len);
|
||||
StringBuilder(const StringBuilder & other);
|
||||
|
||||
~StringBuilder();
|
||||
|
||||
@ -75,9 +76,10 @@ namespace nv
|
||||
StringBuilder & number( int i, int base = 10 );
|
||||
StringBuilder & number( uint i, int base = 10 );
|
||||
|
||||
StringBuilder & reserve( uint size_hint );
|
||||
StringBuilder & copy( const char * str, int extra_size/*=0*/ );
|
||||
StringBuilder & copy( const StringBuilder & str );
|
||||
StringBuilder & reserve(uint size_hint);
|
||||
StringBuilder & copy(const char * str);
|
||||
StringBuilder & copy(const char * str, uint len);
|
||||
StringBuilder & copy(const StringBuilder & str);
|
||||
|
||||
StringBuilder & toLower();
|
||||
StringBuilder & toUpper();
|
||||
@ -145,7 +147,7 @@ namespace nv
|
||||
public:
|
||||
Path() : StringBuilder() {}
|
||||
explicit Path(int size_hint) : StringBuilder(size_hint) {}
|
||||
Path(const char * str, int extra_size_hint = 0) : StringBuilder(str, extra_size_hint) {}
|
||||
Path(const char * str) : StringBuilder(str) {}
|
||||
Path(const Path & path) : StringBuilder(path) {}
|
||||
|
||||
const char * fileName() const;
|
||||
|
@ -7,9 +7,76 @@
|
||||
#include "nvcore.h"
|
||||
#include "Debug.h" // nvDebugCheck
|
||||
|
||||
// Just in case. Grrr.
|
||||
#undef min
|
||||
#undef max
|
||||
|
||||
namespace nv
|
||||
{
|
||||
// Less error prone than casting. From CB:
|
||||
// http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
|
||||
inline int8 asSigned(uint8 x) { return (int8) x; }
|
||||
inline int16 asSigned(uint16 x) { return (int16) x; }
|
||||
inline int32 asSigned(uint32 x) { return (int32) x; }
|
||||
inline int64 asSigned(uint64 x) { return (int64) x; }
|
||||
|
||||
inline uint8 asUnsigned(int8 x) { return (uint8) x; }
|
||||
inline uint16 asUnsigned(int16 x) { return (uint16) x; }
|
||||
inline uint32 asUnsigned(int32 x) { return (uint32) x; }
|
||||
inline uint64 asUnsigned(int64 x) { return (uint64) x; }
|
||||
|
||||
/*
|
||||
template <typename T> inline int8 toI8(T x) {
|
||||
nvDebugCheck(x <= INT8_MAX);
|
||||
nvDebugCheck(x >= INT8_MIN);
|
||||
int8 y = (int8) x;
|
||||
nvDebugCheck(x == (T)y);
|
||||
return y;
|
||||
}
|
||||
|
||||
template <typename T> inline uint8 toU8(T x) {
|
||||
nvDebugCheck(x <= UINT8_MAX);
|
||||
nvDebugCheck(x >= 0);
|
||||
return (uint8) x;
|
||||
}
|
||||
|
||||
template <typename T> inline int16 toI16(T x) {
|
||||
nvDebugCheck(x <= INT16_MAX);
|
||||
nvDebugCheck(x >= INT16_MIN);
|
||||
return (int16) x;
|
||||
}
|
||||
|
||||
template <typename T> inline uint16 toU16(T x) {
|
||||
nvDebugCheck(x <= UINT16_MAX);
|
||||
nvDebugCheck(x >= 0);
|
||||
return (uint16) x;
|
||||
}
|
||||
|
||||
template <typename T> inline int32 toI32(T x) {
|
||||
nvDebugCheck(x <= INT32_MAX);
|
||||
nvDebugCheck(x >= INT32_MIN);
|
||||
return (int32) x;
|
||||
}
|
||||
|
||||
template <typename T> inline uint32 toU32(T x) {
|
||||
nvDebugCheck(x <= UINT32_MAX);
|
||||
nvDebugCheck(x >= 0);
|
||||
return (uint32) x;
|
||||
}
|
||||
|
||||
template <typename T> inline int64 toI64(T x) {
|
||||
nvDebugCheck(x <= INT64_MAX);
|
||||
nvDebugCheck(x >= INT64_MIN);
|
||||
return (int64) x;
|
||||
}
|
||||
|
||||
template <typename T> inline uint64 toU64(T x) {
|
||||
nvDebugCheck(x <= UINT64_MAX);
|
||||
nvDebugCheck(x >= 0);
|
||||
return (uint64) x;
|
||||
}
|
||||
*/
|
||||
|
||||
/// Swap two values.
|
||||
template <typename T>
|
||||
inline void swap(T & a, T & b)
|
||||
|
@ -4,9 +4,6 @@
|
||||
#ifndef NV_CORE_H
|
||||
#define NV_CORE_H
|
||||
|
||||
// cmake config
|
||||
#include <nvconfig.h>
|
||||
|
||||
// Function linkage
|
||||
#if NVCORE_SHARED
|
||||
#ifdef NVCORE_EXPORTS
|
||||
@ -91,7 +88,11 @@
|
||||
// @@ NV_CC_MSVC7
|
||||
// @@ NV_CC_MSVC8
|
||||
|
||||
#if defined POSH_COMPILER_GCC
|
||||
#if defined POSH_COMPILER_CLANG
|
||||
# define NV_CC_CLANG 1
|
||||
# define NV_CC_GNUC 1 // Clang is compatible with GCC; NV_CC_GNUC is the macro the GCC branch defines.
# define NV_CC_GCC 1 // Kept so that any code testing this spelling keeps working.
|
||||
# define NV_CC_STRING "clang"
|
||||
#elif defined POSH_COMPILER_GCC
|
||||
# define NV_CC_GNUC 1
|
||||
# define NV_CC_STRING "gcc"
|
||||
#elif defined POSH_COMPILER_MSVC
|
||||
@ -108,6 +109,18 @@
|
||||
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING
|
||||
|
||||
|
||||
// Define the right printf prefix for size_t arguments:
|
||||
#if POSH_64BIT_POINTER
|
||||
# define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
|
||||
#else
|
||||
# define NV_SIZET_PRINTF_PREFIX
|
||||
#endif
|
||||
|
||||
|
||||
// cmake config
|
||||
#include "nvconfig.h"
|
||||
|
||||
|
||||
// Type definitions:
|
||||
typedef posh_u8_t uint8;
|
||||
typedef posh_i8_t int8;
|
||||
@ -144,6 +157,8 @@ typedef uint32 uint;
|
||||
private: \
|
||||
void *operator new(size_t size); \
|
||||
void *operator new[](size_t size);
|
||||
//static void *operator new(size_t size); \
|
||||
//static void *operator new[](size_t size);
|
||||
|
||||
// String concatenation macros.
|
||||
#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
|
||||
@ -153,6 +168,25 @@ typedef uint32 uint;
|
||||
#define NV_STRING2(x) #x
|
||||
#define NV_STRING(x) NV_STRING2(x)
|
||||
|
||||
|
||||
#if __cplusplus > 199711L
|
||||
#define nvStaticCheck(x) static_assert(x)
|
||||
#else
|
||||
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
|
||||
#endif
|
||||
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
|
||||
|
||||
// Make sure type definitions are fine.
|
||||
NV_COMPILER_CHECK(sizeof(int8) == 1);
|
||||
NV_COMPILER_CHECK(sizeof(uint8) == 1);
|
||||
NV_COMPILER_CHECK(sizeof(int16) == 2);
|
||||
NV_COMPILER_CHECK(sizeof(uint16) == 2);
|
||||
NV_COMPILER_CHECK(sizeof(int32) == 4);
|
||||
NV_COMPILER_CHECK(sizeof(uint32) == 4);
|
||||
NV_COMPILER_CHECK(sizeof(int32) == 4);
|
||||
NV_COMPILER_CHECK(sizeof(uint32) == 4);
|
||||
|
||||
|
||||
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
|
||||
|
||||
#if 1
|
||||
@ -180,6 +214,7 @@ typedef uint32 uint;
|
||||
|
||||
// Null index. @@ Move this somewhere else... it's only used by nvmesh.
|
||||
//const unsigned int NIL = unsigned int(~0);
|
||||
//#define NIL uint(~0)
|
||||
|
||||
// Null pointer.
|
||||
#ifndef NULL
|
||||
|
@ -1418,7 +1418,7 @@ uint DirectDrawSurface::mipmapSize(uint mipmap) const
|
||||
{
|
||||
nvDebugCheck((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE));
|
||||
|
||||
uint pitch = computeBytePitch(w, header.pf.bitcount, 8); // Asuming 8 bit alignment, which is the same D3DX expects.
|
||||
uint pitch = computeBytePitch(w, header.pf.bitcount, 1); // Asuming 1 byte alignment, which is the same D3DX expects.
|
||||
|
||||
return pitch * h * d;
|
||||
}
|
||||
|
@ -181,7 +181,7 @@ void FloatImage::normalize(uint baseComponent)
|
||||
for (uint i = 0; i < count; i++) {
|
||||
|
||||
Vector3 normal(xChannel[i], yChannel[i], zChannel[i]);
|
||||
normal = normalizeSafe(normal, Vector3(zero), 0.0f);
|
||||
normal = normalizeSafe(normal, Vector3(0), 0.0f);
|
||||
|
||||
xChannel[i] = normal.x;
|
||||
yChannel[i] = normal.y;
|
||||
|
@ -56,6 +56,7 @@ namespace nv
|
||||
//@{
|
||||
NVIMAGE_API void clear(float f = 0.0f);
|
||||
NVIMAGE_API void clear(uint component, float f = 0.0f);
|
||||
NVIMAGE_API void copyChannel(uint src, uint dst);
|
||||
|
||||
NVIMAGE_API void normalize(uint base_component);
|
||||
|
||||
@ -113,8 +114,6 @@ namespace nv
|
||||
uint pixelCount() const { return m_pixelCount; }
|
||||
|
||||
|
||||
// @@ It would make sense to swap the order of the arguments so that 'c' is always first.
|
||||
|
||||
/** @name Pixel access. */
|
||||
//@{
|
||||
const float * channel(uint c) const;
|
||||
|
@ -70,14 +70,14 @@ namespace nv
|
||||
|
||||
inline const Color32 & Image::pixel(uint x, uint y) const
|
||||
{
|
||||
nvDebugCheck(x < width() && y < height());
|
||||
return pixel(y * width() + x);
|
||||
nvDebugCheck(x < m_width && y < m_height);
|
||||
return pixel(y * m_width + x);
|
||||
}
|
||||
|
||||
inline Color32 & Image::pixel(uint x, uint y)
|
||||
{
|
||||
nvDebugCheck(x < width() && y < height());
|
||||
return pixel(y * width() + x);
|
||||
nvDebugCheck(x < m_width && y < m_height);
|
||||
return pixel(y * m_width + x);
|
||||
}
|
||||
|
||||
} // nv namespace
|
||||
|
@ -215,7 +215,7 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName)
|
||||
StdInputStream stream(fileName);
|
||||
|
||||
if (stream.isError()) {
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return loadFloat(fileName, stream);
|
||||
@ -324,9 +324,9 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
|
||||
|
||||
bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount)
|
||||
{
|
||||
#if !defined(HAVE_FREEIMAGE)
|
||||
const char * extension = Path::extension(fileName);
|
||||
|
||||
#if !defined(HAVE_FREEIMAGE)
|
||||
#if defined(HAVE_OPENEXR)
|
||||
if (strCaseCmp(extension, ".exr") == 0) {
|
||||
return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
|
||||
@ -711,7 +711,7 @@ Image * nv::ImageIO::loadTGA(Stream & s)
|
||||
case TGA_TYPE_INDEXED:
|
||||
if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) {
|
||||
nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" );
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
pal = true;
|
||||
break;
|
||||
@ -732,7 +732,7 @@ Image * nv::ImageIO::loadTGA(Stream & s)
|
||||
|
||||
default:
|
||||
nvDebug( "*** loadTGA: Error, unsupported image type.\n" );
|
||||
return false;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const uint pixel_size = (tga.pixel_size/8);
|
||||
@ -1369,7 +1369,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
|
||||
// Read the entire file.
|
||||
Array<uint8> byte_array;
|
||||
byte_array.resize(s.size());
|
||||
s.serialize(byte_array.mutableBuffer(), s.size());
|
||||
s.serialize(byte_array.buffer(), s.size());
|
||||
|
||||
jpeg_decompress_struct cinfo;
|
||||
jpeg_error_mgr jerr;
|
||||
|
@ -487,46 +487,126 @@ nv::half_to_float( uint16 h )
|
||||
return (f_result);
|
||||
}
|
||||
|
||||
uint32
|
||||
nv::fast_half_to_float( uint16 h )
|
||||
{
|
||||
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
|
||||
const uint32 h_m_mask = _uint32_li( 0x000003ff );
|
||||
const uint32 h_s_mask = _uint32_li( 0x00008000 );
|
||||
const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 );
|
||||
const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d );
|
||||
const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 );
|
||||
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
|
||||
const uint32 f_m_mask = _uint32_li( 0x007fffff );
|
||||
const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e );
|
||||
const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
|
||||
const uint32 f_e_pos = _uint32_li( 0x00000017 );
|
||||
const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff );
|
||||
const uint32 h_e = _uint32_and( h, h_e_mask );
|
||||
const uint32 h_m = _uint32_and( h, h_m_mask );
|
||||
const uint32 h_s = _uint32_and( h, h_s_mask );
|
||||
const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
|
||||
const uint32 h_m_nlz = _uint32_cntlz( h_m );
|
||||
const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset );
|
||||
const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
|
||||
const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset );
|
||||
const uint32 f_em = _uint32_or( f_e, f_m );
|
||||
const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
|
||||
const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
|
||||
const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa );
|
||||
const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask );
|
||||
const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
|
||||
const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
|
||||
const uint32 f_em_nan = _uint32_or( f_e_mask, f_m );
|
||||
const uint32 is_e_eqz_msb = _uint32_dec( h_e );
|
||||
const uint32 is_m_nez_msb = _uint32_neg( h_m );
|
||||
const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
|
||||
const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
|
||||
const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
|
||||
const uint32 is_zero = _uint32_ext( is_zero_msb );
|
||||
const uint32 f_zero_result = _uint32_andc( f_em, is_zero );
|
||||
const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
|
||||
const uint32 f_result = _uint32_or( f_s, f_denorm_result );
|
||||
|
||||
return (f_result);
|
||||
// @@ These tables could be smaller.
|
||||
static uint32 mantissa_table[2048];
|
||||
static uint32 exponent_table[64];
|
||||
static uint32 offset_table[64];
|
||||
|
||||
void nv::half_init_tables()
|
||||
{
|
||||
// Init mantissa table.
|
||||
mantissa_table[0] = 0;
|
||||
|
||||
for (int i = 1; i < 1024; i++) {
|
||||
uint m = i << 13;
|
||||
uint e = 0;
|
||||
|
||||
while ((m & 0x00800000) == 0) {
|
||||
e -= 0x00800000;
|
||||
m <<= 1;
|
||||
}
|
||||
m &= ~0x00800000;
|
||||
e += 0x38800000;
|
||||
mantissa_table[i] = m | e;
|
||||
}
|
||||
|
||||
for (int i = 1024; i < 2048; i++) {
|
||||
mantissa_table[i] = 0x38000000 + ((i - 1024) << 13);
|
||||
}
|
||||
|
||||
|
||||
// Init exponent table.
|
||||
exponent_table[0] = 0;
|
||||
|
||||
for (int i = 1; i < 31; i++) {
|
||||
exponent_table[i] = (i << 23);
|
||||
}
|
||||
|
||||
exponent_table[31] = 0x47800000;
|
||||
exponent_table[32] = 0x80000000;
|
||||
|
||||
for (int i = 33; i < 63; i++) {
|
||||
exponent_table[i] = 0x80000000 + ((i - 32) << 23);
|
||||
}
|
||||
|
||||
exponent_table[63] = 0xC7800000;
|
||||
|
||||
|
||||
// Init offset table.
|
||||
offset_table[0] = 0;
|
||||
|
||||
for (int i = 1; i < 32; i++) {
|
||||
offset_table[i] = 1024;
|
||||
}
|
||||
|
||||
offset_table[32] = 0;
|
||||
|
||||
for (int i = 33; i < 64; i++) {
|
||||
offset_table[i] = 1024;
|
||||
}
|
||||
|
||||
/*for (int i = 0; i < 64; i++) {
|
||||
offset_table[i] = ((i & 31) != 0) * 1024;
|
||||
}*/
|
||||
}
|
||||
|
||||
// Fast half to float conversion based on:
|
||||
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
|
||||
uint32 nv::fast_half_to_float(uint16 h)
|
||||
{
|
||||
uint exp = h >> 10;
|
||||
return mantissa_table[offset_table[exp] + (h & 0x3ff)] + exponent_table[exp];
|
||||
}
|
||||
|
||||
|
||||
#if 0
|
||||
// Inaccurate conversion suggested at the ffmpeg mailing list:
|
||||
// http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html
|
||||
uint32 nv::fast_half_to_float(uint16 v)
|
||||
{
|
||||
if (v & 0x8000) return 0;
|
||||
uint exp = v >> 10;
|
||||
if (!exp) return (v>>9)&1;
|
||||
if (exp >= 15) return 0xffff;
|
||||
v <<= 6;
|
||||
return (v+(1<<16)) >> (15-exp);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if 0
|
||||
|
||||
// Some more from a gamedev thread:
|
||||
// http://www.devmaster.net/forums/showthread.php?t=10924
|
||||
|
||||
// I believe it does not handle specials either.
|
||||
|
||||
// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
|
||||
|
||||
|
||||
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
|
||||
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
|
||||
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
|
||||
static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
|
||||
|
||||
__asm
|
||||
{
|
||||
movaps xmm1, xmm0 // Input in xmm0
|
||||
movaps xmm2, xmm0
|
||||
|
||||
andps xmm0, half_sign
|
||||
andps xmm1, half_exponent
|
||||
andps xmm2, half_mantissa
|
||||
paddd xmm1, half_bias_offset
|
||||
|
||||
pslld xmm0, 16
|
||||
pslld xmm1, 13
|
||||
pslld xmm2, 13
|
||||
|
||||
orps xmm1, xmm2
|
||||
orps xmm0, xmm1 // Result in xmm0
|
||||
}
|
||||
|
||||
|
||||
#endif
|
@ -9,8 +9,9 @@ namespace nv {
|
||||
uint32 half_to_float( uint16 h );
|
||||
uint16 half_from_float( uint32 f );
|
||||
|
||||
// Does not handle NaN or infinity.
|
||||
uint32 fast_half_to_float( uint16 h );
|
||||
void half_init_tables();
|
||||
|
||||
uint32 fast_half_to_float(uint16 h);
|
||||
|
||||
inline uint16 to_half(float c) {
|
||||
union { float f; uint32 u; } f;
|
||||
|
@ -9,15 +9,14 @@
|
||||
|
||||
namespace nv
|
||||
{
|
||||
enum zero_t { zero };
|
||||
enum identity_t { identity };
|
||||
|
||||
class NVMATH_CLASS Matrix3
|
||||
{
|
||||
public:
|
||||
Matrix3();
|
||||
Matrix3(zero_t);
|
||||
Matrix3(identity_t);
|
||||
explicit Matrix3(float f);
|
||||
explicit Matrix3(identity_t);
|
||||
Matrix3(const Matrix3 & m);
|
||||
Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
|
||||
|
||||
@ -41,10 +40,10 @@ namespace nv
|
||||
|
||||
inline Matrix3::Matrix3() {}
|
||||
|
||||
inline Matrix3::Matrix3(zero_t)
|
||||
inline Matrix3::Matrix3(float f)
|
||||
{
|
||||
for(int i = 0; i < 9; i++) {
|
||||
m_data[i] = 0.0f;
|
||||
m_data[i] = f;
|
||||
}
|
||||
}
|
||||
|
||||
@ -204,11 +203,11 @@ namespace nv
|
||||
typedef Matrix const & Arg;
|
||||
|
||||
Matrix();
|
||||
Matrix(zero_t);
|
||||
Matrix(identity_t);
|
||||
explicit Matrix(float f);
|
||||
explicit Matrix(identity_t);
|
||||
Matrix(const Matrix & m);
|
||||
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
|
||||
Matrix(const scalar m[]); // m is assumed to contain 16 elements
|
||||
//explicit Matrix(const scalar m[]); // m is assumed to contain 16 elements
|
||||
|
||||
scalar data(uint idx) const;
|
||||
scalar & data(uint idx);
|
||||
@ -237,7 +236,7 @@ namespace nv
|
||||
{
|
||||
}
|
||||
|
||||
inline Matrix::Matrix(zero_t)
|
||||
inline Matrix::Matrix(float f)
|
||||
{
|
||||
for(int i = 0; i < 16; i++) {
|
||||
m_data[i] = 0.0f;
|
||||
@ -268,12 +267,12 @@ namespace nv
|
||||
m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
|
||||
}
|
||||
|
||||
inline Matrix::Matrix(const scalar m[])
|
||||
/*inline Matrix::Matrix(const scalar m[])
|
||||
{
|
||||
for(int i = 0; i < 16; i++) {
|
||||
m_data[i] = m[i];
|
||||
}
|
||||
}
|
||||
}*/
|
||||
|
||||
|
||||
// Accessors
|
||||
@ -456,7 +455,7 @@ namespace nv
|
||||
/// Get frustum matrix.
|
||||
inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar)
|
||||
{
|
||||
Matrix m(zero);
|
||||
Matrix m(0.0f);
|
||||
|
||||
scalar doubleznear = 2.0f * zNear;
|
||||
scalar one_deltax = 1.0f / (xmax - xmin);
|
||||
@ -477,7 +476,7 @@ namespace nv
|
||||
/// Get infinite frustum matrix.
|
||||
inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear)
|
||||
{
|
||||
Matrix m(zero);
|
||||
Matrix m(0.0f);
|
||||
|
||||
scalar doubleznear = 2.0f * zNear;
|
||||
scalar one_deltax = 1.0f / (xmax - xmin);
|
||||
|
@ -100,6 +100,7 @@ namespace nv
|
||||
explicit Vector4(scalar x);
|
||||
Vector4(scalar x, scalar y, scalar z, scalar w);
|
||||
Vector4(Vector2::Arg v, scalar z, scalar w);
|
||||
Vector4(Vector2::Arg v, Vector2::Arg u);
|
||||
Vector4(Vector3::Arg v, scalar w);
|
||||
Vector4(Vector4::Arg v);
|
||||
// Vector4(const Quaternion & v);
|
||||
@ -107,6 +108,7 @@ namespace nv
|
||||
const Vector4 & operator=(Vector4::Arg v);
|
||||
|
||||
Vector2 xy() const;
|
||||
Vector2 zw() const;
|
||||
Vector3 xyz() const;
|
||||
|
||||
const scalar * ptr() const;
|
||||
@ -290,6 +292,7 @@ namespace nv
|
||||
inline Vector4::Vector4(scalar f) : x(f), y(f), z(f), w(f) {}
|
||||
inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : x(x), y(y), z(z), w(w) {}
|
||||
inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : x(v.x), y(v.y), z(z), w(w) {}
|
||||
inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
|
||||
inline Vector4::Vector4(Vector3::Arg v, scalar w) : x(v.x), y(v.y), z(v.z), w(w) {}
|
||||
inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
|
||||
|
||||
@ -307,6 +310,11 @@ namespace nv
|
||||
return Vector2(x, y);
|
||||
}
|
||||
|
||||
// Returns the z and w components as a Vector2 (z becomes x, w becomes y).
inline Vector2 Vector4::zw() const
{
    return Vector2(this->z, this->w);
}
|
||||
|
||||
inline Vector3 Vector4::xyz() const
|
||||
{
|
||||
return Vector3(x, y, z);
|
||||
@ -469,6 +477,14 @@ namespace nv
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
// Safe, branchless normalization from Andy Firth. All error checking ommitted.
|
||||
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
|
||||
inline Vector2 normalizeFast(Vector2::Arg v)
|
||||
{
|
||||
const float very_small_float = 1.0e-037f;
|
||||
float l = very_small_float + length(v);
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
|
||||
{
|
||||
@ -498,6 +514,14 @@ namespace nv
|
||||
return vf;
|
||||
}
|
||||
|
||||
// Returns the 2D cross product of the edges (a - c) and (b - c).
// NOTE(review): that value is the signed parallelogram area, i.e. twice the
// signed area of triangle abc; confirm callers expect this scale.
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
{
    const Vector2 edge_ac = a - c;
    const Vector2 edge_bc = b - c;

    const float cross = edge_ac.x * edge_bc.y - edge_ac.y * edge_bc.x;
    return cross;
}
|
||||
|
||||
|
||||
// Vector3
|
||||
|
||||
@ -570,10 +594,10 @@ namespace nv
|
||||
return scale(v, 1.0f/s);
|
||||
}
|
||||
|
||||
inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s)
|
||||
/*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s)
|
||||
{
|
||||
return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
|
||||
}
|
||||
}*/
|
||||
|
||||
inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t)
|
||||
{
|
||||
@ -624,6 +648,15 @@ namespace nv
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
// Safe, branchless normalization from Andy Firth. All error checking ommitted.
|
||||
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
|
||||
inline Vector3 normalizeFast(Vector3::Arg v)
|
||||
{
|
||||
const float very_small_float = 1.0e-037f;
|
||||
float l = very_small_float + length(v);
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
|
||||
{
|
||||
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
|
||||
@ -762,6 +795,15 @@ namespace nv
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
// Safe, branchless normalization from Andy Firth. All error checking ommitted.
|
||||
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
|
||||
inline Vector4 normalizeFast(Vector4::Arg v)
|
||||
{
|
||||
const float very_small_float = 1.0e-037f;
|
||||
float l = very_small_float + length(v);
|
||||
return scale(v, 1.0f / l);
|
||||
}
|
||||
|
||||
inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
|
||||
{
|
||||
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);
|
||||
|
@ -4,8 +4,9 @@
|
||||
#ifndef NV_MATH_H
|
||||
#define NV_MATH_H
|
||||
|
||||
#include <nvcore/nvcore.h>
|
||||
#include <nvcore/Debug.h>
|
||||
#include "nvcore/nvcore.h"
|
||||
#include "nvcore/Debug.h"
|
||||
#include "nvcore/Utils.h" // clamp
|
||||
|
||||
#include <math.h>
|
||||
#include <limits.h> // INT_MAX
|
||||
@ -194,7 +195,7 @@ namespace nv
|
||||
return f - floor(f);
|
||||
}
|
||||
|
||||
inline float fround(float f)
|
||||
inline float fround(float f) // @@ rename floatRound
|
||||
{
|
||||
// @@ Do something better.
|
||||
return float(iround(f));
|
||||
@ -210,6 +211,29 @@ namespace nv
|
||||
}
|
||||
}
|
||||
|
||||
inline float saturate(float f) {
|
||||
return clamp(f, 0.0f, 1.0f);
|
||||
}
|
||||
|
||||
inline float linearstep(float edge0, float edge1, float x) {
|
||||
// Scale, bias and saturate x to 0..1 range
|
||||
return saturate((x - edge0) / (edge1 - edge0));
|
||||
}
|
||||
|
||||
// Cubic ease between edge0 and edge1: linearstep() followed by the
// polynomial t*t*(3 - 2*t).
inline float smoothstep(float edge0, float edge1, float x)
{
    const float t = linearstep(edge0, edge1, x);
    return t*t*(3 - 2*t);
}
|
||||
|
||||
// Returns 1 for positive a, -1 for negative a, and 0 otherwise
// (zero, or any value for which both comparisons are false).
inline int sign(float a)
{
    return (a > 0.0f) - (a < 0.0f);
}
|
||||
|
||||
} // nv
|
||||
|
||||
#endif // NV_MATH_H
|
||||
|
26
src/nvthread/CMakeLists.txt
Normal file
26
src/nvthread/CMakeLists.txt
Normal file
@ -0,0 +1,26 @@
|
||||
PROJECT(nvthreads)

# NOTE(review): nvthreads.h, SpinWaiter.* and ThreadLocalStorage.* are listed here;
# verify that these files exist next to this script.
SET(THREADS_SRCS
    nvthreads.h
    Mutex.h Mutex.cpp
    SpinWaiter.h SpinWaiter.cpp
    Thread.h Thread.cpp
    ThreadLocalStorage.h ThreadLocalStorage.cpp)

INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

# targets
ADD_DEFINITIONS(-DNVTHREADS_EXPORTS)

# Library kind: SHARED when NVTHREADS_SHARED is set, otherwise the CMake default.
SET(NVTHREADS_LIBRARY_KIND)
IF(NVTHREADS_SHARED)
    SET(NVTHREADS_LIBRARY_KIND SHARED)
ENDIF(NVTHREADS_SHARED)

ADD_LIBRARY(nvthreads ${NVTHREADS_LIBRARY_KIND} ${THREADS_SRCS})

TARGET_LINK_LIBRARIES(nvthreads ${LIBS} nvcore)

INSTALL(TARGETS nvthreads
    RUNTIME DESTINATION bin
    LIBRARY DESTINATION lib
    ARCHIVE DESTINATION lib/static)
|
52
src/nvthread/Event.cpp
Normal file
52
src/nvthread/Event.cpp
Normal file
@ -0,0 +1,52 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#include "Event.h"
|
||||
|
||||
#if NV_OS_WIN32
|
||||
#include "Win32.h"
|
||||
#elif NV_OS_UNIX
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
|
||||
using namespace nv;
|
||||
|
||||
#if NV_OS_WIN32
|
||||
|
||||
struct Event::Private {
|
||||
HANDLE handle;
|
||||
};
|
||||
|
||||
Event::Event() : m(new Private) {
|
||||
m->handle = CreateEvent(NULL, FALSE, FALSE, NULL);
|
||||
}
|
||||
|
||||
Event::~Event() {
|
||||
CloseHandle(m->handle);
|
||||
}
|
||||
|
||||
void Event::post() {
|
||||
SetEvent(m->handle);
|
||||
}
|
||||
|
||||
void Event::wait() {
|
||||
WaitForSingleObject(m->handle, INFINITE);
|
||||
}
|
||||
|
||||
|
||||
/*static*/ void Event::post(Event * events, uint count) {
|
||||
for (uint i = 0; i < count; i++) {
|
||||
events[i].post();
|
||||
}
|
||||
}
|
||||
|
||||
/*static*/ void Event::wait(Event * events, uint count) {
|
||||
// @@ Use wait for multiple objects?
|
||||
|
||||
for (uint i = 0; i < count; i++) {
|
||||
events[i].wait();
|
||||
}
|
||||
}
|
||||
|
||||
#elif NV_OS_UNIX
|
||||
// @@
|
||||
#endif
|
34
src/nvthread/Event.h
Normal file
34
src/nvthread/Event.h
Normal file
@ -0,0 +1,34 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_THREAD_EVENT_H
|
||||
#define NV_THREAD_EVENT_H
|
||||
|
||||
#include "nvthread.h"
|
||||
|
||||
#include "nvcore/Ptr.h"
|
||||
|
||||
namespace nv
|
||||
{
|
||||
// This is intended to be used by a single waiter thread.
|
||||
class NVTHREAD_CLASS Event
|
||||
{
|
||||
NV_FORBID_COPY(Event);
|
||||
public:
|
||||
Event();
|
||||
~Event();
|
||||
|
||||
void post();
|
||||
void wait(); // Wait resets the event.
|
||||
|
||||
static void post(Event * events, uint count);
|
||||
static void wait(Event * events, uint count);
|
||||
|
||||
private:
|
||||
struct Private;
|
||||
AutoPtr<Private> m;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_THREAD_EVENT_H
|
89
src/nvthread/Mutex.cpp
Normal file
89
src/nvthread/Mutex.cpp
Normal file
@ -0,0 +1,89 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#include "Mutex.h"
|
||||
|
||||
#if NV_OS_WIN32
|
||||
|
||||
#include "Win32.h"
|
||||
|
||||
#elif NV_OS_UNIX
|
||||
|
||||
#include <pthread.h>
|
||||
#include <errno.h> // EBUSY
|
||||
|
||||
#endif // NV_OS
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
#if NV_OS_WIN32
|
||||
|
||||
struct Mutex::Private {
|
||||
CRITICAL_SECTION mutex;
|
||||
};
|
||||
|
||||
|
||||
Mutex::Mutex () : m(new Private)
|
||||
{
|
||||
InitializeCriticalSection(&m->mutex);
|
||||
}
|
||||
|
||||
Mutex::~Mutex ()
|
||||
{
|
||||
DeleteCriticalSection(&m->mutex);
|
||||
}
|
||||
|
||||
void Mutex::lock()
|
||||
{
|
||||
EnterCriticalSection(&m->mutex);
|
||||
}
|
||||
|
||||
bool Mutex::tryLock()
|
||||
{
|
||||
return TryEnterCriticalSection(&m->mutex) != 0;
|
||||
}
|
||||
|
||||
void Mutex::unlock()
|
||||
{
|
||||
LeaveCriticalSection(&m->mutex);
|
||||
}
|
||||
|
||||
#elif NV_OS_UNIX
|
||||
|
||||
struct Mutex::Private {
|
||||
pthread_mutex_t mutex;
|
||||
};
|
||||
|
||||
|
||||
Mutex::Mutex () : m(new Private)
|
||||
{
|
||||
int result = pthread_mutex_init(&m->mutex , NULL);
|
||||
nvDebugCheck(result == 0);
|
||||
}
|
||||
|
||||
Mutex::~Mutex ()
|
||||
{
|
||||
int result = pthread_mutex_destroy(&m->mutex);
|
||||
nvDebugCheck(result == 0);
|
||||
}
|
||||
|
||||
void Mutex::lock()
|
||||
{
|
||||
int result = pthread_mutex_lock(&m->mutex);
|
||||
nvDebugCheck(result == 0);
|
||||
}
|
||||
|
||||
bool Mutex::tryLock()
|
||||
{
|
||||
int result = pthread_mutex_trylock(&m->mutex);
|
||||
nvDebugCheck(result == 0 || result == EBUSY);
|
||||
return result == 0;
|
||||
}
|
||||
|
||||
void Mutex::unlock()
|
||||
{
|
||||
int result = pthread_mutex_unlock(&m->mutex);
|
||||
nvDebugCheck(result == 0);
|
||||
}
|
||||
|
||||
#endif // NV_OS
|
47
src/nvthread/Mutex.h
Normal file
47
src/nvthread/Mutex.h
Normal file
@ -0,0 +1,47 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_THREAD_MUTEX_H
|
||||
#define NV_THREAD_MUTEX_H
|
||||
|
||||
#include "nvthread.h"
|
||||
|
||||
#include "nvcore/Ptr.h"
|
||||
|
||||
namespace nv
|
||||
{
|
||||
|
||||
class NVTHREAD_CLASS Mutex
|
||||
{
|
||||
NV_FORBID_COPY(Mutex);
|
||||
public:
|
||||
Mutex ();
|
||||
~Mutex ();
|
||||
|
||||
void lock();
|
||||
bool tryLock();
|
||||
void unlock();
|
||||
|
||||
private:
|
||||
struct Private;
|
||||
AutoPtr<Private> m;
|
||||
};
|
||||
|
||||
|
||||
// Templated lock that can be used with any mutex.
|
||||
template <class M>
|
||||
class Lock
|
||||
{
|
||||
NV_FORBID_COPY(Lock);
|
||||
public:
|
||||
|
||||
Lock (M & m) : m_mutex (m) { m_mutex.lock(); }
|
||||
~Lock () { m_mutex.unlock(); }
|
||||
|
||||
private:
|
||||
M & m_mutex;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_THREAD_MUTEX_H
|
61
src/nvthread/ParallelFor.cpp
Normal file
61
src/nvthread/ParallelFor.cpp
Normal file
@ -0,0 +1,61 @@
|
||||
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
|
||||
|
||||
#include "ParallelFor.h"
|
||||
#include "Thread.h"
|
||||
#include "Atomic.h"
|
||||
#include "ThreadPool.h"
|
||||
|
||||
using namespace nv;
|
||||
|
||||
#define ENABLE_PARALLEL_FOR 1
|
||||
|
||||
|
||||
void worker(void * arg) {
|
||||
ParallelFor * owner = (ParallelFor *)arg;
|
||||
|
||||
while(true) {
|
||||
// Consume one element at a time. @@ Might be more efficient to have custom grain.
|
||||
uint i = atomicIncrement(&owner->idx);
|
||||
if (i > owner->count) {
|
||||
break;
|
||||
}
|
||||
|
||||
owner->task(owner->context, i - 1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Stores the task and its context and, when ENABLE_PARALLEL_FOR is set,
// acquires the shared thread pool (released again in the destructor).
// All remaining members are given defined values so that none is left
// uninitialized, including `pool` when parallel execution is disabled.
ParallelFor::ParallelFor(ForTask * task, void * context) : task(task), context(context), pool(NULL), count(0), idx(0) {
#if ENABLE_PARALLEL_FOR
    pool = ThreadPool::acquire();
#endif
}
|
||||
|
||||
// Hands the pool acquired by the constructor back to ThreadPool.
ParallelFor::~ParallelFor()
{
#if ENABLE_PARALLEL_FOR
    ThreadPool::release(pool);
#endif
}
|
||||
|
||||
// Runs the task for every index in [0, count).
// With ENABLE_PARALLEL_FOR the indices are handed out to the pool's threads
// through the atomic counter `idx`, and this call returns after the pool has
// finished; otherwise the task is invoked serially on the calling thread.
void ParallelFor::run(uint count) {
#if ENABLE_PARALLEL_FOR
    storeRelease(&this->count, count);

    // Init atomic counter to zero.
    storeRelease(&idx, 0);

    // Start threads.
    pool->start(worker, this);

    // Wait for all threads to complete.
    pool->wait();

    nvDebugCheck(idx >= count);
#else
    // Unsigned index to match `count`; ForTask takes the id as int.
    for (uint i = 0; i < count; i++) {
        task(context, (int)i);
    }
#endif
}
|
||||
|
||||
|
38
src/nvthread/ParallelFor.h
Normal file
38
src/nvthread/ParallelFor.h
Normal file
@ -0,0 +1,38 @@
|
||||
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_THREAD_PARALLELFOR_H
|
||||
#define NV_THREAD_PARALLELFOR_H
|
||||
|
||||
#include "nvthread.h"
|
||||
//#include "Atomic.h" // atomic<uint>
|
||||
|
||||
namespace nv
|
||||
{
|
||||
class Thread;
|
||||
class ThreadPool;
|
||||
|
||||
typedef void ForTask(void * context, int id);
|
||||
|
||||
struct ParallelFor {
|
||||
ParallelFor(ForTask * task, void * context);
|
||||
~ParallelFor();
|
||||
|
||||
void run(uint count);
|
||||
|
||||
// Invariant:
|
||||
ForTask * task;
|
||||
void * context;
|
||||
ThreadPool * pool;
|
||||
//uint workerCount; // @@ Move to thread pool.
|
||||
//Thread * workers;
|
||||
|
||||
// State:
|
||||
uint count;
|
||||
/*atomic<uint>*/ uint idx;
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
#endif // NV_THREAD_PARALLELFOR_H
|
136
src/nvthread/Thread.cpp
Normal file
136
src/nvthread/Thread.cpp
Normal file
@ -0,0 +1,136 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#include "Thread.h"
|
||||
|
||||
#if NV_OS_WIN32
|
||||
#include "Win32.h"
|
||||
#elif NV_OS_UNIX
|
||||
#include <pthread.h>
|
||||
#include <unistd.h> // usleep
|
||||
#endif
|
||||
|
||||
using namespace nv;
|
||||
|
||||
struct Thread::Private
|
||||
{
|
||||
#if NV_OS_WIN32
|
||||
HANDLE thread;
|
||||
#elif NV_OS_UNIX
|
||||
pthread_t thread;
|
||||
#endif
|
||||
|
||||
ThreadFunc * func;
|
||||
void * arg;
|
||||
};
|
||||
|
||||
#if NV_OS_WIN32

// Native entry point handed to CreateThread. `arg` is the Thread being
// started; its user function is invoked with its user argument.
unsigned long __stdcall threadFunc(void * arg) {
    Thread * const self = (Thread *)arg;
    (*self->func)(self->arg);
    return 0;
}

#elif NV_OS_UNIX

// Native entry point handed to pthread_create. `arg` is the Thread being
// started; its user function is invoked with its user argument.
extern "C" void * threadFunc(void * arg) {
    Thread * const self = (Thread *)arg;
    (*self->func)(self->arg);
    pthread_exit(0);
}

#endif
|
||||
|
||||
|
||||
// Creates a thread object that is not running yet. The native handle is
// zeroed and the public func/arg members are cleared so that they never hold
// indeterminate values before start() is called.
Thread::Thread() : p(new Private), func(NULL), arg(NULL)
{
    p->thread = 0;
}
|
||||
|
||||
// In debug builds, checks that the native handle has been cleared, which
// wait() does after joining the thread.
Thread::~Thread()
{
    nvDebugCheck(p->thread == 0);
}
|
||||
|
||||
// Records the user function and argument on this object, then launches a
// native thread whose entry point (threadFunc) calls func(arg).
void Thread::start(ThreadFunc * func, void * arg)
{
    this->func = func;
    this->arg = arg;

#if NV_OS_WIN32
    p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL);
    //p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions...
    nvDebugCheck(p->thread != NULL);
#elif NV_OS_UNIX
    const int rc = pthread_create(&p->thread, NULL, threadFunc, this);
    nvDebugCheck(rc == 0);
#endif
}
|
||||
|
||||
// Blocks until the native thread has finished, then clears the stored handle
// (closing it first on Win32).
void Thread::wait()
{
#if NV_OS_WIN32
    const DWORD waitStatus = WaitForSingleObject(p->thread, INFINITE);
    nvCheck (waitStatus == WAIT_OBJECT_0);

    const BOOL closed = CloseHandle(p->thread);
    p->thread = NULL;
    nvCheck (closed);
#elif NV_OS_UNIX
    const int rc = pthread_join(p->thread, NULL);
    p->thread = 0;
    nvDebugCheck(rc == 0);
#endif
}
|
||||
|
||||
bool Thread::isRunning () const
|
||||
{
|
||||
#if NV_OS_WIN32
|
||||
return p->thread != NULL;
|
||||
#elif NV_OS_UNIX
|
||||
return p->thread != 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Busy loop of `count` empty iterations.
// NOTE(review): the loop has no side effects, so an optimizing compiler may remove it.
/*static*/ void Thread::spinWait(uint count)
{
    uint spins = 0;
    while (spins < count) {
        spins++;
    }
}
|
||||
|
||||
// Gives up the remainder of the calling thread's time slice.
/*static*/ void Thread::yield()
{
#if NV_OS_WIN32
    SwitchToThread();
#elif NV_OS_UNIX
    const int rc = sched_yield();
    nvDebugCheck(rc == 0);
#endif
}
|
||||
|
||||
// Suspends the calling thread for approximately `ms` milliseconds.
/*static*/ void Thread::sleep(uint ms)
{
#if NV_OS_WIN32
    Sleep(ms);
#elif NV_OS_UNIX
    // POSIX requires the usleep() argument to be below 1,000,000, and
    // 1000 * ms can also overflow for large ms. Sleep the whole seconds with
    // ::sleep() (from <unistd.h>) and only the sub-second rest with usleep().
    const uint seconds = ms / 1000;
    if (seconds > 0) {
        ::sleep(seconds);
    }
    usleep(1000 * (ms % 1000));
#endif
}
|
||||
|
||||
// Waits for each of the `count` threads in array order.
// @@ On Win32 a single WaitForMultipleObjects call (count < MAXIMUM_WAIT_OBJECTS)
//    would be an alternative; is there any advantage in doing this?
/*static*/ void Thread::wait(Thread * threads, uint count)
{
    Thread * const end = threads + count;
    for (Thread * t = threads; t != end; ++t) {
        t->wait();
    }
}
|
46
src/nvthread/Thread.h
Normal file
46
src/nvthread/Thread.h
Normal file
@ -0,0 +1,46 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_THREAD_THREAD_H
|
||||
#define NV_THREAD_THREAD_H
|
||||
|
||||
#include "nvthread.h"
|
||||
|
||||
#include "nvcore/Ptr.h"
|
||||
|
||||
namespace nv
|
||||
{
|
||||
typedef void ThreadFunc(void * arg);
|
||||
|
||||
class NVTHREAD_CLASS Thread
|
||||
{
|
||||
NV_FORBID_COPY(Thread);
|
||||
public:
|
||||
Thread();
|
||||
~Thread();
|
||||
|
||||
void start(ThreadFunc * func, void * arg);
|
||||
void wait();
|
||||
|
||||
bool isRunning() const;
|
||||
|
||||
static void spinWait(uint count);
|
||||
static void yield();
|
||||
static void sleep(uint ms);
|
||||
|
||||
static void wait(Thread * threads, uint count);
|
||||
|
||||
private:
|
||||
|
||||
struct Private;
|
||||
AutoPtr<Private> p;
|
||||
|
||||
public:
|
||||
ThreadFunc * func;
|
||||
void * arg;
|
||||
|
||||
};
|
||||
|
||||
} // nv namespace
|
||||
|
||||
#endif // NV_THREAD_THREAD_H
|
121
src/nvthread/ThreadPool.cpp
Normal file
121
src/nvthread/ThreadPool.cpp
Normal file
@ -0,0 +1,121 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#include "ThreadPool.h"
|
||||
#include "Mutex.h"
|
||||
#include "Thread.h"
|
||||
|
||||
// Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it.
|
||||
#define PROTECT_THREAD_POOL 1
|
||||
|
||||
|
||||
using namespace nv;
|
||||
|
||||
#if PROTECT_THREAD_POOL
|
||||
Mutex s_pool_mutex;
|
||||
#endif
|
||||
|
||||
AutoPtr<ThreadPool> s_pool;
|
||||
|
||||
|
||||
// Returns the shared pool, creating it on first use. With PROTECT_THREAD_POOL
// the pool mutex is locked here and stays locked until release().
/*static*/ ThreadPool * ThreadPool::acquire()
{
#if PROTECT_THREAD_POOL
    s_pool_mutex.lock(); // @@ If same thread tries to lock twice, this should assert.
#endif

    if (s_pool == NULL) {
        // The ThreadPool constructor stores itself in s_pool.
        ThreadPool * created = new ThreadPool;
        nvDebugCheck(s_pool == created);
    }

    return s_pool.ptr();
}
|
||||
|
||||
// Counterpart of acquire(): waits for the pool's threads to finish, then
// (with PROTECT_THREAD_POOL) unlocks the pool mutex.
/*static*/ void ThreadPool::release(ThreadPool * pool)
{
    nvDebugCheck(pool == s_pool);

    // Make sure the threads of the pool are idle.
    s_pool->wait();

#if PROTECT_THREAD_POOL
    s_pool_mutex.unlock();
#endif
}
|
||||
|
||||
|
||||
|
||||
|
||||
/*static*/ void ThreadPool::workerFunc(void * arg) {
|
||||
uint i = (uint)arg;
|
||||
|
||||
while(true)
|
||||
{
|
||||
s_pool->startEvents[i].wait();
|
||||
|
||||
if (s_pool->func == NULL) {
|
||||
return; // @@ should we post finish event anyway?
|
||||
}
|
||||
|
||||
s_pool->func(s_pool->arg);
|
||||
|
||||
s_pool->finishEvents[i].post();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Creates one worker thread plus a start and a finish event per hardware
// thread, and starts the workers.
ThreadPool::ThreadPool()
{
    s_pool = this;  // Worker threads need this to be initialized before they start.

    workerCount = nv::hardwareThreadCount();
    workers = new Thread[workerCount];

    startEvents = new Event[workerCount];
    finishEvents = new Event[workerCount];

    for (uint i = 0; i < workerCount; i++) {
        // The index travels in the pointer-sized argument; widen it through
        // size_t first so the integer-to-pointer cast has matching size.
        workers[i].start(workerFunc, (void *)(size_t)i);
    }

    allIdle = true;
}
|
||||
|
||||
// Stops the workers and frees the per-worker arrays.
ThreadPool::~ThreadPool()
{
    // Starting with a NULL function makes every worker return from its loop.
    start(NULL, NULL);

    // Join the worker threads before their objects are destroyed.
    Thread::wait(workers, workerCount);

    delete [] workers;
    delete [] startEvents;
    delete [] finishEvents;
}
|
||||
|
||||
// Runs func(arg) on every worker: waits for the previous run to finish,
// publishes the new function and argument, then posts all start events.
void ThreadPool::start(ThreadFunc * func, void * arg)
{
    // Wait until threads are idle.
    wait();

    // Set our desired function.
    this->func = func;
    this->arg = arg;

    allIdle = false;

    // Resume threads.
    Event::post(startEvents, workerCount);
}
|
||||
|
||||
void ThreadPool::wait()
|
||||
{
|
||||
if (!allIdle)
|
||||
{
|
||||
// Wait for threads to complete.
|
||||
Event::wait(finishEvents, workerCount);
|
||||
|
||||
allIdle = true;
|
||||
}
|
||||
}
|
49
src/nvthread/ThreadPool.h
Normal file
49
src/nvthread/ThreadPool.h
Normal file
@ -0,0 +1,49 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_THREAD_THREADPOOL_H
|
||||
#define NV_THREAD_THREADPOOL_H
|
||||
|
||||
#include "nvthread.h"
|
||||
|
||||
#include "Event.h"
|
||||
#include "Thread.h"
|
||||
|
||||
namespace nv {
|
||||
|
||||
class Thread;
|
||||
class Event;
|
||||
|
||||
class ThreadPool {
|
||||
NV_FORBID_COPY(ThreadPool);
|
||||
public:
|
||||
|
||||
static ThreadPool * acquire();
|
||||
static void release(ThreadPool *);
|
||||
|
||||
ThreadPool();
|
||||
~ThreadPool();
|
||||
|
||||
void start(ThreadFunc * func, void * arg);
|
||||
void wait();
|
||||
|
||||
private:
|
||||
|
||||
static void workerFunc(void * arg);
|
||||
|
||||
uint workerCount;
|
||||
Thread * workers;
|
||||
Event * startEvents;
|
||||
Event * finishEvents;
|
||||
|
||||
uint allIdle;
|
||||
|
||||
// Current function:
|
||||
ThreadFunc * func;
|
||||
void * arg;
|
||||
};
|
||||
|
||||
} // namespace nv
|
||||
|
||||
|
||||
#endif // NV_THREAD_THREADPOOL_H
|
9
src/nvthread/Win32.h
Normal file
9
src/nvthread/Win32.h
Normal file
@ -0,0 +1,9 @@
|
||||
// This code is in the public domain -- castano@gmail.com
|
||||
|
||||
// Never include this from a header file.
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define VC_EXTRALEAN
|
||||
#define _WIN32_WINNT 0x0400 // for SwitchToThread, TryEnterCriticalSection
|
||||
#include <windows.h>
|
||||
//#include <process.h> // for _beginthreadex
|
51
src/nvthread/nvthread.cpp
Normal file
51
src/nvthread/nvthread.cpp
Normal file
@ -0,0 +1,51 @@
|
||||
|
||||
#include "nvthread.h"
|
||||
|
||||
#include "Thread.h"
|
||||
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#define VC_EXTRALEAN
|
||||
#include <windows.h>
|
||||
|
||||
using namespace nv;
|
||||
|
||||
|
||||
// Find the number of cores in the system.
|
||||
// Based on: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
|
||||
// @@ Distinguish between logical and physical cores?
|
||||
// Returns the number of hardware threads reported by the operating system.
// On the POSIX and BSD paths a failed query falls back to 1 instead of
// returning a bogus count.
uint nv::hardwareThreadCount() {
#if NV_OS_WIN32
    SYSTEM_INFO sysinfo;
    GetSystemInfo( &sysinfo );
    return sysinfo.dwNumberOfProcessors;
#elif NV_OS_XBOX
    return 3; // or 6?
#elif NV_OS_LINUX // Linux, Solaris, & AIX
    // sysconf returns -1 on error; converting that to uint would report
    // billions of processors, so treat anything below 1 as a single core.
    const long onlineCount = sysconf(_SC_NPROCESSORS_ONLN);
    return (onlineCount < 1) ? 1 : (uint)onlineCount;
#elif NV_OS_DARWIN || NV_OS_FREEBSD
    // Start from 0 so that a failing sysctl, which leaves the output
    // untouched, is detected by the "< 1" checks below.
    int numCPU = 0;
    int mib[4];
    size_t len = sizeof(numCPU);

    // set the mib for hw.ncpu
    mib[0] = CTL_HW;
    mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU;

    // get the number of CPUs from the system
    sysctl(mib, 2, &numCPU, &len, NULL, 0);

    if (numCPU < 1) {
        mib[1] = HW_NCPU;
        sysctl( mib, 2, &numCPU, &len, NULL, 0 );

        if (numCPU < 1) {
            return 1; // Assume single core.
        }
    }

    return numCPU;
#else
    return 1; // Assume single core.
#endif
}
|
||||
|
83
src/nvthread/nvthread.h
Normal file
83
src/nvthread/nvthread.h
Normal file
@ -0,0 +1,83 @@
|
||||
// This code is in the public domain -- castanyo@yahoo.es
|
||||
|
||||
#pragma once
|
||||
#ifndef NV_THREAD_H
|
||||
#define NV_THREAD_H
|
||||
|
||||
#include "nvcore/nvcore.h"
|
||||
|
||||
// Function linkage
|
||||
#if NVTHREAD_SHARED
|
||||
#ifdef NVTHREAD_EXPORTS
|
||||
#define NVTHREAD_API DLL_EXPORT
|
||||
#define NVTHREAD_CLASS DLL_EXPORT_CLASS
|
||||
#else
|
||||
#define NVTHREAD_API DLL_IMPORT
|
||||
#define NVTHREAD_CLASS DLL_IMPORT
|
||||
#endif
|
||||
#else // NVTHREAD_SHARED
|
||||
#define NVTHREAD_API
|
||||
#define NVTHREAD_CLASS
|
||||
#endif // NVTHREAD_SHARED
|
||||
|
||||
|
||||
// Compiler barriers.
|
||||
// See: http://en.wikipedia.org/wiki/Memory_ordering
|
||||
#if NV_CC_MSVC
|
||||
|
||||
#include <intrin.h>
|
||||
|
||||
#pragma intrinsic(_WriteBarrier)
|
||||
#define nvCompilerWriteBarrier _WriteBarrier
|
||||
|
||||
#pragma intrinsic(_ReadWriteBarrier)
|
||||
#define nvCompilerReadWriteBarrier _ReadWriteBarrier
|
||||
|
||||
#if _MSC_VER >= 1400 // ReadBarrier is VC2005
|
||||
#pragma intrinsic(_ReadBarrier)
|
||||
#define nvCompilerReadBarrier _ReadBarrier
|
||||
#else
|
||||
#define nvCompilerReadBarrier _ReadWriteBarrier
|
||||
#endif
|
||||
|
||||
#elif NV_CC_GNUC
|
||||
|
||||
#define nvCompilerReadWriteBarrier() asm volatile("" ::: "memory");
|
||||
#define nvCompilerWriteBarrier nvCompilerReadWriteBarrier
|
||||
#define nvCompilerReadBarrier nvCompilerReadWriteBarrier
|
||||
|
||||
#endif // NV_CC_MSVC
|
||||
|
||||
|
||||
// @@ Memory barriers / fences.
|
||||
|
||||
// @@ Atomics.
|
||||
|
||||
|
||||
/* Wrap this up:
|
||||
#define YieldProcessor() __asm { rep nop }
|
||||
#define YieldProcessor _mm_pause
|
||||
#define YieldProcessor __yield
|
||||
|
||||
BOOL WINAPI SwitchToThread(void);
|
||||
*/
|
||||
|
||||
|
||||
namespace nv
|
||||
{
|
||||
// Reentrant.
|
||||
uint hardwareThreadCount();
|
||||
|
||||
// Not thread-safe. Use from main thread only.
|
||||
void initWorkers();
|
||||
void shutWorkers();
|
||||
void setWorkerFunction(void * func);
|
||||
|
||||
} // nv namespace
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
#endif // NV_THREAD_H
|
@ -37,7 +37,7 @@ using namespace nv;
|
||||
using namespace nvtt;
|
||||
|
||||
|
||||
void CompressorBC6::compressBlock(Tile & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
||||
void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
NV_UNUSED(alphaMode); // ZOH does not support alpha.
|
||||
|
||||
@ -56,7 +56,7 @@ void CompressorBC6::compressBlock(Tile & tile, AlphaMode alphaMode, const Compre
|
||||
}
|
||||
|
||||
|
||||
void CompressorBC7::compressBlock(Tile & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
||||
void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
|
||||
{
|
||||
// @@ TODO
|
||||
}
|
||||
|
@ -481,10 +481,10 @@ void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode
|
||||
|
||||
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
|
||||
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
int size = rect.Pitch * ((h + 3) / 4);
|
||||
outputOptions.outputHandler->writeData(rect.pBits, size);
|
||||
}
|
||||
if (outputOptions.outputHandler != NULL) {
|
||||
int size = rect.Pitch * ((h + 3) / 4);
|
||||
outputOptions.outputHandler->writeData(rect.pBits, size);
|
||||
}
|
||||
|
||||
err = surface->UnlockRect();
|
||||
}
|
||||
|
@ -110,7 +110,7 @@ namespace
|
||||
{
|
||||
nvDebugCheck(alignment >= 1);
|
||||
flush();
|
||||
int remainder = (size_t)ptr % alignment;
|
||||
int remainder = (int)((uintptr_t)ptr % alignment);
|
||||
if (remainder != 0) {
|
||||
putBits(0, (alignment - remainder) * 8);
|
||||
}
|
||||
|
@ -349,6 +349,8 @@ bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int
|
||||
compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions);
|
||||
}
|
||||
|
||||
outputOptions.endImage();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
@ -135,6 +135,11 @@ bool OutputOptions::Private::writeData(const void * data, int size) const
|
||||
return outputHandler == NULL || outputHandler->writeData(data, size);
|
||||
}
|
||||
|
||||
void OutputOptions::Private::endImage() const
|
||||
{
|
||||
if (outputHandler != NULL) outputHandler->endImage();
|
||||
}
|
||||
|
||||
void OutputOptions::Private::error(Error e) const
|
||||
{
|
||||
if (errorHandler != NULL) errorHandler->error(e);
|
||||
|
@ -52,6 +52,11 @@ namespace nvtt
|
||||
return true;
|
||||
}
|
||||
|
||||
virtual void endImage()
|
||||
{
|
||||
// ignore.
|
||||
}
|
||||
|
||||
nv::StdOutputStream stream;
|
||||
};
|
||||
|
||||
@ -72,6 +77,7 @@ namespace nvtt
|
||||
|
||||
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
|
||||
bool writeData(const void * data, int size) const;
|
||||
void endImage() const;
|
||||
void error(Error e) const;
|
||||
};
|
||||
|
||||
|
@ -18,8 +18,8 @@
|
||||
// http://msdn.microsoft.com/en-us/library/dd504870.aspx
|
||||
#if NV_OS_WIN32 && _MSC_VER >= 1600
|
||||
#define HAVE_PPL 1
|
||||
//#include <array>
|
||||
#include <ppl.h>
|
||||
#include <array>
|
||||
//#include <ppl.h>
|
||||
#endif
|
||||
|
||||
// Intel Thread Building Blocks (TBB).
|
||||
@ -28,6 +28,8 @@
|
||||
#include <tbb/parallel_for.h>
|
||||
#endif
|
||||
|
||||
#include "nvthread/ParallelFor.h"
|
||||
|
||||
|
||||
namespace nvtt {
|
||||
|
||||
@ -40,6 +42,15 @@ namespace nvtt {
|
||||
}
|
||||
};
|
||||
|
||||
struct ParallelTaskDispatcher : public TaskDispatcher
|
||||
{
|
||||
virtual void dispatch(Task * task, void * context, int count) {
|
||||
nv::ParallelFor parallelFor(task, context);
|
||||
parallelFor.run(count); // @@ Add support for custom grain.
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
#if defined(HAVE_OPENMP)
|
||||
|
||||
struct OpenMPTaskDispatcher : public TaskDispatcher
|
||||
@ -81,9 +92,24 @@ namespace nvtt {
|
||||
|
||||
#if defined(HAVE_PPL)
|
||||
|
||||
// Minimal iterator over consecutive integers: dereferencing yields the
// current value, ++/-- step it by one.
class CountingIterator
{
public:
    // Starts at 0.
    CountingIterator() : i(0) {}

    // Copies the current position. (It used to reset the copy to 0, which
    // breaks any algorithm that takes its iterators by value.)
    CountingIterator(const CountingIterator & rhs) : i(rhs.i) {}

    // Starts at x.
    explicit CountingIterator(int x) : i(x) {}

    const int & operator*() const { return i; }
    CountingIterator & operator++() { i++; return *this; }
    CountingIterator & operator--() { i--; return *this; }

    // Position comparison, needed to test an iterator against the end of a range.
    bool operator==(const CountingIterator & rhs) const { return i == rhs.i; }
    bool operator!=(const CountingIterator & rhs) const { return i != rhs.i; }

private:
    int i;
};
|
||||
|
||||
struct TaskFunctor {
|
||||
TaskFunctor(Task * task, void * context) : task(task), context(context) {}
|
||||
void operator()(int n) const {
|
||||
void operator()(int & n) const {
|
||||
task(context, n);
|
||||
}
|
||||
Task * task;
|
||||
@ -95,12 +121,16 @@ namespace nvtt {
|
||||
{
|
||||
virtual void dispatch(Task * task, void * context, int count)
|
||||
{
|
||||
CountingIterator begin(0);
|
||||
CountingIterator end((int)count);
|
||||
TaskFunctor func(task, context);
|
||||
Concurrency::parallel_for(0, count, func);
|
||||
|
||||
std::for_each(begin, end, func);
|
||||
//parallel_for_each(begin, end, func);
|
||||
}
|
||||
};
|
||||
|
||||
#endif // HAVE_PPL
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_TBB)
|
||||
|
||||
@ -132,7 +162,8 @@ namespace nvtt {
|
||||
#elif defined(HAVE_GCD)
|
||||
typedef AppleTaskDispatcher ConcurrentTaskDispatcher;
|
||||
#else
|
||||
typedef SequentialTaskDispatcher ConcurrentTaskDispatcher;
|
||||
//typedef SequentialTaskDispatcher ConcurrentTaskDispatcher;
|
||||
typedef ParallelTaskDispatcher ConcurrentTaskDispatcher;
|
||||
#endif
|
||||
|
||||
} // namespace nvtt
|
||||
|
@ -615,7 +615,7 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
|
||||
block->decodeBlock(&colors, false);
|
||||
}
|
||||
else if (decoder == Decoder_NV5x) {
|
||||
block->decodeBlockNV5x(&colors);
|
||||
block->decodeBlockNV5x(&colors);
|
||||
}
|
||||
}
|
||||
else if (format == nvtt::Format_BC3)
|
||||
@ -629,19 +629,19 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
|
||||
block->decodeBlock(&colors, false);
|
||||
}
|
||||
else if (decoder == Decoder_NV5x) {
|
||||
block->decodeBlockNV5x(&colors);
|
||||
block->decodeBlockNV5x(&colors);
|
||||
}
|
||||
}
|
||||
else if (format == nvtt::Format_BC4)
|
||||
{
|
||||
const BlockATI1 * block = (const BlockATI1 *)ptr;
|
||||
block->decodeBlock(&colors, decoder == Decoder_D3D9);
|
||||
}
|
||||
else if (format == nvtt::Format_BC5)
|
||||
{
|
||||
const BlockATI2 * block = (const BlockATI2 *)ptr;
|
||||
block->decodeBlock(&colors, decoder == Decoder_D3D9);
|
||||
}
|
||||
const BlockATI1 * block = (const BlockATI1 *)ptr;
|
||||
block->decodeBlock(&colors, decoder == Decoder_D3D9);
|
||||
}
|
||||
else if (format == nvtt::Format_BC5)
|
||||
{
|
||||
const BlockATI2 * block = (const BlockATI2 *)ptr;
|
||||
block->decodeBlock(&colors, decoder == Decoder_D3D9);
|
||||
}
|
||||
|
||||
for (int yy = 0; yy < 4; yy++)
|
||||
{
|
||||
@ -864,6 +864,42 @@ bool TexImage::buildNextMipmap(MipmapFilter filter, float filterWidth, const flo
|
||||
return true;
|
||||
}
|
||||
|
||||
void TexImage::canvasSize(int w, int h, int d)
|
||||
{
|
||||
nvDebugCheck(w > 0 && h > 0 && d > 0);
|
||||
|
||||
FloatImage * img = m->image;
|
||||
if (img == NULL || (w == img->width() && h == img->height() && d == img->depth())) {
|
||||
return;
|
||||
}
|
||||
|
||||
detach();
|
||||
|
||||
FloatImage * new_img = new FloatImage;
|
||||
new_img->allocate(4, w, h, d);
|
||||
new_img->clear();
|
||||
|
||||
w = min(uint(w), img->width());
|
||||
h = min(uint(h), img->height());
|
||||
d = min(uint(d), img->depth());
|
||||
|
||||
for (int z = 0; z < d; z++) {
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
new_img->pixel(0, x, y, z) = img->pixel(0, x, y, z);
|
||||
new_img->pixel(1, x, y, z) = img->pixel(1, x, y, z);
|
||||
new_img->pixel(2, x, y, z) = img->pixel(2, x, y, z);
|
||||
new_img->pixel(3, x, y, z) = img->pixel(3, x, y, z);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete m->image;
|
||||
m->image = new_img;
|
||||
m->type = (d == 1) ? TextureType_2D : TextureType_3D;
|
||||
}
|
||||
|
||||
|
||||
// Color transforms.
|
||||
void TexImage::toLinear(float gamma)
|
||||
{
|
||||
@ -885,6 +921,66 @@ void TexImage::toGamma(float gamma)
|
||||
m->image->toGamma(0, 3, gamma);
|
||||
}
|
||||
|
||||
|
||||
// Encodes a linear value with the sRGB curve: a linear segment near zero, a
// power segment above it, and the result limited to [0, 1].
static float toSrgb(float f) {
    if (f <= 0.0) {
        return 0.0f;
    }
    if (f <= 0.0031308f) {
        return 12.92f * f;
    }
    if (f <= 1.0f) {
        return (powf(f, 0.41666f) * 1.055f) - 0.055f;
    }
    // Everything else (values above 1, and NaN, which fails every test above).
    return 1.0f;
}
|
||||
|
||||
void TexImage::toSrgb()
|
||||
{
|
||||
FloatImage * img = m->image;
|
||||
if (img == NULL) return;
|
||||
|
||||
detach();
|
||||
|
||||
const uint count = img->pixelCount();
|
||||
for (uint j = 0; j < count; j++)
|
||||
{
|
||||
float & r = img->pixel(0, j);
|
||||
float & g = img->pixel(1, j);
|
||||
float & b = img->pixel(2, j);
|
||||
|
||||
r = ::toSrgb(r);
|
||||
g = ::toSrgb(g);
|
||||
b = ::toSrgb(b);
|
||||
}
|
||||
}
|
||||
|
||||
// Piecewise-linear encoding curve made of four segments with slopes 4, 2, 1
// and 0.5; the result is limited to [0, 1].
static float toXenonSrgb(float f) {
    if (f < 0) {
        return 0;
    }
    if (f < (1.0f/16.0f)) {
        return 4.0f * f;
    }
    if (f < (1.0f/8.0f)) {
        return 0.25f + 2.0f * (f - 0.0625f);
    }
    if (f < 0.5f) {
        return 0.375f + 1.0f * (f - 0.125f);
    }
    if (f < 1.0f) {
        return 0.75f + 0.5f * (f - 0.50f);
    }
    // Values of 1 and above, and NaN, which fails every test above.
    return 1.0f;
}
|
||||
|
||||
void TexImage::toXenonSrgb()
|
||||
{
|
||||
FloatImage * img = m->image;
|
||||
if (img == NULL) return;
|
||||
|
||||
detach();
|
||||
|
||||
const uint count = img->pixelCount();
|
||||
for (uint j = 0; j < count; j++)
|
||||
{
|
||||
float & r = img->pixel(0, j);
|
||||
float & g = img->pixel(1, j);
|
||||
float & b = img->pixel(2, j);
|
||||
|
||||
r = ::toXenonSrgb(r);
|
||||
g = ::toXenonSrgb(g);
|
||||
b = ::toXenonSrgb(b);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4])
|
||||
{
|
||||
if (m->image == NULL) return;
|
||||
@ -1140,9 +1236,9 @@ void TexImage::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
|
||||
|
||||
const uint count = img->pixelCount();
|
||||
for (uint i = 0; i < count; i++) {
|
||||
float R = nv::clamp(r[i] * irange, 0.0f, 1.0f);
|
||||
float G = nv::clamp(g[i] * irange, 0.0f, 1.0f);
|
||||
float B = nv::clamp(b[i] * irange, 0.0f, 1.0f);
|
||||
float R = nv::clamp(r[i], 0.0f, 1.0f);
|
||||
float G = nv::clamp(g[i], 0.0f, 1.0f);
|
||||
float B = nv::clamp(b[i], 0.0f, 1.0f);
|
||||
#if 1
|
||||
float M = max(max(R, G), max(B, threshold));
|
||||
|
||||
|
@ -294,6 +294,9 @@ namespace nvtt
|
||||
|
||||
/// Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
|
||||
virtual bool writeData(const void * data, int size) = 0;
|
||||
|
||||
/// Indicate the end of the compressed image.
|
||||
virtual void endImage() = 0;
|
||||
};
|
||||
|
||||
/// Error codes.
|
||||
@ -440,10 +443,13 @@ namespace nvtt
|
||||
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
|
||||
NVTT_API bool buildNextMipmap(MipmapFilter filter);
|
||||
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0);
|
||||
NVTT_API void canvasSize(int w, int h, int d);
|
||||
|
||||
// Color transforms.
|
||||
NVTT_API void toLinear(float gamma);
|
||||
NVTT_API void toGamma(float gamma);
|
||||
NVTT_API void toSrgb();
|
||||
NVTT_API void toXenonSrgb();
|
||||
NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
|
||||
NVTT_API void swizzle(int r, int g, int b, int a);
|
||||
NVTT_API void scaleBias(int channel, float scale, float bias);
|
||||
|
@ -56,6 +56,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
|
||||
// ignore.
|
||||
}
|
||||
|
||||
virtual void endImage()
|
||||
{
|
||||
// Ignore.
|
||||
}
|
||||
|
||||
// Output data.
|
||||
virtual bool writeData(const void * data, int size)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user