Merge changes from the witness.

pull/216/head
castano 13 years ago
parent 9c0658edca
commit 3c0ab2d3f3

@ -0,0 +1,346 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="9.00"
Name="nvthread"
ProjectGUID="{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}"
RootNamespace="nvthread"
Keyword="Win32Proj"
TargetFrameworkVersion="131072"
>
<Platforms>
<Platform
Name="Win32"
/>
<Platform
Name="x64"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="2"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Debug|x64"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="2"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
InlineFunctionExpansion="0"
EnableIntrinsicFunctions="true"
FavorSizeOrSpeed="0"
OmitFramePointers="true"
EnableFiberSafeOptimizations="true"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="false"
EnableEnhancedInstructionSet="2"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|x64"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="4"
InheritedPropertySheets="$(SolutionDir)\nvtt.vsprops"
CharacterSet="0"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
TargetEnvironment="3"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="3"
EnableIntrinsicFunctions="true"
OmitFramePointers="true"
WholeProgramOptimization="true"
AdditionalIncludeDirectories=""
PreprocessorDefinitions="WIN32;NDEBUG;_LIB;__SSE2__;__SSE__;__MMX__"
StringPooling="true"
RuntimeLibrary="2"
EnableFunctionLevelLinking="false"
UsePrecompiledHeader="0"
WarningLevel="3"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLibrarianTool"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<File
RelativePath="..\..\..\src\nvthread\Atomic.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Event.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Event.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Mutex.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Mutex.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\nvthread.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\nvthread.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ParallelFor.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ParallelFor.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Thread.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\Thread.h"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ThreadPool.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvthread\ThreadPool.h"
>
</File>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

@ -4,6 +4,7 @@ Microsoft Visual Studio Solution File, Format Version 10.00
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
@ -88,6 +89,8 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "imperativeapi", "imperative
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcproj", "{C33787E3-5564-4834-9FE3-A9020455A669}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvthread", "nvthread\nvthread.vcproj", "{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Mixed Platforms = Debug (no cuda)|Mixed Platforms
@ -457,6 +460,28 @@ Global
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|Win32.Build.0 = Release|Win32
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.ActiveCfg = Release|x64
{C33787E3-5564-4834-9FE3-A9020455A669}.Release|x64.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Mixed Platforms.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|Win32.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug (no cuda)|x64.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Mixed Platforms.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.ActiveCfg = Debug|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|Win32.Build.0 = Debug|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.ActiveCfg = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Debug|x64.Build.0 = Debug|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Mixed Platforms.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|Win32.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release (no cuda)|x64.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Mixed Platforms.Build.0 = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.ActiveCfg = Release|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|Win32.Build.0 = Release|Win32
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.ActiveCfg = Release|x64
{3DD3A43D-C6EA-460F-821B-6C339A03C5BB}.Release|x64.Build.0 = Release|x64
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

@ -6,6 +6,7 @@ INCLUDE_DIRECTORIES(${NV_SOURCE_DIR}/extern/stb)
SUBDIRS(nvcore)
SUBDIRS(nvmath)
SUBDIRS(nvimage)
SUBDIRS(nvthread)
SUBDIRS(nvtt)
# OpenGL

@ -78,8 +78,8 @@ namespace nv
}
template <typename T>
bool find(const T & element, const T * restrict ptr, uint count, uint * index) {
for (uint i = 0; i < count; i++) {
bool find(const T & element, const T * restrict ptr, uint begin, uint end, uint * index) {
for (uint i = begin; i < end; i++) {
if (ptr[i] == element) {
if (index != NULL) *index = i;
return true;
@ -257,15 +257,15 @@ namespace nv
}
/// Return true if element found.
NV_FORCEINLINE bool find(const T & element, uint * index) const
NV_FORCEINLINE bool find(const T & element, uint * indexPtr) const
{
return find(element, 0, m_size, index);
return find(element, 0, m_size, indexPtr);
}
/// Return true if element found within the given range.
NV_FORCEINLINE bool find(const T & element, uint first, uint count, uint * index) const
NV_FORCEINLINE bool find(const T & element, uint begin, uint end, uint * indexPtr) const
{
return ::nv::find(element, m_buffer + first, count, index);
return ::nv::find(element, m_buffer, begin, end, indexPtr);
}
/// Remove the element at the given index. This is an expensive operation!

@ -448,19 +448,6 @@ namespace
/** Win32 assert handler. */
struct Win32AssertHandler : public AssertHandler
{
// Code from Daniel Vogel.
static bool isDebuggerPresent()
{
HINSTANCE kernel32 = GetModuleHandle("kernel32.dll");
if (kernel32) {
FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
if (IsDebuggerPresent != NULL && IsDebuggerPresent()) {
return true;
}
}
return false;
}
// Flush the message queue. This is necessary for the message box to show up.
static void flushMessageQueue()
{
@ -487,7 +474,7 @@ namespace
nvDebug( error_string.str() );
}
if (isDebuggerPresent()) {
if (debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
@ -522,15 +509,6 @@ namespace
/** Xbox360 assert handler. */
struct Xbox360AssertHandler : public AssertHandler
{
static bool isDebuggerPresent()
{
#ifdef _DEBUG
return DmIsDebuggerPresent() == TRUE;
#else
return false;
#endif
}
// Assert handler method.
virtual int assertion( const char * exp, const char * file, int line, const char * func/*=NULL*/ )
{
@ -546,7 +524,7 @@ namespace
nvDebug( error_string.str() );
}
if (isDebuggerPresent()) {
if (debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
@ -563,26 +541,6 @@ namespace
/** Unix assert handler. */
struct UnixAssertHandler : public AssertHandler
{
bool isDebuggerPresent()
{
#if NV_OS_DARWIN
int mib[4];
struct kinfo_proc info;
size_t size;
mib[0] = CTL_KERN;
mib[1] = KERN_PROC;
mib[2] = KERN_PROC_PID;
mib[3] = getpid();
size = sizeof(info);
info.kp_proc.p_flag = 0;
sysctl(mib,4,&info,&size,NULL,0);
return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
#else
// if ppid != sid, some process spawned our app, probably a debugger.
return getsid(getpid()) != getppid();
#endif
}
// Assert handler method.
virtual int assertion(const char * exp, const char * file, int line, const char * func)
{
@ -594,7 +552,7 @@ namespace
}
#if _DEBUG
if (isDebuggerPresent()) {
if (debug::isDebuggerPresent()) {
return NV_ABORT_DEBUG;
}
#endif
@ -702,7 +660,10 @@ void debug::enableSigHandler()
// SYMOPT_DEFERRED_LOADS make us not take a ton of time unless we actual log traces
SymSetOptions(SYMOPT_DEFERRED_LOADS|SYMOPT_FAIL_CRITICAL_ERRORS|SYMOPT_LOAD_LINES|SYMOPT_UNDNAME);
SymInitialize(GetCurrentProcess(), NULL, TRUE);
if (!SymInitialize(GetCurrentProcess(), NULL, TRUE)) {
DWORD error = GetLastError();
nvDebug("SymInitialize returned error : %d\n", error);
}
#elif !NV_OS_WIN32 && defined(HAVE_SIGNAL_H)
@ -743,3 +704,38 @@ void debug::disableSigHandler()
#endif
}
/// Report whether a debugger appears to be attached to the current process.
/// Each platform uses its own detection method; the final branch is only a heuristic.
bool debug::isDebuggerPresent()
{
#if NV_OS_WIN32
    // Resolve IsDebuggerPresent from kernel32.dll at run time rather than linking to it.
    HINSTANCE kernel32 = GetModuleHandle("kernel32.dll");
    if (kernel32 == NULL) {
        return false;
    }
    FARPROC IsDebuggerPresent = GetProcAddress(kernel32, "IsDebuggerPresent");
    return IsDebuggerPresent != NULL && IsDebuggerPresent();
#elif NV_OS_XBOX
#ifdef _DEBUG
    // The debug monitor is only queried in debug builds.
    return DmIsDebuggerPresent() == TRUE;
#else
    return false;
#endif
#elif NV_OS_DARWIN
    // Ask the kernel for this process' info record and test its P_TRACED flag.
    int mib[4] = { CTL_KERN, KERN_PROC, KERN_PROC_PID, getpid() };
    struct kinfo_proc info;
    size_t size = sizeof(info);
    // The flag is cleared up front; the sysctl return value is not checked.
    info.kp_proc.p_flag = 0;
    sysctl(mib, 4, &info, &size, NULL, 0);
    return ((info.kp_proc.p_flag & P_TRACED) == P_TRACED);
#else
    // if ppid != sid, some process spawned our app, probably a debugger.
    const pid_t sessionId = getsid(getpid());
    return sessionId != getppid();
#endif
}

@ -10,6 +10,9 @@
# include <stdarg.h> // va_list
#endif
// Make sure we are using our assert.
#undef assert
#define NV_ABORT_DEBUG 1
#define NV_ABORT_IGNORE 2
#define NV_ABORT_EXIT 3
@ -116,12 +119,6 @@
#endif
#if __cplusplus > 199711L
#define nvStaticCheck(x) static_assert(x)
#else
#define nvStaticCheck(x) typedef char NV_DO_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
#endif
NVCORE_API int nvAbort(const char *exp, const char *file, int line, const char * func = NULL);
NVCORE_API void NV_CDECL nvDebugPrint( const char *msg, ... ) __attribute__((format (printf, 1, 2)));
@ -166,6 +163,8 @@ namespace nv
NVCORE_API void enableSigHandler();
NVCORE_API void disableSigHandler();
NVCORE_API bool isDebuggerPresent();
}
} // nv namespace

@ -2,7 +2,7 @@
#error "Do not include this file directly."
#endif
//#include <stdint.h> // uint8_t, int8_t, ...
#include <stdint.h> // uint8_t, int8_t, ... uintptr_t
#include <cstddef> // operator new, size_t, NULL
// Function linkage
@ -67,4 +67,4 @@ typedef int64_t int64;
// Aliases
typedef uint32 uint;
*/
*/

@ -12,10 +12,10 @@
#include <new> // new and delete
#if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
#else
# define NV_ALIGN_16 __declspec(align(16))
#if NV_CC_GNUC
# define NV_ALIGN_16 __attribute__ ((__aligned__ (16)))
#else
# define NV_ALIGN_16 __declspec(align(16))
#endif
@ -43,15 +43,15 @@ extern "C" {
namespace nv {
// C++ helpers.
template <typename T> T * malloc(size_t count) {
template <typename T> NV_FORCEINLINE T * malloc(size_t count) {
return (T *)::malloc(sizeof(T) * count);
}
template <typename T> T * realloc(T * ptr, size_t count) {
template <typename T> NV_FORCEINLINE T * realloc(T * ptr, size_t count) {
return (T *)::realloc(ptr, sizeof(T) * count);
}
template <typename T> void free(const T * ptr) {
template <typename T> NV_FORCEINLINE void free(const T * ptr) {
::free((void *)ptr);
}

@ -72,7 +72,7 @@ namespace nv
#if NV_OS_WIN32
return _ftell_nolock(m_fp);
#else
return ftell(m_fp);
return (uint)ftell(m_fp);
#endif
}
@ -85,9 +85,9 @@ namespace nv
uint end = _ftell_nolock(m_fp);
_fseek_nolock(m_fp, pos, SEEK_SET);
#else
uint pos = ftell(m_fp);
uint pos = (uint)ftell(m_fp);
fseek(m_fp, 0, SEEK_END);
uint end = ftell(m_fp);
uint end = (uint)ftell(m_fp);
fseek(m_fp, pos, SEEK_SET);
#endif
return end;

@ -189,7 +189,7 @@ StringBuilder::StringBuilder() : m_size(0), m_str(NULL)
}
/** Preallocate space. */
StringBuilder::StringBuilder( int size_hint ) : m_size(size_hint)
StringBuilder::StringBuilder( uint size_hint ) : m_size(size_hint)
{
nvDebugCheck(m_size > 0);
m_str = strAlloc(m_size);
@ -203,9 +203,15 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
}
/** Copy string. */
StringBuilder::StringBuilder( const char * s, int extra_size_hint/*=0*/ ) : m_size(0), m_str(NULL)
StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
{
copy(s, extra_size_hint);
copy(s);
}
/** Build the string from @a s and an explicit length @a len by delegating to copy(s, len). */
StringBuilder::StringBuilder(const char * s, uint len) : m_size(0), m_str(NULL)
{
    this->copy(s, len);
}
/** Delete the string. */
@ -396,15 +402,25 @@ StringBuilder & StringBuilder::reserve( uint size_hint )
/** Copy a string safely. */
StringBuilder & StringBuilder::copy( const char * s, int extra_size/*=0*/ )
StringBuilder & StringBuilder::copy(const char * s)
{
nvCheck( s != NULL );
const uint str_size = uint(strlen( s )) + 1;
reserve(str_size + extra_size);
reserve(str_size);
memcpy(m_str, s, str_size);
return *this;
}
/** Copy @a len characters of @a s into this builder and return *this. @a s must not be NULL. */
StringBuilder & StringBuilder::copy(const char * s, uint len)
{
    nvCheck(s != NULL);

    // One extra slot beyond len (presumably for the terminating null; confirm against strCpy).
    const uint capacity = len + 1;
    reserve(capacity);
    strCpy(m_str, capacity, s, len);

    return *this;
}
/** Copy an StringBuilder. */
StringBuilder & StringBuilder::copy( const StringBuilder & s )

@ -59,9 +59,10 @@ namespace nv
public:
StringBuilder();
explicit StringBuilder( int size_hint );
StringBuilder( const char * str, int extra_size_hint = 0);
StringBuilder( const StringBuilder & );
explicit StringBuilder( uint size_hint );
StringBuilder(const char * str);
StringBuilder(const char * str, uint len);
StringBuilder(const StringBuilder & other);
~StringBuilder();
@ -75,9 +76,10 @@ namespace nv
StringBuilder & number( int i, int base = 10 );
StringBuilder & number( uint i, int base = 10 );
StringBuilder & reserve( uint size_hint );
StringBuilder & copy( const char * str, int extra_size/*=0*/ );
StringBuilder & copy( const StringBuilder & str );
StringBuilder & reserve(uint size_hint);
StringBuilder & copy(const char * str);
StringBuilder & copy(const char * str, uint len);
StringBuilder & copy(const StringBuilder & str);
StringBuilder & toLower();
StringBuilder & toUpper();
@ -145,7 +147,7 @@ namespace nv
public:
Path() : StringBuilder() {}
explicit Path(int size_hint) : StringBuilder(size_hint) {}
Path(const char * str, int extra_size_hint = 0) : StringBuilder(str, extra_size_hint) {}
Path(const char * str) : StringBuilder(str) {}
Path(const Path & path) : StringBuilder(path) {}
const char * fileName() const;

@ -7,9 +7,76 @@
#include "nvcore.h"
#include "Debug.h" // nvDebugCheck
// Just in case. Grrr.
#undef min
#undef max
namespace nv
{
// Less error prone than casting. From CB:
// http://cbloomrants.blogspot.com/2011/06/06-17-11-c-casting-is-devil.html
// Unsigned -> signed conversion at the same width. The overload set only
// accepts the exact fixed-width types, so the width can never change by accident.
inline int8 asSigned(uint8 x) { return static_cast<int8>(x); }
inline int16 asSigned(uint16 x) { return static_cast<int16>(x); }
inline int32 asSigned(uint32 x) { return static_cast<int32>(x); }
inline int64 asSigned(uint64 x) { return static_cast<int64>(x); }
// Signed -> unsigned conversion at the same width.
inline uint8 asUnsigned(int8 x) { return static_cast<uint8>(x); }
inline uint16 asUnsigned(int16 x) { return static_cast<uint16>(x); }
inline uint32 asUnsigned(int32 x) { return static_cast<uint32>(x); }
inline uint64 asUnsigned(int64 x) { return static_cast<uint64>(x); }
/*
template <typename T> inline int8 toI8(T x) {
nvDebugCheck(x <= INT8_MAX);
nvDebugCheck(x >= INT8_MIN);
int8 y = (int8) x;
nvDebugCheck(x == (T)y);
return y;
}
template <typename T> inline uint8 toU8(T x) {
nvDebugCheck(x <= UINT8_MAX);
nvDebugCheck(x >= 0);
return (uint8) x;
}
template <typename T> inline int16 toI16(T x) {
nvDebugCheck(x <= INT16_MAX);
nvDebugCheck(x >= INT16_MIN);
return (int16) x;
}
template <typename T> inline uint16 toU16(T x) {
nvDebugCheck(x <= UINT16_MAX);
nvDebugCheck(x >= 0);
return (uint16) x;
}
template <typename T> inline int32 toI32(T x) {
nvDebugCheck(x <= INT32_MAX);
nvDebugCheck(x >= INT32_MIN);
return (int32) x;
}
template <typename T> inline uint32 toU32(T x) {
nvDebugCheck(x <= UINT32_MAX);
nvDebugCheck(x >= 0);
return (uint32) x;
}
template <typename T> inline int64 toI64(T x) {
nvDebugCheck(x <= INT64_MAX);
nvDebugCheck(x >= INT64_MIN);
return (int64) x;
}
template <typename T> inline uint64 toU64(T x) {
nvDebugCheck(x <= UINT64_MAX);
nvDebugCheck(x >= 0);
return (uint64) x;
}
*/
/// Swap two values.
template <typename T>
inline void swap(T & a, T & b)

@ -4,9 +4,6 @@
#ifndef NV_CORE_H
#define NV_CORE_H
// cmake config
#include <nvconfig.h>
// Function linkage
#if NVCORE_SHARED
#ifdef NVCORE_EXPORTS
@ -91,7 +88,11 @@
// @@ NV_CC_MSVC7
// @@ NV_CC_MSVC8
#if defined POSH_COMPILER_GCC
// Clang must be tested before GCC; it also enables the GNU code paths.
#if defined POSH_COMPILER_CLANG
#   define NV_CC_CLANG  1
#   define NV_CC_GNUC   1    // Clang is compatible with GCC; NV_CC_GNUC is the macro the rest of the code tests.
#   define NV_CC_GCC    1    // Kept in case anything already tests this spelling.
#   define NV_CC_STRING "clang"
#elif defined POSH_COMPILER_GCC
# define NV_CC_GNUC 1
# define NV_CC_STRING "gcc"
#elif defined POSH_COMPILER_MSVC
@ -108,6 +109,18 @@
#define NV_ENDIAN_STRING POSH_ENDIAN_STRING
// Define the right printf prefix for size_t arguments:
#if POSH_64BIT_POINTER
# define NV_SIZET_PRINTF_PREFIX POSH_I64_PRINTF_PREFIX
#else
# define NV_SIZET_PRINTF_PREFIX
#endif
// cmake config
#include "nvconfig.h"
// Type definitions:
typedef posh_u8_t uint8;
typedef posh_i8_t int8;
@ -144,6 +157,8 @@ typedef uint32 uint;
private: \
void *operator new(size_t size); \
void *operator new[](size_t size);
//static void *operator new(size_t size); \
//static void *operator new[](size_t size);
// String concatenation macros.
#define NV_STRING_JOIN2(arg1, arg2) NV_DO_STRING_JOIN2(arg1, arg2)
@ -153,6 +168,25 @@ typedef uint32 uint;
#define NV_STRING2(x) #x
#define NV_STRING(x) NV_STRING2(x)
#if __cplusplus > 199711L
#define nvStaticCheck(x) static_assert(x)
#else
#define nvStaticCheck(x) typedef char NV_STRING_JOIN2(__static_assert_,__LINE__)[(x)]
#endif
#define NV_COMPILER_CHECK(x) nvStaticCheck(x) // I like this name best.
// Make sure type definitions are fine.
NV_COMPILER_CHECK(sizeof(int8) == 1);
NV_COMPILER_CHECK(sizeof(uint8) == 1);
NV_COMPILER_CHECK(sizeof(int16) == 2);
NV_COMPILER_CHECK(sizeof(uint16) == 2);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
NV_COMPILER_CHECK(sizeof(int32) == 4);
NV_COMPILER_CHECK(sizeof(uint32) == 4);
#define NV_ARRAY_SIZE(x) (sizeof(x)/sizeof((x)[0]))
#if 1
@ -180,6 +214,7 @@ typedef uint32 uint;
// Null index. @@ Move this somewhere else... it's only used by nvmesh.
//const unsigned int NIL = unsigned int(~0);
//#define NIL uint(~0)
// Null pointer.
#ifndef NULL

@ -1418,7 +1418,7 @@ uint DirectDrawSurface::mipmapSize(uint mipmap) const
{
nvDebugCheck((header.pf.flags & DDPF_RGB) || (header.pf.flags & DDPF_LUMINANCE));
uint pitch = computeBytePitch(w, header.pf.bitcount, 8); // Asuming 8 bit alignment, which is the same D3DX expects.
uint pitch = computeBytePitch(w, header.pf.bitcount, 1); // Asuming 1 byte alignment, which is the same D3DX expects.
return pitch * h * d;
}

@ -181,7 +181,7 @@ void FloatImage::normalize(uint baseComponent)
for (uint i = 0; i < count; i++) {
Vector3 normal(xChannel[i], yChannel[i], zChannel[i]);
normal = normalizeSafe(normal, Vector3(zero), 0.0f);
normal = normalizeSafe(normal, Vector3(0), 0.0f);
xChannel[i] = normal.x;
yChannel[i] = normal.y;

@ -56,6 +56,7 @@ namespace nv
//@{
NVIMAGE_API void clear(float f = 0.0f);
NVIMAGE_API void clear(uint component, float f = 0.0f);
NVIMAGE_API void copyChannel(uint src, uint dst);
NVIMAGE_API void normalize(uint base_component);
@ -113,8 +114,6 @@ namespace nv
uint pixelCount() const { return m_pixelCount; }
// @@ It would make sense to swap the order of the arguments so that 'c' is always first.
/** @name Pixel access. */
//@{
const float * channel(uint c) const;

@ -70,14 +70,14 @@ namespace nv
inline const Color32 & Image::pixel(uint x, uint y) const
{
nvDebugCheck(x < width() && y < height());
return pixel(y * width() + x);
nvDebugCheck(x < m_width && y < m_height);
return pixel(y * m_width + x);
}
inline Color32 & Image::pixel(uint x, uint y)
{
nvDebugCheck(x < width() && y < height());
return pixel(y * width() + x);
nvDebugCheck(x < m_width && y < m_height);
return pixel(y * m_width + x);
}
} // nv namespace

@ -215,7 +215,7 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName)
StdInputStream stream(fileName);
if (stream.isError()) {
return false;
return NULL;
}
return loadFloat(fileName, stream);
@ -324,9 +324,9 @@ bool nv::ImageIO::saveFloat(const char * fileName, Stream & s, const FloatImage
bool nv::ImageIO::saveFloat(const char * fileName, const FloatImage * fimage, uint baseComponent, uint componentCount)
{
#if !defined(HAVE_FREEIMAGE)
const char * extension = Path::extension(fileName);
#if !defined(HAVE_FREEIMAGE)
#if defined(HAVE_OPENEXR)
if (strCaseCmp(extension, ".exr") == 0) {
return saveFloatEXR(fileName, fimage, baseComponent, componentCount);
@ -711,7 +711,7 @@ Image * nv::ImageIO::loadTGA(Stream & s)
case TGA_TYPE_INDEXED:
if( tga.colormap_type!=1 || tga.colormap_size!=24 || tga.colormap_length>256 ) {
nvDebug( "*** loadTGA: Error, only 24bit paletted images are supported.\n" );
return false;
return NULL;
}
pal = true;
break;
@ -732,7 +732,7 @@ Image * nv::ImageIO::loadTGA(Stream & s)
default:
nvDebug( "*** loadTGA: Error, unsupported image type.\n" );
return false;
return NULL;
}
const uint pixel_size = (tga.pixel_size/8);
@ -1369,7 +1369,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
// Read the entire file.
Array<uint8> byte_array;
byte_array.resize(s.size());
s.serialize(byte_array.mutableBuffer(), s.size());
s.serialize(byte_array.buffer(), s.size());
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;

@ -487,46 +487,126 @@ nv::half_to_float( uint16 h )
return (f_result);
}
uint32
nv::fast_half_to_float( uint16 h )
// @@ These tables could be smaller.
static uint32 mantissa_table[2048];
static uint32 exponent_table[64];
static uint32 offset_table[64];
void nv::half_init_tables()
{
const uint32 h_e_mask = _uint32_li( 0x00007c00 );
const uint32 h_m_mask = _uint32_li( 0x000003ff );
const uint32 h_s_mask = _uint32_li( 0x00008000 );
const uint32 h_f_s_pos_offset = _uint32_li( 0x00000010 );
const uint32 h_f_e_pos_offset = _uint32_li( 0x0000000d );
const uint32 h_f_bias_offset = _uint32_li( 0x0001c000 );
const uint32 f_e_mask = _uint32_li( 0x7f800000 );
const uint32 f_m_mask = _uint32_li( 0x007fffff );
const uint32 h_f_e_denorm_bias = _uint32_li( 0x0000007e );
const uint32 h_f_m_denorm_sa_bias = _uint32_li( 0x00000008 );
const uint32 f_e_pos = _uint32_li( 0x00000017 );
const uint32 h_e_mask_minus_one = _uint32_li( 0x00007bff );
const uint32 h_e = _uint32_and( h, h_e_mask );
const uint32 h_m = _uint32_and( h, h_m_mask );
const uint32 h_s = _uint32_and( h, h_s_mask );
const uint32 h_e_f_bias = _uint32_add( h_e, h_f_bias_offset );
const uint32 h_m_nlz = _uint32_cntlz( h_m );
const uint32 f_s = _uint32_sll( h_s, h_f_s_pos_offset );
const uint32 f_e = _uint32_sll( h_e_f_bias, h_f_e_pos_offset );
const uint32 f_m = _uint32_sll( h_m, h_f_e_pos_offset );
const uint32 f_em = _uint32_or( f_e, f_m );
const uint32 h_f_m_sa = _uint32_sub( h_m_nlz, h_f_m_denorm_sa_bias );
const uint32 f_e_denorm_unpacked = _uint32_sub( h_f_e_denorm_bias, h_f_m_sa );
const uint32 h_f_m = _uint32_sll( h_m, h_f_m_sa );
const uint32 f_m_denorm = _uint32_and( h_f_m, f_m_mask );
const uint32 f_e_denorm = _uint32_sll( f_e_denorm_unpacked, f_e_pos );
const uint32 f_em_denorm = _uint32_or( f_e_denorm, f_m_denorm );
const uint32 f_em_nan = _uint32_or( f_e_mask, f_m );
const uint32 is_e_eqz_msb = _uint32_dec( h_e );
const uint32 is_m_nez_msb = _uint32_neg( h_m );
const uint32 is_e_flagged_msb = _uint32_sub( h_e_mask_minus_one, h_e );
const uint32 is_zero_msb = _uint32_andc( is_e_eqz_msb, is_m_nez_msb );
const uint32 is_denorm_msb = _uint32_and( is_m_nez_msb, is_e_eqz_msb );
const uint32 is_zero = _uint32_ext( is_zero_msb );
const uint32 f_zero_result = _uint32_andc( f_em, is_zero );
const uint32 f_denorm_result = _uint32_sels( is_denorm_msb, f_em_denorm, f_zero_result );
const uint32 f_result = _uint32_or( f_s, f_denorm_result );
// Init mantissa table.
mantissa_table[0] = 0;
return (f_result);
for (int i = 1; i < 1024; i++) {
uint m = i << 13;
uint e = 0;
while ((m & 0x00800000) == 0) {
e -= 0x00800000;
m <<= 1;
}
m &= ~0x00800000;
e += 0x38800000;
mantissa_table[i] = m | e;
}
for (int i = 1024; i < 2048; i++) {
mantissa_table[i] = 0x38000000 + ((i - 1024) << 13);
}
// Init exponent table.
exponent_table[0] = 0;
for (int i = 1; i < 31; i++) {
exponent_table[i] = (i << 23);
}
exponent_table[31] = 0x47800000;
exponent_table[32] = 0x80000000;
for (int i = 33; i < 63; i++) {
exponent_table[i] = 0x80000000 + ((i - 32) << 23);
}
exponent_table[63] = 0xC7800000;
// Init offset table.
offset_table[0] = 0;
for (int i = 1; i < 32; i++) {
offset_table[i] = 1024;
}
offset_table[32] = 0;
for (int i = 33; i < 64; i++) {
offset_table[i] = 1024;
}
/*for (int i = 0; i < 64; i++) {
offset_table[i] = ((i & 31) != 0) * 1024;
}*/
}
// Table-driven half -> float bit conversion, following:
// http://www.fox-toolkit.org/ftp/fasthalffloatconversion.pdf
// The lookup tables are filled by half_init_tables().
uint32 nv::fast_half_to_float(uint16 h)
{
    // The top 6 bits of the half select the exponent and offset table entries.
    const uint tableIndex = h >> 10;
    // The low 10 bits, shifted by the per-entry offset, select the mantissa entry.
    const uint mantissaIndex = offset_table[tableIndex] + (h & 0x3ff);
    return mantissa_table[mantissaIndex] + exponent_table[tableIndex];
}
#if 0
// Inaccurate conversion suggested at the ffmpeg mailing list:
// http://lists.mplayerhq.hu/pipermail/ffmpeg-devel/2009-July/068949.html
uint32 nv::fast_half_to_float(uint16 v)
{
if (v & 0x8000) return 0;
uint exp = v >> 10;
if (!exp) return (v>>9)&1;
if (exp >= 15) return 0xffff;
v <<= 6;
return (v+(1<<16)) >> (15-exp);
}
#endif
#if 0
// Some more from a gamedev thread:
// http://www.devmaster.net/forums/showthread.php?t=10924
// I believe it does not handle specials either.
// Mike Acton's code should be fairly easy to vectorize and that would handle all cases too, the table method might still be faster, though.
static __declspec(align(16)) unsigned half_sign[4] = {0x00008000, 0x00008000, 0x00008000, 0x00008000};
static __declspec(align(16)) unsigned half_exponent[4] = {0x00007C00, 0x00007C00, 0x00007C00, 0x00007C00};
static __declspec(align(16)) unsigned half_mantissa[4] = {0x000003FF, 0x000003FF, 0x000003FF, 0x000003FF};
static __declspec(align(16)) unsigned half_bias_offset[4] = {0x0001C000, 0x0001C000, 0x0001C000, 0x0001C000};
__asm
{
movaps xmm1, xmm0 // Input in xmm0
movaps xmm2, xmm0
andps xmm0, half_sign
andps xmm1, half_exponent
andps xmm2, half_mantissa
paddd xmm1, half_bias_offset
pslld xmm0, 16
pslld xmm1, 13
pslld xmm2, 13
orps xmm1, xmm2
orps xmm0, xmm1 // Result in xmm0
}
#endif

@ -9,8 +9,9 @@ namespace nv {
uint32 half_to_float( uint16 h );
uint16 half_from_float( uint32 f );
// Does not handle NaN or infinity.
uint32 fast_half_to_float( uint16 h );
void half_init_tables();
uint32 fast_half_to_float(uint16 h);
inline uint16 to_half(float c) {
union { float f; uint32 u; } f;

@ -9,15 +9,14 @@
namespace nv
{
enum zero_t { zero };
enum identity_t { identity };
class NVMATH_CLASS Matrix3
{
public:
Matrix3();
Matrix3(zero_t);
Matrix3(identity_t);
explicit Matrix3(float f);
explicit Matrix3(identity_t);
Matrix3(const Matrix3 & m);
Matrix3(Vector3::Arg v0, Vector3::Arg v1, Vector3::Arg v2);
@ -41,10 +40,10 @@ namespace nv
inline Matrix3::Matrix3() {}
inline Matrix3::Matrix3(zero_t)
inline Matrix3::Matrix3(float f)
{
for(int i = 0; i < 9; i++) {
m_data[i] = 0.0f;
m_data[i] = f;
}
}
@ -204,11 +203,11 @@ namespace nv
typedef Matrix const & Arg;
Matrix();
Matrix(zero_t);
Matrix(identity_t);
explicit Matrix(float f);
explicit Matrix(identity_t);
Matrix(const Matrix & m);
Matrix(Vector4::Arg v0, Vector4::Arg v1, Vector4::Arg v2, Vector4::Arg v3);
Matrix(const scalar m[]); // m is assumed to contain 16 elements
//explicit Matrix(const scalar m[]); // m is assumed to contain 16 elements
scalar data(uint idx) const;
scalar & data(uint idx);
@ -237,7 +236,7 @@ namespace nv
{
}
inline Matrix::Matrix(zero_t)
inline Matrix::Matrix(float f)
{
for(int i = 0; i < 16; i++) {
m_data[i] = 0.0f;
@ -268,12 +267,12 @@ namespace nv
m_data[12] = v3.x; m_data[13] = v3.y; m_data[14] = v3.z; m_data[15] = v3.w;
}
inline Matrix::Matrix(const scalar m[])
/*inline Matrix::Matrix(const scalar m[])
{
for(int i = 0; i < 16; i++) {
m_data[i] = m[i];
}
}
}*/
// Accessors
@ -456,7 +455,7 @@ namespace nv
/// Get frustum matrix.
inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear, scalar zFar)
{
Matrix m(zero);
Matrix m(0.0f);
scalar doubleznear = 2.0f * zNear;
scalar one_deltax = 1.0f / (xmax - xmin);
@ -477,7 +476,7 @@ namespace nv
/// Get infinite frustum matrix.
inline Matrix frustum(scalar xmin, scalar xmax, scalar ymin, scalar ymax, scalar zNear)
{
Matrix m(zero);
Matrix m(0.0f);
scalar doubleznear = 2.0f * zNear;
scalar one_deltax = 1.0f / (xmax - xmin);

@ -100,6 +100,7 @@ namespace nv
explicit Vector4(scalar x);
Vector4(scalar x, scalar y, scalar z, scalar w);
Vector4(Vector2::Arg v, scalar z, scalar w);
Vector4(Vector2::Arg v, Vector2::Arg u);
Vector4(Vector3::Arg v, scalar w);
Vector4(Vector4::Arg v);
// Vector4(const Quaternion & v);
@ -107,6 +108,7 @@ namespace nv
const Vector4 & operator=(Vector4::Arg v);
Vector2 xy() const;
Vector2 zw() const;
Vector3 xyz() const;
const scalar * ptr() const;
@ -290,6 +292,7 @@ namespace nv
inline Vector4::Vector4(scalar f) : x(f), y(f), z(f), w(f) {}
inline Vector4::Vector4(scalar x, scalar y, scalar z, scalar w) : x(x), y(y), z(z), w(w) {}
inline Vector4::Vector4(Vector2::Arg v, scalar z, scalar w) : x(v.x), y(v.y), z(z), w(w) {}
inline Vector4::Vector4(Vector2::Arg v, Vector2::Arg u) : x(v.x), y(v.y), z(u.x), w(u.y) {}
inline Vector4::Vector4(Vector3::Arg v, scalar w) : x(v.x), y(v.y), z(v.z), w(w) {}
inline Vector4::Vector4(Vector4::Arg v) : x(v.x), y(v.y), z(v.z), w(v.w) {}
@ -307,6 +310,11 @@ namespace nv
return Vector2(x, y);
}
inline Vector2 Vector4::zw() const
{
return Vector2(z, w);
}
inline Vector3 Vector4::xyz() const
{
return Vector3(x, y, z);
@ -469,6 +477,14 @@ namespace nv
return scale(v, 1.0f / l);
}
// Safe, branchless normalization from Andy Firth. All error checking omitted.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
// A tiny bias is added to the length so the reciprocal below is never taken of zero.
inline Vector2 normalizeFast(Vector2::Arg v)
{
    const float bias = 1.0e-037f;
    const float biasedLength = length(v) + bias;
    return scale(v, 1.0f / biasedLength);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{
@ -498,6 +514,14 @@ namespace nv
return vf;
}
// Returns the 2D cross product of the edges (a - c) and (b - c).
// Note: that value is twice the signed area of triangle (a, b, c); no 0.5 factor
// is applied here, and the sign depends on the winding of the three points.
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
{
    const Vector2 ca = a - c;
    const Vector2 cb = b - c;
    return (ca.x * cb.y - ca.y * cb.x);
}
// Vector3
@ -570,10 +594,10 @@ namespace nv
return scale(v, 1.0f/s);
}
inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s)
/*inline Vector3 add_scaled(Vector3::Arg a, Vector3::Arg b, scalar s)
{
return Vector3(a.x + b.x * s, a.y + b.y * s, a.z + b.z * s);
}
}*/
inline Vector3 lerp(Vector3::Arg v1, Vector3::Arg v2, scalar t)
{
@ -624,6 +648,15 @@ namespace nv
return scale(v, 1.0f / l);
}
// Safe, branchless normalization from Andy Firth. All error checking omitted.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
// A tiny bias is added to the length so the reciprocal below is never taken of zero.
inline Vector3 normalizeFast(Vector3::Arg v)
{
    const float bias = 1.0e-037f;
    const float biasedLength = length(v) + bias;
    return scale(v, 1.0f / biasedLength);
}
inline bool equal(Vector3::Arg v1, Vector3::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon);
@ -762,6 +795,15 @@ namespace nv
return scale(v, 1.0f / l);
}
// Safe, branchless normalization from Andy Firth. All error checking omitted.
// http://altdevblogaday.com/2011/08/21/practical-flt-point-tricks/
// A tiny bias is added to the length so the reciprocal below is never taken of zero.
inline Vector4 normalizeFast(Vector4::Arg v)
{
    const float bias = 1.0e-037f;
    const float biasedLength = length(v) + bias;
    return scale(v, 1.0f / biasedLength);
}
inline bool equal(Vector4::Arg v1, Vector4::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x, v2.x, epsilon) && equal(v1.y, v2.y, epsilon) && equal(v1.z, v2.z, epsilon) && equal(v1.w, v2.w, epsilon);

@ -4,8 +4,9 @@
#ifndef NV_MATH_H
#define NV_MATH_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
#include "nvcore/nvcore.h"
#include "nvcore/Debug.h"
#include "nvcore/Utils.h" // clamp
#include <math.h>
#include <limits.h> // INT_MAX
@ -194,7 +195,7 @@ namespace nv
return f - floor(f);
}
inline float fround(float f)
inline float fround(float f) // @@ rename floatRound
{
// @@ Do something better.
return float(iround(f));
@ -210,6 +211,29 @@ namespace nv
}
}
// Clamps f with bounds 0.0f and 1.0f via clamp().
inline float saturate(float f) {
return clamp(f, 0.0f, 1.0f);
}
// Maps x from the [edge0, edge1] interval to 0..1 and saturates the result.
// The edges are not checked: edge0 == edge1 makes the division below divide by zero.
inline float linearstep(float edge0, float edge1, float x) {
    // Scale and bias x, then saturate to the 0..1 range.
    const float t = (x - edge0) / (edge1 - edge0);
    return saturate(t);
}
// Cubic smooth interpolation: linearstep() followed by the polynomial 3t^2 - 2t^3.
inline float smoothstep(float edge0, float edge1, float x) {
    const float t = linearstep(edge0, edge1, x);
    // Evaluate polynomial
    return t*t*(3 - 2*t);
}
// Returns 1 for positive values, -1 for negative values and 0 otherwise
// (zero of either sign, and NaN since both comparisons fail for it).
inline int sign(float a)
{
    if (a > 0.0f) {
        return 1;
    }
    return (a < 0.0f) ? -1 : 0;
}
} // nv
#endif // NV_MATH_H

@ -0,0 +1,26 @@
# Build script for the threading library.
# NOTE(review): the target is named "nvthreads" while the public header added alongside
# it is nvthread.h and uses NVTHREAD_* macros -- confirm which spelling is intended.
PROJECT(nvthreads)
# NOTE(review): this list names SpinWaiter and ThreadLocalStorage, but does not list
# Event, ParallelFor, ThreadPool or nvthread.cpp -- verify it matches the files on disk.
SET(THREADS_SRCS
nvthreads.h
Mutex.h Mutex.cpp
SpinWaiter.h SpinWaiter.cpp
Thread.h Thread.cpp
ThreadLocalStorage.h ThreadLocalStorage.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})
# targets
# NOTE(review): nvthread.h tests NVTHREAD_EXPORTS / NVTHREAD_SHARED (no trailing "S"),
# so the NVTHREADS_* names used here would not reach that header -- confirm.
ADD_DEFINITIONS(-DNVTHREADS_EXPORTS)
# Build a shared library when NVTHREADS_SHARED is set, otherwise use CMake's default library type.
IF(NVTHREADS_SHARED)
ADD_LIBRARY(nvthreads SHARED ${THREADS_SRCS})
ELSE(NVTHREADS_SHARED)
ADD_LIBRARY(nvthreads ${THREADS_SRCS})
ENDIF(NVTHREADS_SHARED)
TARGET_LINK_LIBRARIES(nvthreads ${LIBS} nvcore)
# Install locations: executables/DLLs in bin, shared libraries in lib, static archives in lib/static.
INSTALL(TARGETS nvthreads
RUNTIME DESTINATION bin
LIBRARY DESTINATION lib
ARCHIVE DESTINATION lib/static)

@ -0,0 +1,52 @@
// This code is in the public domain -- castano@gmail.com
#include "Event.h"
#if NV_OS_WIN32
#include "Win32.h"
#elif NV_OS_UNIX
#include <pthread.h>
#endif
using namespace nv;
#if NV_OS_WIN32
// Win32 implementation state: the handle returned by CreateEvent.
struct Event::Private {
HANDLE handle;
};
// Creates the underlying Win32 event: default security attributes, auto-reset
// (bManualReset = FALSE), initially non-signaled (bInitialState = FALSE), unnamed.
Event::Event() : m(new Private) {
    m->handle = CreateEvent(NULL, FALSE, FALSE, NULL);
    // CreateEvent returns NULL on failure. Catch that in debug builds instead of
    // silently handing an invalid handle to SetEvent/WaitForSingleObject later.
    nvDebugCheck(m->handle != NULL);
}
// Closes the event handle created in the constructor.
Event::~Event() {
CloseHandle(m->handle);
}
// Signals the event (SetEvent).
void Event::post() {
SetEvent(m->handle);
}
// Blocks with no timeout until the event is signaled. The event is created as
// auto-reset in the constructor, so a successful wait also resets it.
void Event::wait() {
WaitForSingleObject(m->handle, INFINITE);
}
// Posts each of the `count` events in the array, in index order.
/*static*/ void Event::post(Event * events, uint count) {
    uint index = 0;
    while (index < count) {
        events[index].post();
        ++index;
    }
}
// Waits on each of the `count` events in turn, so this returns once all of them
// have been signaled.
/*static*/ void Event::wait(Event * events, uint count) {
    // @@ Use wait for multiple objects?
    uint index = 0;
    while (index < count) {
        events[index].wait();
        ++index;
    }
}
#elif NV_OS_UNIX
// @@
#endif

@ -0,0 +1,34 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_EVENT_H
#define NV_THREAD_EVENT_H
#include "nvthread.h"
#include "nvcore/Ptr.h"
namespace nv
{
// Signaling primitive: one thread posts, another waits.
// This is intended to be used by a single waiter thread.
class NVTHREAD_CLASS Event
{
NV_FORBID_COPY(Event);
public:
Event();
~Event();
// Signal the event.
void post();
void wait(); // Wait resets the event.
// Array helpers: post / wait on `count` consecutive events starting at `events`.
static void post(Event * events, uint count);
static void wait(Event * events, uint count);
private:
// Platform-specific state, defined in Event.cpp.
struct Private;
AutoPtr<Private> m;
};
} // nv namespace
#endif // NV_THREAD_EVENT_H

@ -0,0 +1,89 @@
// This code is in the public domain -- castano@gmail.com
#include "Mutex.h"
#if NV_OS_WIN32
#include "Win32.h"
#elif NV_OS_UNIX
#include <pthread.h>
#include <errno.h> // EBUSY
#endif // NV_OS
using namespace nv;
#if NV_OS_WIN32
// Win32 implementation: the mutex is a CRITICAL_SECTION.
struct Mutex::Private {
CRITICAL_SECTION mutex;
};
// Initializes the critical section.
Mutex::Mutex () : m(new Private)
{
InitializeCriticalSection(&m->mutex);
}
// Deletes the critical section.
Mutex::~Mutex ()
{
DeleteCriticalSection(&m->mutex);
}
// Blocks until the critical section is entered.
void Mutex::lock()
{
EnterCriticalSection(&m->mutex);
}
// Non-blocking attempt; returns true when the critical section was entered.
bool Mutex::tryLock()
{
return TryEnterCriticalSection(&m->mutex) != 0;
}
// Leaves the critical section.
void Mutex::unlock()
{
LeaveCriticalSection(&m->mutex);
}
#elif NV_OS_UNIX
// pthread implementation: the mutex is a pthread_mutex_t with default attributes.
// Failures of the pthread calls are only reported through nvDebugCheck.
struct Mutex::Private {
pthread_mutex_t mutex;
};
// Initializes the mutex with default attributes (NULL attr).
Mutex::Mutex () : m(new Private)
{
int result = pthread_mutex_init(&m->mutex , NULL);
nvDebugCheck(result == 0);
}
// Destroys the mutex.
Mutex::~Mutex ()
{
int result = pthread_mutex_destroy(&m->mutex);
nvDebugCheck(result == 0);
}
// Blocks until the mutex is acquired.
void Mutex::lock()
{
int result = pthread_mutex_lock(&m->mutex);
nvDebugCheck(result == 0);
}
// Non-blocking attempt; returns true when the mutex was acquired.
// EBUSY (already locked) is the only failure that is expected here.
bool Mutex::tryLock()
{
int result = pthread_mutex_trylock(&m->mutex);
nvDebugCheck(result == 0 || result == EBUSY);
return result == 0;
}
// Releases the mutex.
void Mutex::unlock()
{
int result = pthread_mutex_unlock(&m->mutex);
nvDebugCheck(result == 0);
}
#endif // NV_OS

@ -0,0 +1,47 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_MUTEX_H
#define NV_THREAD_MUTEX_H
#include "nvthread.h"
#include "nvcore/Ptr.h"
namespace nv
{
// Non-copyable mutual exclusion lock; the platform implementation lives in Mutex.cpp.
class NVTHREAD_CLASS Mutex
{
NV_FORBID_COPY(Mutex);
public:
Mutex ();
~Mutex ();
// Blocks until the mutex is acquired.
void lock();
// Attempts to acquire without blocking; returns true on success.
bool tryLock();
// Releases the mutex.
void unlock();
private:
// Platform-specific state, defined in Mutex.cpp.
struct Private;
AutoPtr<Private> m;
};
// Templated lock that can be used with any mutex.
// Scoped guard: locks the mutex in the constructor and unlocks it in the destructor.
// M only needs to provide lock() and unlock(). The guard holds a reference, so the
// mutex must outlive it.
template <class M>
class Lock
{
NV_FORBID_COPY(Lock);
public:
Lock (M & m) : m_mutex (m) { m_mutex.lock(); }
~Lock () { m_mutex.unlock(); }
private:
M & m_mutex;
};
} // nv namespace
#endif // NV_THREAD_MUTEX_H

@ -0,0 +1,61 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#include "ParallelFor.h"
#include "Thread.h"
#include "Atomic.h"
#include "ThreadPool.h"
using namespace nv;
#define ENABLE_PARALLEL_FOR 1
void worker(void * arg) {
ParallelFor * owner = (ParallelFor *)arg;
while(true) {
// Consume one element at a time. @@ Might be more efficient to have custom grain.
uint i = atomicIncrement(&owner->idx);
if (i > owner->count) {
break;
}
owner->task(owner->context, i - 1);
}
}
// Stores the task and its context and, when parallel execution is enabled, acquires
// the shared thread pool (returned to it in the destructor).
// All members are given a defined value here: pool was previously left uninitialized
// when ENABLE_PARALLEL_FOR is 0, and count/idx until the first run().
ParallelFor::ParallelFor(ForTask * task, void * context) : task(task), context(context), pool(0), count(0), idx(0) {
#if ENABLE_PARALLEL_FOR
    pool = ThreadPool::acquire();
#endif
}
// Hands the pool acquired in the constructor back to ThreadPool.
ParallelFor::~ParallelFor() {
#if ENABLE_PARALLEL_FOR
ThreadPool::release(pool);
#endif
}
// Invokes task(context, i) for every i in [0, count) and returns when all calls
// have completed. With ENABLE_PARALLEL_FOR the elements are distributed over the
// pool threads; otherwise they are processed in order on the calling thread.
void ParallelFor::run(uint count) {
#if ENABLE_PARALLEL_FOR
    storeRelease(&this->count, count);

    // Init atomic counter to zero.
    storeRelease(&idx, 0);

    // Start threads.
    pool->start(worker, this);

    // Wait for all threads to complete.
    pool->wait();

    nvDebugCheck(idx >= count);
#else
    // Use an unsigned index so it is compared against count without a signed/unsigned
    // mix; the task interface takes an int id.
    for (uint i = 0; i < count; i++) {
        task(context, int(i));
    }
#endif
}

@ -0,0 +1,38 @@
// This code is in the public domain -- Ignacio Castaño <castano@gmail.com>
#pragma once
#ifndef NV_THREAD_PARALLELFOR_H
#define NV_THREAD_PARALLELFOR_H
#include "nvthread.h"
//#include "Atomic.h" // atomic<uint>
namespace nv
{
class Thread;
class ThreadPool;
typedef void ForTask(void * context, int id);
// Runs a ForTask over a range of element ids using the shared thread pool.
struct ParallelFor {
ParallelFor(ForTask * task, void * context);
~ParallelFor();
// Runs the task for ids 0 .. count-1 and returns when all of them are done.
void run(uint count);
// Invariant:
ForTask * task;
void * context;
ThreadPool * pool;
//uint workerCount; // @@ Move to thread pool.
//Thread * workers;
// State:
// Number of elements of the current run.
uint count;
// Shared counter used by the worker threads to claim elements; accessed with atomic operations.
/*atomic<uint>*/ uint idx;
};
} // nv namespace
#endif // NV_THREAD_PARALLELFOR_H

@ -0,0 +1,136 @@
// This code is in the public domain -- castano@gmail.com
#include "Thread.h"
#if NV_OS_WIN32
#include "Win32.h"
#elif NV_OS_UNIX
#include <pthread.h>
#include <unistd.h> // usleep
#endif
using namespace nv;
// Platform-specific thread state.
struct Thread::Private
{
#if NV_OS_WIN32
HANDLE thread;
#elif NV_OS_UNIX
pthread_t thread;
#endif
// NOTE(review): func/arg below are not referenced in this file; Thread::start and
// threadFunc use the public Thread::func / Thread::arg members instead -- confirm
// whether these are leftovers.
ThreadFunc * func;
void * arg;
};
#if NV_OS_WIN32
// Win32 thread entry point. arg is the Thread passed to CreateThread by Thread::start;
// runs the user function stored on it and exits with code 0.
unsigned long __stdcall threadFunc(void * arg) {
Thread * thread = (Thread *)arg;
thread->func(thread->arg);
return 0;
}
#elif NV_OS_UNIX
extern "C" void * threadFunc(void * arg) {
Thread * thread = (Thread *)arg;
thread->func(thread->arg);
pthread_exit(0);
}
#endif
// Creates a thread object with no native thread attached (handle/id set to 0).
Thread::Thread() : p(new Private)
{
p->thread = 0;
}
// The native thread must have been joined with wait() before destruction; this is
// only verified in debug builds.
Thread::~Thread()
{
nvDebugCheck(p->thread == 0);
}
// Stores the entry point and its argument on this object, then creates the native
// thread. The native thread receives `this`; threadFunc reads func/arg back from it.
// Creation failures are only reported through nvDebugCheck.
void Thread::start(ThreadFunc * func, void * arg)
{
this->func = func;
this->arg = arg;
#if NV_OS_WIN32
p->thread = CreateThread(NULL, 0, threadFunc, this, 0, NULL);
//p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, this, 0, NULL); // @@ So that we can call CRT functions...
nvDebugCheck(p->thread != NULL);
#elif NV_OS_UNIX
int result = pthread_create(&p->thread, NULL, threadFunc, this);
nvDebugCheck(result == 0);
#endif
}
// Blocks until the thread has finished, then releases the native thread and clears
// the stored handle/id so that isRunning() returns false.
// Note that the Win32 path checks with nvCheck while the pthread path uses nvDebugCheck.
void Thread::wait()
{
#if NV_OS_WIN32
DWORD status = WaitForSingleObject (p->thread, INFINITE);
nvCheck (status == WAIT_OBJECT_0);
BOOL ok = CloseHandle (p->thread);
p->thread = NULL;
nvCheck (ok);
#elif NV_OS_UNIX
int result = pthread_join(p->thread, NULL);
p->thread = 0;
nvDebugCheck(result == 0);
#endif
}
// Returns true while a native thread is attached to this object. The stored handle/id
// is only cleared in wait(), so this keeps returning true after the thread function
// has returned, until wait() is called.
bool Thread::isRunning () const
{
#if NV_OS_WIN32
return p->thread != NULL;
#elif NV_OS_UNIX
return p->thread != 0;
#endif
}
// Busy-waits by counting up to `count` with an empty loop body.
// NOTE(review): the loop has no side effects, so an optimizing compiler is presumably
// free to remove it entirely -- confirm this still delays in release builds.
/*static*/ void Thread::spinWait(uint count)
{
for (uint i = 0; i < count; i++) {}
}
// Yields the calling thread's time slice (SwitchToThread on Win32, sched_yield
// otherwise).
// NOTE(review): no <sched.h> include is visible in this file for sched_yield;
// presumably it arrives through <pthread.h> -- confirm.
/*static*/ void Thread::yield()
{
#if NV_OS_WIN32
SwitchToThread();
#elif NV_OS_UNIX
int result = sched_yield();
nvDebugCheck(result == 0);
#endif
}
// Suspends the calling thread for `ms` milliseconds.
// NOTE(review): usleep takes microseconds, hence the factor of 1000. The product can
// wrap for very large `ms`, and POSIX allows usleep to reject intervals of one second
// or more -- confirm the expected range of `ms`.
/*static*/ void Thread::sleep(uint ms)
{
#if NV_OS_WIN32
Sleep(ms);
#elif NV_OS_UNIX
usleep(1000 * ms);
#endif
}
// Waits for each of the `count` threads in the array, one after another.
// The disabled block below is an alternative Win32 implementation kept for reference.
/*static*/ void Thread::wait(Thread * threads, uint count)
{
/*#if NV_OS_WIN32
// @@ Is there any advantage in doing this?
nvDebugCheck(count < MAXIMUM_WAIT_OBJECTS);
HANDLE * handles = new HANDLE[count];
for (uint i = 0; i < count; i++) {
handles[i] = threads[i].p->thread;
}
DWORD result = WaitForMultipleObjects(count, handles, TRUE, INFINITE);
delete [] handles;
#else*/
for (uint i = 0; i < count; i++) {
threads[i].wait();
}
//#endif
}

@ -0,0 +1,46 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_THREAD_H
#define NV_THREAD_THREAD_H
#include "nvthread.h"
#include "nvcore/Ptr.h"
namespace nv
{
typedef void ThreadFunc(void * arg);
// Thin, non-copyable wrapper around a native thread (Win32 thread or pthread).
class NVTHREAD_CLASS Thread
{
NV_FORBID_COPY(Thread);
public:
Thread();
~Thread();
// Starts the native thread; it calls func(arg).
void start(ThreadFunc * func, void * arg);
// Blocks until the thread has finished and releases the native thread.
void wait();
// True from start() until wait() has been called.
bool isRunning() const;
static void spinWait(uint count);
static void yield();
static void sleep(uint ms);
// Waits for `count` consecutive threads starting at `threads`.
static void wait(Thread * threads, uint count);
private:
// Platform-specific state, defined in Thread.cpp.
struct Private;
AutoPtr<Private> p;
public:
// Entry point and argument set by start(). Public because the native entry point
// in Thread.cpp, a free function, reads them.
ThreadFunc * func;
void * arg;
};
} // nv namespace
#endif // NV_THREAD_THREAD_H

@ -0,0 +1,121 @@
// This code is in the public domain -- castano@gmail.com
#include "ThreadPool.h"
#include "Mutex.h"
#include "Thread.h"
// Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it.
#define PROTECT_THREAD_POOL 1
using namespace nv;
#if PROTECT_THREAD_POOL
Mutex s_pool_mutex;
#endif
AutoPtr<ThreadPool> s_pool;
// Returns the global thread pool, creating it on first use.
// With PROTECT_THREAD_POOL the pool mutex is locked here and stays locked until the
// matching release(), so only one client can hold the pool at a time.
/*static*/ ThreadPool * ThreadPool::acquire()
{
#if PROTECT_THREAD_POOL
s_pool_mutex.lock(); // @@ If same thread tries to lock twice, this should assert.
#endif
if (s_pool == NULL) {
// The ThreadPool constructor stores the new object in s_pool itself; the check
// below only verifies that. p is otherwise unused.
ThreadPool * p = new ThreadPool;
nvDebugCheck(s_pool == p);
}
return s_pool.ptr();
}
// Returns the pool obtained from acquire(): waits for any work in flight and, with
// PROTECT_THREAD_POOL, unlocks the mutex taken in acquire(). The pool itself is kept
// alive for later reuse.
/*static*/ void ThreadPool::release(ThreadPool * pool)
{
nvDebugCheck(pool == s_pool);
// Make sure the threads of the pool are idle.
s_pool->wait();
#if PROTECT_THREAD_POOL
s_pool_mutex.unlock();
#endif
}
// Body of every pool thread. arg carries the worker's index (not a real pointer).
// The thread sleeps on its start event, runs the pool's current function, signals its
// finish event, and repeats. A NULL function is the request to terminate.
/*static*/ void ThreadPool::workerFunc(void * arg) {
    // Convert through a pointer-sized integer first: casting a pointer directly to a
    // 32-bit uint loses precision on 64-bit targets and is rejected by GCC/Clang.
    uint i = (uint)(size_t)arg;

    while(true)
    {
        s_pool->startEvents[i].wait();

        if (s_pool->func == NULL) {
            return; // @@ should we post finish event anyway?
        }

        s_pool->func(s_pool->arg);

        s_pool->finishEvents[i].post();
    }
}
// Creates one worker thread per hardware thread, each with its own start and finish
// event, and starts them. The workers block on their start event until start() posts it.
ThreadPool::ThreadPool()
{
    s_pool = this; // Worker threads need this to be initialized before they start.

    workerCount = nv::hardwareThreadCount();
    workers = new Thread[workerCount];

    startEvents = new Event[workerCount];
    finishEvents = new Event[workerCount];

    for (uint i = 0; i < workerCount; i++) {
        // The worker index travels in the void* argument. Widen it through a
        // pointer-sized integer so the int-to-pointer cast is size-correct on 64-bit targets.
        workers[i].start(workerFunc, (void *)(size_t)i);
    }

    allIdle = true;
}
// Shuts the pool down: starting the workers with a NULL function makes each of them
// return from workerFunc, after which the threads are joined and the arrays freed.
ThreadPool::~ThreadPool()
{
// Set threads to terminate.
start(NULL, NULL);
// Wait until threads actually exit.
Thread::wait(workers, workerCount);
delete [] workers;
delete [] startEvents;
delete [] finishEvents;
}
// Makes every worker thread call func(arg) once. Any previous batch is waited for
// first; this call itself returns without waiting for the new batch (use wait()).
void ThreadPool::start(ThreadFunc * func, void * arg)
{
// Wait until threads are idle.
wait();
// Set our desired function.
this->func = func;
this->arg = arg;
allIdle = false;
// Resume threads.
Event::post(startEvents, workerCount);
}
// Blocks until every worker has signaled completion of the current batch.
// Does nothing when the pool is already known to be idle.
void ThreadPool::wait()
{
    if (allIdle) {
        return;
    }

    // Wait for threads to complete.
    Event::wait(finishEvents, workerCount);
    allIdle = true;
}

@ -0,0 +1,49 @@
// This code is in the public domain -- castano@gmail.com
#pragma once
#ifndef NV_THREAD_THREADPOOL_H
#define NV_THREAD_THREADPOOL_H
#include "nvthread.h"
#include "Event.h"
#include "Thread.h"
namespace nv {
class Thread;
class Event;
// Fixed set of worker threads that all run the same function on request.
// Clients obtain the shared instance with acquire() and hand it back with release().
class ThreadPool {
NV_FORBID_COPY(ThreadPool);
public:
static ThreadPool * acquire();
static void release(ThreadPool *);
ThreadPool();
~ThreadPool();
// Runs func(arg) once on every worker thread.
void start(ThreadFunc * func, void * arg);
// Blocks until the workers have finished the function passed to start().
void wait();
private:
// Entry point of the worker threads; arg carries the worker index.
static void workerFunc(void * arg);
uint workerCount;
Thread * workers;
// One start / finish event per worker.
Event * startEvents;
Event * finishEvents;
// Non-zero when no batch is in flight.
uint allIdle;
// Current function:
ThreadFunc * func;
void * arg;
};
} // namespace nv
#endif // NV_THREAD_THREADPOOL_H

@ -0,0 +1,9 @@
// This code is in the public domain -- castano@gmail.com
// Never include this from a header file.
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#define _WIN32_WINNT 0x0400 // for SwitchToThread, TryEnterCriticalSection
#include <windows.h>
//#include <process.h> // for _beginthreadex

@ -0,0 +1,51 @@
#include "nvthread.h"
#include "Thread.h"

// Include only the system headers needed by the platform branch that is compiled
// below, so this file also builds where <windows.h> does not exist.
#if NV_OS_WIN32
#define WIN32_LEAN_AND_MEAN
#define VC_EXTRALEAN
#include <windows.h>
#elif NV_OS_LINUX
#include <unistd.h> // sysconf
#elif NV_OS_DARWIN || NV_OS_FREEBSD
#include <sys/types.h>
#include <sys/sysctl.h> // sysctl, CTL_HW, HW_NCPU
#endif

using namespace nv;

// Find the number of cores in the system.
// Based on: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
// @@ Distinguish between logical and physical cores?
// On the sysconf/sysctl paths a failed or nonsensical query falls back to 1.
uint nv::hardwareThreadCount() {
#if NV_OS_WIN32
    SYSTEM_INFO sysinfo;
    GetSystemInfo( &sysinfo );
    return sysinfo.dwNumberOfProcessors;
#elif NV_OS_XBOX
    return 3; // or 6?
#elif NV_OS_LINUX // Linux, Solaris, & AIX
    // sysconf returns -1 on error; do not let that wrap around to a huge unsigned count.
    const long count = sysconf(_SC_NPROCESSORS_ONLN);
    return (count < 1) ? 1 : uint(count);
#elif NV_OS_DARWIN || NV_OS_FREEBSD
    int numCPU = 0; // Stays below 1 when a query fails, which triggers the fallbacks.
    int mib[4];
    size_t len = sizeof(numCPU);

    // set the mib for hw.ncpu
    mib[0] = CTL_HW;

#if defined(HW_AVAILCPU)
    // Preferred query; only attempted where the system header provides it.
    mib[1] = HW_AVAILCPU; // alternatively, try HW_NCPU;

    // get the number of CPUs from the system
    if (sysctl(mib, 2, &numCPU, &len, NULL, 0) != 0) {
        numCPU = 0;
    }
#endif

    if (numCPU < 1) {
        mib[1] = HW_NCPU;
        len = sizeof(numCPU);
        if (sysctl(mib, 2, &numCPU, &len, NULL, 0) != 0 || numCPU < 1) {
            return 1; // Assume single core.
        }
    }

    return numCPU;
#else
    return 1; // Assume single core.
#endif
}

@ -0,0 +1,83 @@
// This code is in the public domain -- castanyo@yahoo.es
#pragma once
#ifndef NV_THREAD_H
#define NV_THREAD_H
#include "nvcore/nvcore.h"
// Function linkage
// NVTHREAD_API / NVTHREAD_CLASS expand to DLL export or import decorations when the
// library is built as a shared library, and to nothing otherwise.
#if NVTHREAD_SHARED
#ifdef NVTHREAD_EXPORTS
#define NVTHREAD_API DLL_EXPORT
#define NVTHREAD_CLASS DLL_EXPORT_CLASS
#else
#define NVTHREAD_API DLL_IMPORT
#define NVTHREAD_CLASS DLL_IMPORT
#endif
#else // NVTHREAD_SHARED
#define NVTHREAD_API
#define NVTHREAD_CLASS
#endif // NVTHREAD_SHARED
// Compiler barriers.
// See: http://en.wikipedia.org/wiki/Memory_ordering
// These map to compiler intrinsics / an empty asm statement with a "memory" clobber.
#if NV_CC_MSVC
#include <intrin.h>
#pragma intrinsic(_WriteBarrier)
#define nvCompilerWriteBarrier _WriteBarrier
#pragma intrinsic(_ReadWriteBarrier)
#define nvCompilerReadWriteBarrier _ReadWriteBarrier
#if _MSC_VER >= 1400 // ReadBarrier is VC2005
#pragma intrinsic(_ReadBarrier)
#define nvCompilerReadBarrier _ReadBarrier
#else
#define nvCompilerReadBarrier _ReadWriteBarrier
#endif
#elif NV_CC_GNUC
// NOTE(review): this expansion already ends in ';', so "nvCompilerReadWriteBarrier();"
// yields an extra empty statement, which breaks an unbraced if/else -- confirm callers.
#define nvCompilerReadWriteBarrier() asm volatile("" ::: "memory");
// The read and write variants fall back to the full barrier here.
#define nvCompilerWriteBarrier nvCompilerReadWriteBarrier
#define nvCompilerReadBarrier nvCompilerReadWriteBarrier
#endif // NV_CC_MSVC
// @@ Memory barriers / fences.
// @@ Atomics.
/* Wrap this up:
#define YieldProcessor() __asm { rep nop }
#define YieldProcessor _mm_pause
#define YieldProcessor __yield
BOOL WINAPI SwitchToThread(void);
*/
namespace nv
{
// Reentrant.
uint hardwareThreadCount();
// Not thread-safe. Use from main thread only.
void initWorkers();
void shutWorkers();
void setWorkerFunction(void * func);
} // nv namespace
#endif // NV_THREAD_H

@ -37,7 +37,7 @@ using namespace nv;
using namespace nvtt;
void CompressorBC6::compressBlock(Tile & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
void CompressorBC6::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
NV_UNUSED(alphaMode); // ZOH does not support alpha.
@ -56,7 +56,7 @@ void CompressorBC6::compressBlock(Tile & tile, AlphaMode alphaMode, const Compre
}
void CompressorBC7::compressBlock(Tile & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
void CompressorBC7::compressBlock(ColorSet & tile, AlphaMode alphaMode, const CompressionOptions::Private & compressionOptions, void * output)
{
// @@ TODO
}

@ -481,10 +481,10 @@ void D3DXCompressorDXT1::compress(nvtt::InputFormat inputFormat, nvtt::AlphaMode
err = surface->LockRect(&rect, NULL, D3DLOCK_READONLY);
if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size);
}
if (outputOptions.outputHandler != NULL) {
int size = rect.Pitch * ((h + 3) / 4);
outputOptions.outputHandler->writeData(rect.pBits, size);
}
err = surface->UnlockRect();
}

@ -110,7 +110,7 @@ namespace
{
nvDebugCheck(alignment >= 1);
flush();
int remainder = (size_t)ptr % alignment;
int remainder = (int)((uintptr_t)ptr % alignment);
if (remainder != 0) {
putBits(0, (alignment - remainder) * 8);
}

@ -349,6 +349,8 @@ bool Compressor::Private::compress(AlphaMode alphaMode, int w, int h, int d, int
compressor->compress(alphaMode, w, h, d, rgba, dispatcher, compressionOptions, outputOptions);
}
outputOptions.endImage();
return true;
}

@ -135,6 +135,11 @@ bool OutputOptions::Private::writeData(const void * data, int size) const
return outputHandler == NULL || outputHandler->writeData(data, size);
}
// Forwards the end-of-image notification to the output handler, if one is set.
void OutputOptions::Private::endImage() const
{
if (outputHandler != NULL) outputHandler->endImage();
}
void OutputOptions::Private::error(Error e) const
{
if (errorHandler != NULL) errorHandler->error(e);

@ -52,6 +52,11 @@ namespace nvtt
return true;
}
virtual void endImage()
{
// ignore.
}
nv::StdOutputStream stream;
};
@ -72,6 +77,7 @@ namespace nvtt
void beginImage(int size, int width, int height, int depth, int face, int miplevel) const;
bool writeData(const void * data, int size) const;
void endImage() const;
void error(Error e) const;
};

@ -18,8 +18,8 @@
// http://msdn.microsoft.com/en-us/library/dd504870.aspx
#if NV_OS_WIN32 && _MSC_VER >= 1600
#define HAVE_PPL 1
//#include <array>
#include <ppl.h>
#include <array>
//#include <ppl.h>
#endif
// Intel Thread Building Blocks (TBB).
@ -28,6 +28,8 @@
#include <tbb/parallel_for.h>
#endif
#include "nvthread/ParallelFor.h"
namespace nvtt {
@ -40,6 +42,15 @@ namespace nvtt {
}
};
// Task dispatcher that distributes the `count` task invocations over the nvthread
// thread pool through nv::ParallelFor.
struct ParallelTaskDispatcher : public TaskDispatcher
{
    virtual void dispatch(Task * task, void * context, int count) {
        // ParallelFor::run takes an unsigned count, so a negative value would turn
        // into a huge number of iterations. There is nothing to do for count <= 0.
        if (count <= 0) {
            return;
        }

        nv::ParallelFor parallelFor(task, context);
        parallelFor.run(count); // @@ Add support for custom grain.
    }
};
#if defined(HAVE_OPENMP)
struct OpenMPTaskDispatcher : public TaskDispatcher
@ -81,9 +92,24 @@ namespace nvtt {
#if defined(HAVE_PPL)
// Minimal iterator over consecutive integers: dereferencing yields the current value,
// ++/-- step it by one.
class CountingIterator
{
public:
    CountingIterator() : i(0) {}
    // Copies must keep the source position. Resetting to 0 here would make every
    // by-value copy (e.g. when passed to an algorithm) restart from the beginning.
    CountingIterator(const CountingIterator & rhs) : i(rhs.i) {}
    explicit CountingIterator(int x) : i(x) {}

    const int & operator*() const { return i; }
    CountingIterator & operator++() { i++; return *this; }
    CountingIterator & operator--() { i--; return *this; }

    // Position comparison, needed by algorithms that loop until begin reaches end.
    bool operator==(const CountingIterator & rhs) const { return i == rhs.i; }
    bool operator!=(const CountingIterator & rhs) const { return i != rhs.i; }

private:
    int i;
};
struct TaskFunctor {
TaskFunctor(Task * task, void * context) : task(task), context(context) {}
void operator()(int n) const {
void operator()(int & n) const {
task(context, n);
}
Task * task;
@ -95,12 +121,16 @@ namespace nvtt {
{
virtual void dispatch(Task * task, void * context, int count)
{
CountingIterator begin(0);
CountingIterator end((int)count);
TaskFunctor func(task, context);
Concurrency::parallel_for(0, count, func);
std::for_each(begin, end, func);
//parallel_for_each(begin, end, func);
}
};
#endif // HAVE_PPL
#endif
#if defined(HAVE_TBB)
@ -132,7 +162,8 @@ namespace nvtt {
#elif defined(HAVE_GCD)
typedef AppleTaskDispatcher ConcurrentTaskDispatcher;
#else
typedef SequentialTaskDispatcher ConcurrentTaskDispatcher;
//typedef SequentialTaskDispatcher ConcurrentTaskDispatcher;
typedef ParallelTaskDispatcher ConcurrentTaskDispatcher;
#endif
} // namespace nvtt

@ -615,7 +615,7 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC3)
@ -629,19 +629,19 @@ bool TexImage::setImage2D(Format format, Decoder decoder, int w, int h, const vo
block->decodeBlock(&colors, false);
}
else if (decoder == Decoder_NV5x) {
block->decodeBlockNV5x(&colors);
block->decodeBlockNV5x(&colors);
}
}
else if (format == nvtt::Format_BC4)
{
const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
const BlockATI1 * block = (const BlockATI1 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
else if (format == nvtt::Format_BC5)
{
const BlockATI2 * block = (const BlockATI2 *)ptr;
block->decodeBlock(&colors, decoder == Decoder_D3D9);
}
for (int yy = 0; yy < 4; yy++)
{
@ -864,6 +864,42 @@ bool TexImage::buildNextMipmap(MipmapFilter filter, float filterWidth, const flo
return true;
}
// Changes the image extents to w x h x d without resampling: the region that overlaps
// the old image is copied, and the rest of the new image is left as cleared by clear().
// Does nothing if there is no image or the size is unchanged.
void TexImage::canvasSize(int w, int h, int d)
{
    nvDebugCheck(w > 0 && h > 0 && d > 0);

    FloatImage * img = m->image;
    if (img == NULL || (w == img->width() && h == img->height() && d == img->depth())) {
        return;
    }

    detach();

    FloatImage * new_img = new FloatImage;
    new_img->allocate(4, w, h, d);
    new_img->clear();

    // Extents of the region shared by the old and the new image. These are kept apart
    // from w/h/d so that the requested size is still available below.
    const int copyW = (int)min(uint(w), img->width());
    const int copyH = (int)min(uint(h), img->height());
    const int copyD = (int)min(uint(d), img->depth());

    for (int z = 0; z < copyD; z++) {
        for (int y = 0; y < copyH; y++) {
            for (int x = 0; x < copyW; x++) {
                new_img->pixel(0, x, y, z) = img->pixel(0, x, y, z);
                new_img->pixel(1, x, y, z) = img->pixel(1, x, y, z);
                new_img->pixel(2, x, y, z) = img->pixel(2, x, y, z);
                new_img->pixel(3, x, y, z) = img->pixel(3, x, y, z);
            }
        }
    }

    delete m->image;
    m->image = new_img;

    // The type follows the depth of the new image (the requested d), not the depth of
    // the copied region: growing a 2D image in depth must produce a 3D texture.
    m->type = (d == 1) ? TextureType_2D : TextureType_3D;
}
// Color transforms.
void TexImage::toLinear(float gamma)
{
@ -885,6 +921,66 @@ void TexImage::toGamma(float gamma)
m->image->toGamma(0, 3, gamma);
}
// Converts a linear value to sRGB with the piecewise curve: linear segment below
// 0.0031308, power segment up to 1. The result is clamped to [0, 1]; inputs that
// fail every comparison (NaN) map to 1.
static float toSrgb(float f) {
    if (f <= 0.0) {
        return 0.0f;
    }
    if (f <= 0.0031308f) {
        return 12.92f * f;
    }
    if (f <= 1.0f) {
        return (powf(f, 0.41666f) * 1.055f) - 0.055f;
    }
    return 1.0f;
}
// Applies the sRGB transfer curve to the R, G and B channels of every pixel.
// Alpha is left untouched. Does nothing if there is no image.
void TexImage::toSrgb()
{
    if (m->image == NULL) return;

    detach();

    // Fetch the image only after detach(), so the edit goes to this object's own data.
    // NOTE(review): assumes detach() may replace m (and therefore m->image) with a
    // private copy when the data is shared -- confirm against detach().
    FloatImage * img = m->image;

    const uint count = img->pixelCount();
    for (uint j = 0; j < count; j++)
    {
        float & r = img->pixel(0, j);
        float & g = img->pixel(1, j);
        float & b = img->pixel(2, j);

        r = ::toSrgb(r);
        g = ::toSrgb(g);
        b = ::toSrgb(b);
    }
}
// Piecewise-linear curve with four segments (slopes 4, 2, 1 and 0.5, with breaks at
// 1/16, 1/8 and 1/2). The result is clamped to [0, 1]; inputs that fail every
// comparison (NaN) map to 1.
static float toXenonSrgb(float f) {
    if (f < 0) {
        return 0;
    }
    if (f < (1.0f/16.0f)) {
        return 4.0f * f;
    }
    if (f < (1.0f/8.0f)) {
        return 0.25f + 2.0f * (f - 0.0625f);
    }
    if (f < 0.5f) {
        return 0.375f + 1.0f * (f - 0.125f);
    }
    if (f < 1.0f) {
        return 0.75f + 0.5f * (f - 0.50f);
    }
    return 1.0f;
}
// Applies the piecewise-linear Xenon sRGB curve to the R, G and B channels of every
// pixel. Alpha is left untouched. Does nothing if there is no image.
void TexImage::toXenonSrgb()
{
    if (m->image == NULL) return;

    detach();

    // Fetch the image only after detach(), so the edit goes to this object's own data.
    // NOTE(review): assumes detach() may replace m (and therefore m->image) with a
    // private copy when the data is shared -- confirm against detach().
    FloatImage * img = m->image;

    const uint count = img->pixelCount();
    for (uint j = 0; j < count; j++)
    {
        float & r = img->pixel(0, j);
        float & g = img->pixel(1, j);
        float & b = img->pixel(2, j);

        r = ::toXenonSrgb(r);
        g = ::toXenonSrgb(g);
        b = ::toXenonSrgb(b);
    }
}
void TexImage::transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4])
{
if (m->image == NULL) return;
@ -1140,9 +1236,9 @@ void TexImage::toRGBM(float range/*= 1*/, float threshold/*= 0.25*/)
const uint count = img->pixelCount();
for (uint i = 0; i < count; i++) {
float R = nv::clamp(r[i] * irange, 0.0f, 1.0f);
float G = nv::clamp(g[i] * irange, 0.0f, 1.0f);
float B = nv::clamp(b[i] * irange, 0.0f, 1.0f);
float R = nv::clamp(r[i], 0.0f, 1.0f);
float G = nv::clamp(g[i], 0.0f, 1.0f);
float B = nv::clamp(b[i], 0.0f, 1.0f);
#if 1
float M = max(max(R, G), max(B, threshold));

@ -294,6 +294,9 @@ namespace nvtt
/// Output data. Compressed data is output as soon as it's generated to minimize memory allocations.
virtual bool writeData(const void * data, int size) = 0;
/// Indicate the end of a the compressed image.
virtual void endImage() = 0;
};
/// Error codes.
@ -440,10 +443,13 @@ namespace nvtt
NVTT_API void resize(int maxExtent, RoundMode mode, ResizeFilter filter, float filterWidth, const float * params = 0);
NVTT_API bool buildNextMipmap(MipmapFilter filter);
NVTT_API bool buildNextMipmap(MipmapFilter filter, float filterWidth, const float * params = 0);
NVTT_API void canvasSize(int w, int h, int d);
// Color transforms.
NVTT_API void toLinear(float gamma);
NVTT_API void toGamma(float gamma);
NVTT_API void toSrgb();
NVTT_API void toXenonSrgb();
NVTT_API void transform(const float w0[4], const float w1[4], const float w2[4], const float w3[4], const float offset[4]);
NVTT_API void swizzle(int r, int g, int b, int a);
NVTT_API void scaleBias(int channel, float scale, float bias);

@ -56,6 +56,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
// ignore.
}
virtual void endImage()
{
// Ignore.
}
// Output data.
virtual bool writeData(const void * data, int size)
{

Loading…
Cancel
Save