diff --git a/project/vc12/bc6h/bc6h.vcxproj b/project/vc12/bc6h/bc6h.vcxproj
index d13ccd3..7b75890 100644
--- a/project/vc12/bc6h/bc6h.vcxproj
+++ b/project/vc12/bc6h/bc6h.vcxproj
@@ -92,12 +92,13 @@
Disabled
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
@@ -106,12 +107,13 @@
Disabled
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
@@ -140,6 +142,7 @@
Level3
ProgramDatabase
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
diff --git a/project/vc12/bc7/bc7.vcxproj b/project/vc12/bc7/bc7.vcxproj
index 2240bea..717a3ad 100644
--- a/project/vc12/bc7/bc7.vcxproj
+++ b/project/vc12/bc7/bc7.vcxproj
@@ -100,12 +100,13 @@
Disabled
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
@@ -114,12 +115,13 @@
Disabled
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
@@ -148,6 +150,7 @@
Level3
ProgramDatabase
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
diff --git a/project/vc12/imperativeapi/imperativeapi.vcxproj b/project/vc12/imperativeapi/imperativeapi.vcxproj
index 231ad85..3a03f0d 100644
--- a/project/vc12/imperativeapi/imperativeapi.vcxproj
+++ b/project/vc12/imperativeapi/imperativeapi.vcxproj
@@ -79,12 +79,13 @@
Disabled
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
%(AdditionalDependencies)
@@ -99,12 +100,13 @@
Disabled
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
%(AdditionalLibraryDirectories)
@@ -152,6 +154,7 @@
Level3
ProgramDatabase
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
%(AdditionalLibraryDirectories)
diff --git a/project/vc12/nvassemble/nvassemble.vcxproj b/project/vc12/nvassemble/nvassemble.vcxproj
index c71faf5..1905239 100644
--- a/project/vc12/nvassemble/nvassemble.vcxproj
+++ b/project/vc12/nvassemble/nvassemble.vcxproj
@@ -83,11 +83,12 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -109,6 +110,7 @@
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
MultiThreadedDebugDLL
+ true
libpng.lib;jpeg.lib;tiff.lib;%(AdditionalDependencies)
@@ -158,6 +160,7 @@
true
MultiThreadedDLL
false
+ true
libpng.lib;jpeg.lib;tiff.lib;%(AdditionalDependencies)
diff --git a/project/vc12/nvcompress/nvcompress.vcxproj b/project/vc12/nvcompress/nvcompress.vcxproj
index f764c76..f12366e 100644
--- a/project/vc12/nvcompress/nvcompress.vcxproj
+++ b/project/vc12/nvcompress/nvcompress.vcxproj
@@ -260,13 +260,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -284,11 +285,12 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVDXT_SHARED;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
%(AdditionalLibraryDirectories)
@@ -311,6 +313,7 @@
Level3
ProgramDatabase
+ true
%(AdditionalDependencies)
@@ -331,6 +334,7 @@
WIN32;NDEBUG;_CONSOLE;NVDXT_SHARED;%(PreprocessorDefinitions)
MultiThreadedDLL
Level3
+ true
%(AdditionalLibraryDirectories)
diff --git a/project/vc12/nvcore/nvcore.vcxproj b/project/vc12/nvcore/nvcore.vcxproj
index cfd81dd..00835fd 100644
--- a/project/vc12/nvcore/nvcore.vcxproj
+++ b/project/vc12/nvcore/nvcore.vcxproj
@@ -82,13 +82,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
@@ -99,13 +100,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
@@ -147,6 +149,7 @@
Level3
ProgramDatabase
+ true
diff --git a/project/vc12/nvddsinfo/nvddsinfo.vcxproj b/project/vc12/nvddsinfo/nvddsinfo.vcxproj
index e2c7a1c..24b3611 100644
--- a/project/vc12/nvddsinfo/nvddsinfo.vcxproj
+++ b/project/vc12/nvddsinfo/nvddsinfo.vcxproj
@@ -83,11 +83,12 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -109,6 +110,7 @@
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
MultiThreadedDebugDLL
+ true
libpng.lib;jpeg.lib;tiff.lib;%(AdditionalDependencies)
@@ -158,6 +160,7 @@
true
MultiThreadedDLL
false
+ true
libpng.lib;jpeg.lib;tiff.lib;%(AdditionalDependencies)
diff --git a/project/vc12/nvdecompress/nvdecompress.vcxproj b/project/vc12/nvdecompress/nvdecompress.vcxproj
index 52961a8..841be5c 100644
--- a/project/vc12/nvdecompress/nvdecompress.vcxproj
+++ b/project/vc12/nvdecompress/nvdecompress.vcxproj
@@ -86,13 +86,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -113,13 +114,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVDXT_SHARED;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
..\..\..\extern\gnuwin32\lib;%(AdditionalLibraryDirectories)
@@ -180,6 +182,7 @@
Level3
+ true
..\..\..\extern\gnuwin32\lib;%(AdditionalLibraryDirectories)
@@ -238,4 +241,4 @@
-
\ No newline at end of file
+
diff --git a/project/vc12/nvimage/nvimage.vcxproj b/project/vc12/nvimage/nvimage.vcxproj
index 3d99f6c..249859b 100644
--- a/project/vc12/nvimage/nvimage.vcxproj
+++ b/project/vc12/nvimage/nvimage.vcxproj
@@ -82,13 +82,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;$(SolutionDir)\..\..\extern\stb;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
@@ -99,13 +100,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;$(SolutionDir)\..\..\extern\stb;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
@@ -146,6 +148,7 @@
Level3
ProgramDatabase
+ true
diff --git a/project/vc12/nvimgdiff/nvimgdiff.vcxproj b/project/vc12/nvimgdiff/nvimgdiff.vcxproj
index 5deb260..b61267c 100644
--- a/project/vc12/nvimgdiff/nvimgdiff.vcxproj
+++ b/project/vc12/nvimgdiff/nvimgdiff.vcxproj
@@ -86,13 +86,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -113,13 +114,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVDXT_SHARED;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
..\..\..\extern\gnuwin32\lib;%(AdditionalLibraryDirectories)
@@ -181,6 +183,7 @@
Level3
+ true
..\..\..\extern\gnuwin32\lib;%(AdditionalLibraryDirectories)
@@ -240,4 +243,4 @@
-
\ No newline at end of file
+
diff --git a/project/vc12/nvmath/nvmath.vcxproj b/project/vc12/nvmath/nvmath.vcxproj
index bc1ff6a..63728ab 100644
--- a/project/vc12/nvmath/nvmath.vcxproj
+++ b/project/vc12/nvmath/nvmath.vcxproj
@@ -82,13 +82,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
@@ -99,13 +100,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
@@ -147,6 +149,7 @@
Level3
ProgramDatabase
+ true
diff --git a/project/vc12/nvthread/nvthread.vcxproj b/project/vc12/nvthread/nvthread.vcxproj
index f8a5382..08ec1c1 100644
--- a/project/vc12/nvthread/nvthread.vcxproj
+++ b/project/vc12/nvthread/nvthread.vcxproj
@@ -94,6 +94,8 @@
Level3
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
+ false
true
@@ -104,6 +106,8 @@
Level3
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
+ false
true
@@ -116,6 +120,7 @@
true
true
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
true
@@ -130,6 +135,7 @@
true
true
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
+ true
true
diff --git a/project/vc12/nvtt.sln b/project/vc12/nvtt.sln
index 58b5623..77167a4 100644
--- a/project/vc12/nvtt.sln
+++ b/project/vc12/nvtt.sln
@@ -11,13 +11,29 @@ Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "Solution Items", "Solution
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcxproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
+ {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
+ {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{4CFD4876-A026-46C2-AFCF-FB11346E815D} = {4CFD4876-A026-46C2-AFCF-FB11346E815D}
+ {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
+ {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
+ {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
+ {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcxproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
+ ProjectSection(ProjectDependencies) = postProject
+ {4CFD4876-A026-46C2-AFCF-FB11346E815D} = {4CFD4876-A026-46C2-AFCF-FB11346E815D}
+ {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
+ {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
+ {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
+ {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
+ EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcxproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
ProjectSection(ProjectDependencies) = postProject
+ {F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
+ {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
+ {C33787E3-5564-4834-9FE3-A9020455A669} = {C33787E3-5564-4834-9FE3-A9020455A669}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
@@ -31,7 +47,10 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcxproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject
+ {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
+ {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
+ {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcxproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
@@ -40,14 +59,30 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvim
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcxproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
+ ProjectSection(ProjectDependencies) = postProject
+ {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
+ {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
+ {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
+ EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcxproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
+ ProjectSection(ProjectDependencies) = postProject
+ {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
+ {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
+ {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
+ EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt-testsuite", "testsuite\testsuite.vcxproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
+ ProjectSection(ProjectDependencies) = postProject
+ {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
+ EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "imperativeapi", "imperativeapi\imperativeapi.vcxproj", "{7DCF280E-702B-49F3-84A7-AE7E146384D6}"
+ ProjectSection(ProjectDependencies) = postProject
+ {1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
+ EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "bc6h", "bc6h\bc6h.vcxproj", "{C33787E3-5564-4834-9FE3-A9020455A669}"
EndProject
diff --git a/project/vc12/nvtt/nvtt.vcxproj b/project/vc12/nvtt/nvtt.vcxproj
index 9ac72b4..c9de779 100644
--- a/project/vc12/nvtt/nvtt.vcxproj
+++ b/project/vc12/nvtt/nvtt.vcxproj
@@ -306,13 +306,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
%(AdditionalDependencies)
@@ -335,11 +336,12 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_WINDOWS;_USRDLL;NVTT_EXPORTS;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
$(CUDA_LIB_PATH)\..\lib64;%(AdditionalLibraryDirectories)
@@ -366,6 +368,7 @@
Level3
ProgramDatabase
+ true
%(AdditionalDependencies)
@@ -395,6 +398,7 @@
MultiThreadedDLL
Level3
ProgramDatabase
+ true
$(CUDA_LIB_PATH)\..\lib64;%(AdditionalLibraryDirectories)
diff --git a/project/vc12/nvzoom/nvzoom.vcxproj b/project/vc12/nvzoom/nvzoom.vcxproj
index 78ea696..ebd8ce0 100644
--- a/project/vc12/nvzoom/nvzoom.vcxproj
+++ b/project/vc12/nvzoom/nvzoom.vcxproj
@@ -86,13 +86,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVTT_SHARED;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -113,13 +114,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;NVDXT_SHARED;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
..\..\..\extern\gnuwin32\lib;%(AdditionalLibraryDirectories)
@@ -172,6 +174,7 @@
Level3
+ true
..\..\..\extern\gnuwin32\lib;%(AdditionalLibraryDirectories)
diff --git a/project/vc12/squish/squish.vcxproj b/project/vc12/squish/squish.vcxproj
index 70cffb4..64980d5 100644
--- a/project/vc12/squish/squish.vcxproj
+++ b/project/vc12/squish/squish.vcxproj
@@ -82,13 +82,14 @@
Disabled
%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
@@ -99,13 +100,14 @@
Disabled
%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_LIB;__SSE2__;__SSE__;__MMX__;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
ProgramDatabase
+ true
@@ -147,6 +149,7 @@
Level3
ProgramDatabase
+ true
diff --git a/project/vc12/testsuite/testsuite.vcxproj b/project/vc12/testsuite/testsuite.vcxproj
index 70b5f62..dbe25ff 100644
--- a/project/vc12/testsuite/testsuite.vcxproj
+++ b/project/vc12/testsuite/testsuite.vcxproj
@@ -153,13 +153,14 @@
Disabled
$(SolutionDir);$(SolutionDir)\..\..\src;$(SolutionDir)\..\..\extern\poshlib;%(AdditionalIncludeDirectories)
WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)
- true
+ false
EnableFastChecks
MultiThreadedDebugDLL
Level3
EditAndContinue
+ true
%(AdditionalDependencies)
@@ -207,6 +208,7 @@
Level3
ProgramDatabase
+ true
%(AdditionalDependencies)
diff --git a/project/vc9/nvtt.sln b/project/vc9/nvtt.sln
index 76457fb..582783a 100644
--- a/project/vc9/nvtt.sln
+++ b/project/vc9/nvtt.sln
@@ -14,6 +14,7 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj",
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject
+ {3DD3A43D-C6EA-460F-821B-6C339A03C5BB} = {3DD3A43D-C6EA-460F-821B-6C339A03C5BB}
{F974F34B-AF02-4C88-8E1E-85475094EA78} = {F974F34B-AF02-4C88-8E1E-85475094EA78}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
diff --git a/src/nvcore/Array.h b/src/nvcore/Array.h
index 2e332fb..f3310e8 100644
--- a/src/nvcore/Array.h
+++ b/src/nvcore/Array.h
@@ -115,7 +115,8 @@ namespace nv
Array & append( const T & val );
Array & operator<< ( T & t );
void pop_back();
- void popBack();
+ void popBack(uint count = 1);
+ void popFront(uint count = 1);
const T & back() const;
T & back();
const T & front() const;
diff --git a/src/nvcore/Array.inl b/src/nvcore/Array.inl
index 2f7cc44..bf325a6 100755
--- a/src/nvcore/Array.inl
+++ b/src/nvcore/Array.inl
@@ -91,11 +91,32 @@ namespace nv
resize( m_size - 1 );
}
template
- NV_FORCEINLINE void Array::popBack()
+ NV_FORCEINLINE void Array::popBack(uint count)
{
- pop_back();
+ nvDebugCheck(m_size >= count);
+ resize(m_size - count);
}
+ template
+ NV_FORCEINLINE void Array::popFront(uint count)
+ {
+ nvDebugCheck(m_size >= count);
+ // Removes the first 'count' elements; the remaining ones are moved to the front of the buffer.
+
+ if (m_size == count) {
+ clear();
+ }
+ else {
+ destroy_range(m_buffer, 0, count);
+ // Raw byte move of the surviving elements; assumes T can be relocated with memmove -- TODO confirm.
+ memmove(m_buffer, m_buffer + count, sizeof(T) * (m_size - count));
+
+ m_size -= count;
+ }
+
+ }
+
+
// Get back element.
template
NV_FORCEINLINE const T & Array::back() const
diff --git a/src/nvcore/DefsVcWin32.h b/src/nvcore/DefsVcWin32.h
index 7c222d6..c2f6c18 100644
--- a/src/nvcore/DefsVcWin32.h
+++ b/src/nvcore/DefsVcWin32.h
@@ -25,6 +25,10 @@
#if _MSC_VER < 1500
# define vsnprintf _vsnprintf
#endif
+#if _MSC_VER < 1800 // strtoll/strtoull were only added to the CRT in Visual Studio 2013 (_MSC_VER 1800).
+# define strtoll _strtoi64
+# define strtoull _strtoui64
+#endif
#define chdir _chdir
#define getcwd _getcwd
diff --git a/src/nvcore/StrLib.cpp b/src/nvcore/StrLib.cpp
index 3835061..72d6612 100644
--- a/src/nvcore/StrLib.cpp
+++ b/src/nvcore/StrLib.cpp
@@ -160,6 +160,20 @@ void nv::strCat(char * dst, uint size, const char * src)
#endif
}
+NVCORE_API const char * nv::strSkipWhiteSpace(const char * str)
+{
+ nvDebugCheck(str != NULL);
+ while (*str == ' ') str++; // Advances past leading ' ' characters only; tabs and newlines are not skipped.
+ return str; // Points at the first non-space character, or at the terminating '\0'.
+}
+
+NVCORE_API char * nv::strSkipWhiteSpace(char * str)
+{
+ nvDebugCheck(str != NULL);
+ while (*str == ' ') str++; // Advances past leading ' ' characters only; tabs and newlines are not skipped.
+ return str; // Points at the first non-space character, or at the terminating '\0'.
+}
+
/** Pattern matching routine. I don't remember where did I get this. */
bool nv::strMatch(const char * str, const char * pat)
@@ -252,7 +266,9 @@ StringBuilder::StringBuilder( const StringBuilder & s ) : m_size(0), m_str(NULL)
/** Copy string. */
StringBuilder::StringBuilder(const char * s) : m_size(0), m_str(NULL)
{
- copy(s);
+ if (s != NULL) {
+ copy(s);
+ }
}
/** Copy string. */
@@ -514,6 +530,21 @@ bool StringBuilder::beginsWith(const char * str) const
return strncmp(m_str, str, l) == 0;
}
+// Find the last occurrence of 'c'. Returns a pointer to it, or NULL if 'c' is not found or the string is null.
+char * StringBuilder::reverseFind(char c)
+{
+ int length = (m_str != NULL) ? (int)strlen(m_str) - 1 : -1; // A null builder has nothing to search; avoids strlen(NULL).
+ while (length >= 0 && m_str[length] != c) {
+ length--;
+ }
+ if (length >= 0) {
+ return m_str + length;
+ }
+ else {
+ return NULL;
+ }
+}
+
/** Reset the string. */
void StringBuilder::reset()
@@ -553,15 +584,20 @@ const char * Path::extension() const
}
+/*static */void Path::translatePath(char * path, char pathSeparator/*= NV_PATH_SEPARATOR*/) {
+ nvCheck(path != NULL);
+
+ for (int i = 0;; i++) {
+ if (path[i] == '\0') break;
+ if (path[i] == '\\' || path[i] == '/') path[i] = pathSeparator;
+ }
+}
+
/// Toggles path separators (ie. \\ into /).
void Path::translatePath(char pathSeparator/*=NV_PATH_SEPARATOR*/)
{
- nvCheck( m_str != NULL );
-
- for (int i = 0; ; i++) {
- if (m_str[i] == '\0') break;
- if (m_str[i] == '\\' || m_str[i] == '/') m_str[i] = pathSeparator;
- }
+ nvCheck(!isNull());
+ translatePath(m_str, pathSeparator);
}
void Path::appendSeparator(char pathSeparator/*=NV_PATH_SEPARATOR*/)
diff --git a/src/nvcore/StrLib.h b/src/nvcore/StrLib.h
index 5d3d248..f4f3ac3 100644
--- a/src/nvcore/StrLib.h
+++ b/src/nvcore/StrLib.h
@@ -54,6 +54,9 @@ namespace nv
NVCORE_API void strCpy(char * dst, uint size, const char * src, uint len);
NVCORE_API void strCat(char * dst, uint size, const char * src);
+ NVCORE_API const char * strSkipWhiteSpace(const char * str);
+ NVCORE_API char * strSkipWhiteSpace(char * str);
+
NVCORE_API bool strMatch(const char * str, const char * pat) NV_PURE;
NVCORE_API bool isNumber(const char * str) NV_PURE;
@@ -123,6 +126,8 @@ namespace nv
bool endsWith(const char * str) const;
bool beginsWith(const char * str) const;
+ char * reverseFind(char c);
+
void reset();
bool isNull() const { return m_size == 0; }
@@ -195,6 +200,8 @@ namespace nv
NVCORE_API static char separator();
NVCORE_API static const char * fileName(const char *);
NVCORE_API static const char * extension(const char *);
+
+ NVCORE_API static void translatePath(char * path, char pathSeparator = NV_PATH_SEPARATOR);
};
diff --git a/src/nvcore/Utils.h b/src/nvcore/Utils.h
index 79f4ef7..778b252 100644
--- a/src/nvcore/Utils.h
+++ b/src/nvcore/Utils.h
@@ -136,6 +136,14 @@ namespace nv
return (b < a) ? a : b;
}
+ /// Return the maximum of the four arguments.
+ template
+ //inline const T & max4(const T & a, const T & b, const T & c, const T & d)
+ inline T max4(const T & a, const T & b, const T & c, const T & d)
+ {
+ return max(max(a, b), max(c, d));
+ }
+
/// Return the maximum of the three arguments.
template
//inline const T & max3(const T & a, const T & b, const T & c)
diff --git a/src/nvcore/nvcore.h b/src/nvcore/nvcore.h
index ae80afd..511d23e 100644
--- a/src/nvcore/nvcore.h
+++ b/src/nvcore/nvcore.h
@@ -133,7 +133,7 @@
#endif
#if NV_CC_MSVC
-#define NV_CC_CPP11 (__cplusplus > 199711L)
+#define NV_CC_CPP11 (__cplusplus > 199711L || _MSC_VER >= 1800) // Visual Studio 2013 has all the features we use, but doesn't advertise full C++11 support yet.
#else
// @@ IC: This works in CLANG, about GCC?
// @@ ES: Doesn't work in gcc. These 3 features are available in GCC >= 4.4.
diff --git a/src/nvmath/Matrix.h b/src/nvmath/Matrix.h
index dca4529..2f14f0c 100644
--- a/src/nvmath/Matrix.h
+++ b/src/nvmath/Matrix.h
@@ -77,6 +77,9 @@ namespace nv
Vector4 row(uint i) const;
Vector4 column(uint i) const;
+ void zero();
+ void identity();
+
void scale(float s);
void scale(Vector3::Arg s);
void translate(Vector3::Arg t);
diff --git a/src/nvmath/Matrix.inl b/src/nvmath/Matrix.inl
index 603d523..34fdc70 100644
--- a/src/nvmath/Matrix.inl
+++ b/src/nvmath/Matrix.inl
@@ -323,6 +323,22 @@ namespace nv
return Vector4(get(0, i), get(1, i), get(2, i), get(3, i));
}
+ inline void Matrix::zero()
+ {
+ m_data[0] = 0; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0;
+ m_data[4] = 0; m_data[5] = 0; m_data[6] = 0; m_data[7] = 0;
+ m_data[8] = 0; m_data[9] = 0; m_data[10] = 0; m_data[11] = 0;
+ m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 0;
+ }
+
+ inline void Matrix::identity()
+ {
+ m_data[0] = 1; m_data[1] = 0; m_data[2] = 0; m_data[3] = 0;
+ m_data[4] = 0; m_data[5] = 1; m_data[6] = 0; m_data[7] = 0;
+ m_data[8] = 0; m_data[9] = 0; m_data[10] = 1; m_data[11] = 0;
+ m_data[12] = 0; m_data[13] = 0; m_data[14] = 0; m_data[15] = 1;
+ }
+
// Apply scale.
inline void Matrix::scale(float s)
{
diff --git a/src/nvmath/Vector.inl b/src/nvmath/Vector.inl
index c896885..80c8e6f 100644
--- a/src/nvmath/Vector.inl
+++ b/src/nvmath/Vector.inl
@@ -446,8 +446,18 @@ namespace nv
}
inline float triangleArea(Vector2::Arg a, Vector2::Arg b, Vector2::Arg c)
{
- return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
- //return triangleArea(a-c, b-c);
+ // IC: While it may be appealing to use the following expression:
+ //return (c.x * a.y + a.x * b.y + b.x * c.y - b.x * a.y - c.x * b.y - a.x * c.y); // * 0.5f;
+
+ // That's actually a terrible idea. Small triangles far from the origin can end up producing fairly large floating point
+ // numbers, and the result becomes very unstable and dependent on the order of the factors.
+
+ // Instead, it's preferable to subtract the vertices first, and multiply the resulting small values together. The result
+ // in this case is always much more accurate (as long as the triangle is small) and less dependent on the location of
+ // the triangle.
+
+ //return ((a.x - c.x) * (b.y - c.y) - (a.y - c.y) * (b.x - c.x)); // * 0.5f;
+ return triangleArea(a-c, b-c);
}
diff --git a/src/nvmath/nvmath.h b/src/nvmath/nvmath.h
index baeb02a..fa423a3 100644
--- a/src/nvmath/nvmath.h
+++ b/src/nvmath/nvmath.h
@@ -255,9 +255,10 @@ namespace nv
inline int sign(float a)
{
- if (a > 0.0f) return 1;
- if (a < 0.0f) return -1;
- return 0;
+ return (a > 0) - (a < 0);
+ //if (a > 0.0f) return 1;
+ //if (a < 0.0f) return -1;
+ //return 0;
}
union Float754 {
diff --git a/src/nvthread/Atomic.h b/src/nvthread/Atomic.h
index ec3044a..e6aaa3e 100644
--- a/src/nvthread/Atomic.h
+++ b/src/nvthread/Atomic.h
@@ -119,18 +119,35 @@ namespace nv {
#if NV_CC_MSVC
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
+ // Returns incremented value.
inline uint32 atomicIncrement(uint32 * value)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
- return (uint32)_InterlockedIncrement((long *)value);
+ return uint32(_InterlockedIncrement((long *)value));
}
+ // Returns decremented value.
inline uint32 atomicDecrement(uint32 * value)
{
nvDebugCheck((intptr_t(value) & 3) == 0);
- return (uint32)_InterlockedDecrement((long *)value);
+ return uint32(_InterlockedDecrement((long *)value));
}
+ // Returns the new value, i.e. the value after the addition.
+ inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
+ nvDebugCheck((intptr_t(value) & 3) == 0);
+ return uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add)) + value_to_add;
+ }
+
+ // Returns original value before addition.
+ inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
+ nvDebugCheck((intptr_t(value) & 3) == 0);
+ return uint32(_InterlockedExchangeAdd((long*)value, (long)value_to_add));
+ }
+
+
+
+
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
// @@ Is this strong or weak? Does InterlockedCompareExchange have spurious failures?
@@ -148,10 +165,7 @@ namespace nv {
return (uint32)_InterlockedExchange((long *)value, (long)desired);
}
- inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
- nvDebugCheck((intptr_t(value) & 3) == 0);
- return (uint32)_InterlockedExchangeAdd((long*)value, (long)value_to_add);
- }
+
#elif NV_CC_CLANG && (NV_OS_IOS || NV_OS_DARWIN)
NV_COMPILER_CHECK(sizeof(uint32) == sizeof(long));
@@ -180,20 +194,31 @@ namespace nv {
}
*/
- inline uint32 atomicIncrement(uint32 * value)
- {
+ // Returns incremented value.
+ inline uint32 atomicIncrement(uint32 * value) {
nvDebugCheck((intptr_t(value) & 3) == 0);
-
return __sync_add_and_fetch(value, 1);
}
- inline uint32 atomicDecrement(uint32 * value)
- {
+ // Returns decremented value.
+ inline uint32 atomicDecrement(uint32 * value) {
nvDebugCheck((intptr_t(value) & 3) == 0);
-
return __sync_sub_and_fetch(value, 1);
}
-
+
+ // Returns the new value, i.e. the value after the addition.
+ inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
+ nvDebugCheck((intptr_t(value) & 3) == 0);
+ return __sync_add_and_fetch(value, value_to_add);
+ }
+
+ // Returns original value before addition.
+ inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
+ nvDebugCheck((intptr_t(value) & 3) == 0);
+ return __sync_fetch_and_add(value, value_to_add);
+ }
+
+
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
// @@ Is this strong or weak?
@@ -210,10 +235,6 @@ namespace nv {
return __sync_lock_test_and_set(value, desired);
}
- inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
- nvDebugCheck((intptr_t(value) & 3) == 0);
- return __sync_add_and_fetch(value, value_to_add);
- }
@@ -271,20 +292,30 @@ namespace nv {
// Many alternative implementations at:
// http://www.memoryhole.net/kyle/2007/05/atomic_incrementing.html
- inline uint32 atomicIncrement(uint32 * value)
- {
+ // Returns incremented value.
+ inline uint32 atomicIncrement(uint32 * value) {
nvDebugCheck((intptr_t(value) & 3) == 0);
-
return __sync_add_and_fetch(value, 1);
}
- inline uint32 atomicDecrement(uint32 * value)
- {
+ // Returns decremented value.
+ inline uint32 atomicDecrement(uint32 * value) {
nvDebugCheck((intptr_t(value) & 3) == 0);
-
return __sync_sub_and_fetch(value, 1);
}
-
+
+ // Atomically adds 'value_to_add' to '*value' and returns the new value (after the addition).
+ inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
+ nvDebugCheck((intptr_t(value) & 3) == 0); // 'value' must be 4-byte aligned.
+ return __sync_add_and_fetch(value, value_to_add);
+ }
+
+ // Atomically adds 'value_to_add' to '*value' and returns the original value (before the addition).
+ inline uint32 atomicFetchAndAdd(uint32 * value, uint32 value_to_add) {
+ nvDebugCheck((intptr_t(value) & 3) == 0); // 'value' must be 4-byte aligned.
+ return __sync_fetch_and_add(value, value_to_add);
+ }
+
// Compare '*value' against 'expected', if equal, then stores 'desired' in '*value'.
// @@ C++0x style CAS? Unlike the C++0x version, 'expected' is not passed by reference and not mutated.
// @@ Is this strong or weak?
@@ -300,12 +331,6 @@ namespace nv {
// this is confusingly named, it doesn't actually do a test but always sets
return __sync_lock_test_and_set(value, desired);
}
-
- inline uint32 atomicAdd(uint32 * value, uint32 value_to_add) {
- nvDebugCheck((intptr_t(value) & 3) == 0);
- return __sync_add_and_fetch(value, value_to_add);
- }
-
#else
#error "Atomics not implemented."
diff --git a/src/nvthread/ParallelFor.cpp b/src/nvthread/ParallelFor.cpp
index c8e901e..b4449d6 100644
--- a/src/nvthread/ParallelFor.cpp
+++ b/src/nvthread/ParallelFor.cpp
@@ -11,18 +11,20 @@ using namespace nv;
#define ENABLE_PARALLEL_FOR 1
-static void worker(void * arg) {
+static void worker(void * arg, int tid) { // Pool task: 'arg' is the owning ParallelFor; 'tid' is not used yet (see the commented-out argument below).
ParallelFor * owner = (ParallelFor *)arg;
while(true) {
- // Consume one element at a time. @@ Might be more efficient to have custom grain.
- uint i = atomicIncrement(&owner->idx);
- if (i > owner->count) {
+ uint new_idx = atomicFetchAndAdd(&owner->idx, owner->step); // Claim the chunk [new_idx, new_idx + step); despite its name this is the index before the addition.
+ if (new_idx >= owner->count) { // Nothing left to claim.
break;
}
- owner->task(owner->context, i - 1);
- }
+ const uint count = min(owner->count, new_idx + owner->step); // Clamp the last chunk to the total element count.
+ for (uint i = new_idx; i < count; i++) {
+ owner->task(owner->context, /*tid, */i);
+ }
+ }
}
@@ -38,22 +40,16 @@ ParallelFor::~ParallelFor() {
#endif
}
-void ParallelFor::run(uint count, bool calling_thread_process_work /*=false*/) {
+void ParallelFor::run(uint count, uint step/*= 1*/) { // Runs the task for indices [0, count); workers claim 'step' indices at a time.
#if ENABLE_PARALLEL_FOR
storeRelease(&this->count, count);
+ storeRelease(&this->step, step > 0 ? step : 1u); // A zero step would never advance 'idx', so every worker would spin forever.
// Init atomic counter to zero.
storeRelease(&idx, 0);
// Start threads.
- pool->start(worker, this);
-
- if (calling_thread_process_work) {
- worker(this);
- }
-
- // Wait for all threads to complete.
- pool->wait();
+ pool->run(worker, this); // Starts the workers and returns once all of them have finished.
nvDebugCheck(idx >= count);
#else
@@ -63,4 +59,3 @@ void ParallelFor::run(uint count, bool calling_thread_process_work /*=false*/) {
#endif
}
-
diff --git a/src/nvthread/ParallelFor.h b/src/nvthread/ParallelFor.h
index b442dc6..f69e96c 100644
--- a/src/nvthread/ParallelFor.h
+++ b/src/nvthread/ParallelFor.h
@@ -12,26 +12,169 @@ namespace nv
class Thread;
class ThreadPool;
- typedef void ForTask(void * context, int id);
+ typedef void ForTask(void * context, /*int tid,*/ int idx); // @@ It would be nice to have the thread index as an argument here.
struct ParallelFor {
ParallelFor(ForTask * task, void * context);
~ParallelFor();
- void run(uint count, bool calling_thread_process_work = false);
+ void run(uint count, uint step = 1); // Invokes 'task' for every index in [0, count); 'step' is the number of indices a worker claims at once.
// Invariant:
ForTask * task;
void * context;
ThreadPool * pool;
- //uint workerCount; // @@ Move to thread pool.
- //Thread * workers;
// State:
uint count;
+ uint step; // Chunk size added to 'idx' each time a worker claims work.
/*atomic*/ uint idx;
};
+
+#if NV_CC_CPP11
+
+ template <typename F>
+ void sequential_for(uint count, F f) { // Calls f(i) for every i in [0, count), in order, on the calling thread.
+ for (uint i = 0; i < count; i++) {
+ f(i);
+ }
+ }
+
+
+ template <typename F>
+ void parallel_for(uint count, uint step, F f) { // Calls f(idx) for every idx in [0, count) through ParallelFor, handing out 'step' indices at a time.
+ // Transform lambda into function pointer.
+ auto lambda = [](void* context, /*int tid, */int idx) {
+ F & f = *reinterpret_cast<F *>(context); // 'context' is the address of the functor passed to ParallelFor below.
+ f(/*tid, */idx);
+ };
+
+ ParallelFor pf(lambda, &f);
+ pf.run(count, step);
+ }
+
+
+ template <typename F>
+ void parallel_for(uint count, F f) { // Convenience overload: one index per claimed chunk.
+ parallel_for(count, /*step=*/1, f);
+ }
+
+
+ template <typename F>
+ void parallel_for_if(uint count, uint step, bool condition, F f) { // Runs in parallel when 'condition' holds, otherwise sequentially.
+ if (condition) {
+ parallel_for(count, step, f);
+ }
+ else {
+ sequential_for(count, f); // 'step' is irrelevant on the sequential path.
+ }
+ }
+
+
+#if 0
+ template
+ void parallel_for_each(Array & array, uint step, F f) {
+ // Transform lambda into function pointer.
+ auto lambda = [](void* context, int idx) {
+ F & f = *reinterpret_cast(context);
+ f(array[idx]);
+ };
+
+ ParallelFor pf(lambda, &f);
+ pf.run(count, step);
+ }
+#endif
+
+
+#endif // NV_CC_CPP11
+
+
+/*
+
+#include "nvthread/Mutex.h"
+#include "nvcore/Array.inl"
+
+ template
+ struct ParallelOutputStream {
+#if 0
+ // In its most basic implementation the parallel stream is simply a single array protected by a mutex.
+ Parallel_Output_Stream(uint producer_count) {}
+
+ void reset() { final_array.clear(); }
+ void append(uint producer_id, const T & t) { Lock(mutex); final_array.append(t); }
+ nv::Array & finalize() { return final_array; }
+
+ nv::Mutex mutex;
+ nv::Array final_array;
+
+#elif 0
+ // Another simple implementation is to have N arrays that are merged at the end.
+ ParallelOutputStream(uint producer_count) : producer_count(producer_count) {
+ partial_array = new Array[producer_count];
+ }
+
+ void reset() {
+ for (int i = 0; i < producer_count; i++) {
+ partial_array[i].clear();
+ }
+ }
+
+ void append(uint producer_id, const T & t) {
+ nvCheck(producer_id < producer_count);
+ partial_array[producer_id].append(t);
+ }
+
+ nv::Array & finalize() {
+ for (int i = 1; i < producer_count; i++) {
+ partial_array->append(partial_array[i]);
+ partial_array[i].clear();
+ }
+ return *partial_array;
+ }
+
+ uint producer_count;
+ nv::Array * partial_array;
+#else
+ ParallelOutputStream(uint producer_count) : producer_count(producer_count) {
+ partial_array = new PartialArray[producer_count];
+ }
+
+ // But a more sophisticated implementation keeps N short arrays that are merged as they get full. This preserves partial order.
+ struct PartialArray { // Make sure this is aligned to cache lines. We want producers to access their respective arrays without conflicts.
+ uint count;
+ T data[32]; // Pick size to minimize wasted space considering cache line alignment?
+ };
+
+ const uint producer_count;
+ PartialArray * partial_array;
+
+ // @@ Make sure mutex and partial_array are not in the same cache line!
+
+ nv::Mutex mutex;
+ nv::Array final_array;
+
+ void append(uint producer_id, const T & t) {
+ if (partial_array[producer_id].count == 32) {
+ partial_array[producer_id].count = 0;
+ Lock(mutex);
+ final_array.append(partial_array[producer_id].data, 32);
+ }
+
+ partial_array[producer_id].data[partial_array[producer_id].count++] = t;
+ }
+ nv::Array & finalize() {
+ for (int i = 0; i < producer_count; i++) {
+ final_array.append(partial_array[producer_id].data, partial_array[producer_id].count);
+ }
+ return final_array;
+ }
+#endif
+ };
+
+*/
+
+
} // nv namespace
diff --git a/src/nvthread/Thread.cpp b/src/nvthread/Thread.cpp
index b9e3bc3..869d7e1 100644
--- a/src/nvthread/Thread.cpp
+++ b/src/nvthread/Thread.cpp
@@ -89,7 +89,7 @@ Thread::Thread() : p(new Private)
p->name = NULL;
}
-Thread::Thread(const char * const name) : p(new Private)
+Thread::Thread(const char * name) : p(new Private)
{
p->thread = 0;
p->name = name;
@@ -100,6 +100,12 @@ Thread::~Thread()
nvDebugCheck(p->thread == 0);
}
+void Thread::setName(const char * name) // Assigns the thread name; only the pointer is stored, the string is not copied here.
+{
+ nvCheck(p->name == NULL); // A name may only be assigned while none has been set yet.
+ p->name = name;
+}
+
void Thread::start(ThreadFunc * func, void * arg)
{
p->func = func;
@@ -110,10 +116,12 @@ void Thread::start(ThreadFunc * func, void * arg)
p->thread = CreateThread(NULL, 0, threadFunc, p.ptr(), 0, &threadId);
//p->thread = (HANDLE)_beginthreadex (0, 0, threadFunc, p.ptr(), 0, NULL); // @@ So that we can call CRT functions...
nvDebugCheck(p->thread != NULL);
- setThreadName(threadId, p->name);
-#if NV_USE_TELEMETRY
- tmThreadName(tmContext, threadId, p->name);
-#endif
+ if (p->name != NULL) {
+ setThreadName(threadId, p->name);
+ #if NV_USE_TELEMETRY
+ tmThreadName(tmContext, threadId, p->name);
+ #endif
+ }
#elif NV_OS_ORBIS
int ret = scePthreadCreate(&p->thread, NULL, threadFunc, p.ptr(), p->name ? p->name : "nv::Thread");
nvDebugCheck(ret == 0);
diff --git a/src/nvthread/Thread.h b/src/nvthread/Thread.h
index 5e0f0e2..e68bb22 100644
--- a/src/nvthread/Thread.h
+++ b/src/nvthread/Thread.h
@@ -17,9 +17,11 @@ namespace nv
NV_FORBID_COPY(Thread);
public:
Thread();
- Thread(const char * const name);
+ Thread(const char * name);
~Thread();
+ void setName(const char * name);
+
void start(ThreadFunc * func, void * arg);
void wait();
diff --git a/src/nvthread/ThreadPool.cpp b/src/nvthread/ThreadPool.cpp
index 53667ae..8373870 100644
--- a/src/nvthread/ThreadPool.cpp
+++ b/src/nvthread/ThreadPool.cpp
@@ -6,6 +6,13 @@
#include "Atomic.h"
#include "nvcore/Utils.h"
+#include "nvcore/StrLib.h"
+
+#if NV_USE_TELEMETRY
+#include
+extern HTELEMETRY tmContext;
+#endif
+
// Most of the time it's not necessary to protect the thread pool, but if it doesn't add a significant overhead, then it'd be safer to do it.
#define PROTECT_THREAD_POOL 1
@@ -20,6 +27,14 @@ Mutex s_pool_mutex("thread pool");
AutoPtr s_pool;
+/*static*/ void ThreadPool::setup(uint workerCount, bool useThreadAffinity, bool useCallingThread) {
+#if PROTECT_THREAD_POOL
+ Lock lock(s_pool_mutex);
+#endif
+ // Install a pool built with the requested configuration as the shared instance (s_pool).
+ s_pool = new ThreadPool(workerCount, useThreadAffinity, useCallingThread); // NOTE(review): the constructor also assigns s_pool = this; confirm AutoPtr tolerates re-assigning the same pointer.
+}
+
/*static*/ ThreadPool * ThreadPool::acquire()
{
#if PROTECT_THREAD_POOL
@@ -52,36 +67,59 @@ AutoPtr s_pool;
/*static*/ void ThreadPool::workerFunc(void * arg) {
uint i = U32((uintptr_t)arg); // This is OK, because workerCount should always be much smaller than 2^32
+ //ThreadPool::threadId = i;
+
+ if (s_pool->useThreadAffinity) {
+ lockThreadToProcessor(s_pool->useCallingThread + i); // Offset by one when the calling thread participates; the constructor locks that thread to processor 0.
+ }
+
while(true)
{
s_pool->startEvents[i].wait();
- nv::ThreadFunc * func = loadAcquirePointer(&s_pool->func);
+ ThreadTask * func = loadAcquirePointer(&s_pool->func);
if (func == NULL) {
return;
}
- func(s_pool->arg);
+ {
+#if NV_USE_TELEMETRY
+ tmZoneFiltered(tmContext, 20, TMZF_NONE, "worker");
+#endif
+ func(s_pool->arg, s_pool->useCallingThread + i); // Task id; id 0 is used by the calling thread when it takes part in run().
+ }
s_pool->finishEvents[i].post();
}
}
-ThreadPool::ThreadPool()
+ThreadPool::ThreadPool(uint workerCount/*=processorCount()*/, bool useThreadAffinity/*=true*/, bool useCallingThread/*=false*/)
{
s_pool = this; // Worker threads need this to be initialized before they start.
- workerCount = nv::hardwareThreadCount();
- workers = new Thread[workerCount];
+ this->useThreadAffinity = useThreadAffinity;
+ this->workerCount = workerCount;
+ this->useCallingThread = useCallingThread;
+
+ uint threadCount = workerCount - useCallingThread;
- startEvents = new Event[workerCount];
- finishEvents = new Event[workerCount];
+ workers = new Thread[threadCount];
+
+ startEvents = new Event[threadCount];
+ finishEvents = new Event[threadCount];
nvCompilerWriteBarrier(); // @@ Use a memory fence?
- for (uint i = 0; i < workerCount; i++) {
+ if (useCallingThread && useThreadAffinity) {
+ lockThreadToProcessor(0); // Calling thread always locked to processor 0.
+ }
+
+ for (uint i = 0; i < threadCount; i++) {
+ StringBuilder name;
+ name.format("worker %d", i);
+ workers[i].setName(name.release()); // @Leak
workers[i].start(workerFunc, (void *)i);
}
@@ -94,14 +132,28 @@ ThreadPool::~ThreadPool()
start(NULL, NULL);
// Wait until threads actually exit.
- Thread::wait(workers, workerCount);
+ Thread::wait(workers, workerCount - useCallingThread);
delete [] workers;
delete [] startEvents;
delete [] finishEvents;
}
-void ThreadPool::start(ThreadFunc * func, void * arg)
+void ThreadPool::run(ThreadTask * func, void * arg)
+{
+ // Wait until threads are idle.
+ wait();
+
+ start(func, arg); // Wakes the worker threads; start() also waits for idle before publishing 'func'.
+
+ if (useCallingThread) {
+ func(arg, 0); // The calling thread takes part in the work as task id 0.
+ }
+
+ wait(); // Block until every worker thread has finished 'func'.
+}
+
+void ThreadPool::start(ThreadTask * func, void * arg)
{
// Wait until threads are idle.
wait();
@@ -113,7 +165,7 @@ void ThreadPool::start(ThreadFunc * func, void * arg)
allIdle = false;
// Resume threads.
- Event::post(startEvents, workerCount);
+ Event::post(startEvents, workerCount - useCallingThread);
}
void ThreadPool::wait()
@@ -121,7 +173,7 @@ void ThreadPool::wait()
if (!allIdle)
{
// Wait for threads to complete.
- Event::wait(finishEvents, workerCount);
+ Event::wait(finishEvents, workerCount - useCallingThread);
allIdle = true;
}
diff --git a/src/nvthread/ThreadPool.h b/src/nvthread/ThreadPool.h
index fb75b6d..e678b93 100644
--- a/src/nvthread/ThreadPool.h
+++ b/src/nvthread/ThreadPool.h
@@ -14,30 +14,42 @@
// The thread pool runs the same function in all worker threads, the idea is to use this as the foundation of a custom task scheduler.
// When the thread pool starts, the main thread continues running, but the common use case is to inmmediately wait for the termination events of the worker threads.
// @@ The start and wait methods could probably be merged.
+// The pool can also run the thread function on the invoking thread, to avoid thread switches.
namespace nv {
class Thread;
class Event;
+ typedef void ThreadTask(void * context, int id);
+
class ThreadPool {
NV_FORBID_COPY(ThreadPool);
public:
+ static void setup(uint workerCount, bool useThreadAffinity, bool useCallingThread);
+
static ThreadPool * acquire();
static void release(ThreadPool *);
- ThreadPool();
+ ThreadPool(uint workerCount = processorCount(), bool useThreadAffinity = true, bool useCallingThread = false);
~ThreadPool();
- void start(ThreadFunc * func, void * arg);
+ void run(ThreadTask * func, void * arg);
+
+ void start(ThreadTask * func, void * arg);
void wait();
+ //NV_THREAD_LOCAL static uint threadId;
+
private:
static void workerFunc(void * arg);
+ bool useThreadAffinity;
+ bool useCallingThread;
uint workerCount;
+
Thread * workers;
Event * startEvents;
Event * finishEvents;
@@ -45,10 +57,29 @@ namespace nv {
uint allIdle;
// Current function:
- ThreadFunc * func;
+ ThreadTask * func;
void * arg;
};
+
+#if NV_CC_CPP11
+
+ template <typename F>
+ void thread_pool_run(F f) { // Runs f(id) once on each pool task via ThreadPool::run.
+ // Transform lambda into function pointer.
+ auto lambda = [](void* context, int id) {
+ F & f = *reinterpret_cast<F *>(context); // 'context' is the address of the functor passed to run() below.
+ f(id);
+ };
+
+ ThreadPool * pool = ThreadPool::acquire();
+ pool->run(lambda, &f);
+ ThreadPool::release(pool);
+ }
+
+#endif // NV_CC_CPP11
+
+
} // namespace nv
diff --git a/src/nvthread/nvthread.cpp b/src/nvthread/nvthread.cpp
index fe20592..2db7c87 100644
--- a/src/nvthread/nvthread.cpp
+++ b/src/nvthread/nvthread.cpp
@@ -27,15 +27,65 @@
using namespace nv;
+#if NV_OS_WIN32
+
+typedef VOID(WINAPI *LPFN_GSI)(LPSYSTEM_INFO); // GetNativeSystemInfo has no return value.
+typedef BOOL(WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
+
+static bool isWow64() { // Reports whether IsWow64Process flags the current process as running under WOW64.
+ LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
+
+ BOOL wow64 = FALSE;
+
+ if (NULL != fnIsWow64Process) { // The export is looked up dynamically because it may be missing.
+ if (!fnIsWow64Process(GetCurrentProcess(), &wow64)) {
+ wow64 = FALSE; // If error, assume false.
+ }
+ }
+
+ return wow64 != 0;
+}
-// Find the number of cores in the system.
+static void getSystemInfo(SYSTEM_INFO * sysinfo) { // Fills '*sysinfo', preferring GetNativeSystemInfo under WOW64.
+ BOOL success = FALSE;
+
+ if (isWow64()) {
+ LPFN_GSI fnGetNativeSystemInfo = (LPFN_GSI)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "GetNativeSystemInfo");
+ success = (fnGetNativeSystemInfo != NULL); // The call itself cannot report failure; resolving the export is the only failure point.
+ if (success) {
+ fnGetNativeSystemInfo(sysinfo);
+ }
+ }
+
+ if (!success) {
+ GetSystemInfo(sysinfo);
+ }
+}
+
+#endif // NV_OS_WIN32
+
+// Find the number of logical processors in the system.
// Based on: http://stackoverflow.com/questions/150355/programmatically-find-the-number-of-cores-on-a-machine
-// @@ Distinguish between logical and physical cores?
-uint nv::hardwareThreadCount() {
+uint nv::processorCount() {
#if NV_OS_WIN32
SYSTEM_INFO sysinfo;
- GetSystemInfo( &sysinfo );
- return sysinfo.dwNumberOfProcessors;
+ getSystemInfo(&sysinfo);
+ //return sysinfo.dwNumberOfProcessors;
+
+ // Respect process affinity mask?
+ DWORD_PTR pam, sam;
+ GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam);
+
+ // Count number of bits set in the processor affinity mask.
+ uint count = 0;
+ for (int i = 0; i < sizeof(DWORD_PTR) * 8; i++) {
+ if (pam & (DWORD_PTR(1) << i)) count++;
+ }
+ nvDebugCheck(count <= sysinfo.dwNumberOfProcessors);
+
+ return count;
+#elif NV_OS_ORBIS
+ return 6;
#elif NV_OS_XBOX
return 3; // or 6?
#elif NV_OS_LINUX // Linux, Solaris, & AIX
@@ -72,10 +122,211 @@ uint nv::hardwareThreadCount() {
#endif
}
+
uint nv::threadId() {
#if NV_OS_WIN32
return GetCurrentThreadId();
#else
return 0; // @@
#endif
-}
\ No newline at end of file
+}
+
+
+// @@ If we are using fewer worker threads than processors and hyperthreading is available, we probably want to enumerate the logical processors
+// so that the first hardware thread of each physical core goes first. This way, if, say, we leave 2 hardware threads free, we still have one worker
+// thread on each physical core.
+
+// I believe that currently logical processors are enumerated in physical order, that is:
+// 0 = thread a in physical core 0
+// 1 = thread b in physical core 0
+// 2 = thread a in physical core 1
+// ... and so on ...
+// I'm not sure we can actually rely on that. And in any case we should start detecting the number of physical processors, which appears to be a pain
+// to do in a way that's compatible with newer i7 processors.
+
+void nv::lockThreadToProcessor(int idx) { // Pins the calling thread to the idx-th processor enabled in the process affinity mask.
+#if NV_OS_WIN32
+ //nvDebugCheck(idx < processorCount());
+#if 0
+ DWORD_PTR tam = DWORD_PTR(1) << idx;
+#else
+ DWORD_PTR pam = 0, sam = 0;
+ BOOL rc = GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam);
+ if (!rc) pam = 0; // Treat a failed query as "no processor available" instead of scanning an indeterminate mask.
+ // Find the idx-th set bit of the process affinity mask.
+ int pidx = 0;
+ DWORD_PTR tam = 0;
+ for (uint i = 0; i < sizeof(DWORD_PTR) * 8; i++) {
+ DWORD_PTR mask = DWORD_PTR(1) << i;
+ if (pam & mask) {
+ if (pidx == idx) {
+ tam = mask;
+ break;
+ }
+ pidx++;
+ }
+ }
+
+ nvDebugCheck(tam != 0);
+#endif
+
+ if (tam != 0) SetThreadAffinityMask(GetCurrentThread(), tam); // A zero mask is rejected by the API; leave the affinity untouched.
+#else
+ // @@ NOP
+#endif
+}
+
+
+void nv::unlockThreadToProcessor() { // Lets the calling thread run on every processor in the process affinity mask again.
+#if NV_OS_WIN32
+ DWORD_PTR pam = 0, sam = 0;
+ BOOL rc = GetProcessAffinityMask(GetCurrentProcess(), &pam, &sam);
+ if (rc) SetThreadAffinityMask(GetCurrentThread(), pam); // Only apply the mask when the query succeeded.
+#else
+ // @@ NOP
+#endif
+}
+
+uint nv::logicalProcessorCount() { // Currently a plain alias of processorCount().
+ return processorCount();
+}
+
+
+#if NV_OS_WIN32
+
+struct LOGICALPROCESSORDATA // Per-logical-processor values filled in by gatherProcessorData().
+{
+ unsigned int nLargestStandardFunctionNumber; // CPUInfo[0] of CPUID function 0.
+ unsigned int nLargestExtendedFunctionNumber; // CPUInfo[0] of CPUID function 0x80000000.
+ int nLogicalProcessorCount; // CPUID function 1, CPUInfo[1] bits 23:16.
+ int nLocalApicId; // CPUID function 1, CPUInfo[1] bits 31:24.
+ int nCPUcore; // Not assigned by gatherProcessorData().
+ int nProcessorId; // Not assigned by gatherProcessorData().
+ int nApicIdCoreIdSize; // CPUID function 0x80000008, CPUInfo[2] bits 15:12.
+ int nNC; // CPUID function 0x80000008, CPUInfo[2] bits 7:0.
+ int nMNC; // 2^nApicIdCoreIdSize, or nNC + 1 when nApicIdCoreIdSize is zero.
+ int nCPUCoresperProcessor; // nNC + 1.
+ int nThreadsperCPUCore; // Derived from HTT, CmpLegacy and nLogicalProcessorCount.
+ int nProcId; // Local APIC id bits above the core-id field.
+ int nCoreId; // Local APIC id bits inside the core-id field.
+ bool CmpLegacy; // Read but not assigned by gatherProcessorData().
+ bool HTT; // CPUID function 1, CPUInfo[3] bit 28.
+};
+
+#define MAX_NUMBER_OF_LOGICAL_PROCESSORS 96
+#define MAX_NUMBER_OF_PHYSICAL_PROCESSORS 8
+#define MAX_NUMBER_OF_IOAPICS 16
+static LOGICALPROCESSORDATA LogicalProcessorMap[MAX_NUMBER_OF_LOGICAL_PROCESSORS];
+static int PhysProcIds[MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS];
+
+static void gatherProcessorData(LOGICALPROCESSORDATA * p) {
+ // Fills '*p' from CPUID queries executed on whichever processor the calling thread is currently running on.
+ int CPUInfo[4] = { 0, 0, 0, 0 };
+ __cpuid(CPUInfo, 0);
+
+ p->nLargestStandardFunctionNumber = CPUInfo[0];
+
+ // Get the information associated with each valid Id
+ for (uint i = 0; i <= p->nLargestStandardFunctionNumber; ++i) {
+ __cpuid(CPUInfo, i);
+
+ // Interpret CPU feature information.
+ if (i == 1) {
+ // Some of the bits of LocalApicId represent the CPU core
+ // within a processor and other bits represent the processor ID.
+ p->nLocalApicId = (CPUInfo[1] >> 24) & 0xff;
+ p->HTT = (CPUInfo[3] >> 28) & 0x1;
+ // recalculate later after 0x80000008
+ p->nLogicalProcessorCount = (CPUInfo[1] >> 16) & 0x0FF;
+ }
+ }
+
+ // Calling __cpuid with 0x80000000 as the InfoType argument
+ // gets the number of valid extended IDs.
+ __cpuid(CPUInfo, 0x80000000);
+ p->nLargestExtendedFunctionNumber = CPUInfo[0];
+
+ // Get the information associated with each extended ID.
+ for (uint i = 0x80000000; i <= p->nLargestExtendedFunctionNumber; ++i) {
+ __cpuid(CPUInfo, i);
+ if (i == 0x80000008) {
+ p->nApicIdCoreIdSize = (CPUInfo[2] >> 12) & 0xF;
+ p->nNC = (CPUInfo[2]) & 0x0FF;
+ }
+ }
+
+ // MNC
+ // A value of zero for ApicIdCoreIdSize indicates that MNC is derived by this
+ // legacy formula: MNC = NC + 1
+ // A non-zero value of ApicIdCoreIdSize means that MNC is 2^ApicIdCoreIdSize
+ if (p->nApicIdCoreIdSize) {
+ p->nMNC = 2;
+ for (uint j = p->nApicIdCoreIdSize - 1; j > 0; j--) {
+ p->nMNC = p->nMNC * 2;
+ }
+ }
+ else {
+ p->nMNC = p->nNC + 1;
+ }
+
+ // If HTT==0, then LogicalProcessorCount is reserved, and the CPU contains
+ // one CPU core and the CPU core is single-threaded.
+ // If HTT==1 and CmpLegacy==1, LogicalProcessorCount represents the number of
+ // CPU cores per processor, where each CPU core is single-threaded. If HTT==1
+ // and CmpLegacy==0, then LogicalProcessorCount is the number of threads per
+ // processor, which is the number of cores times the number of threads per core.
+ // The number of cores is NC+1.
+ p->nCPUCoresperProcessor = p->nNC + 1;
+ p->nThreadsperCPUCore = (p->HTT == 0 ? 1 : (p->CmpLegacy == 1 ? 1 : p->nLogicalProcessorCount / p->nCPUCoresperProcessor ));
+ // NOTE(review): CmpLegacy is never assigned in this function; it is read as whatever the caller's structure already holds. Confirm this is intended.
+ // Calculate a mask for the core IDs
+ uint mask = 1;
+ uint numbits = 1;
+ if (p->nApicIdCoreIdSize) {
+ numbits = p->nApicIdCoreIdSize;
+ for (uint j = p->nApicIdCoreIdSize; j > 1; j--) {
+ mask = (mask << 1) + 1; // Ends with the low 'nApicIdCoreIdSize' bits set.
+ }
+ }
+ p->nProcId = (p->nLocalApicId & ~mask) >> numbits;
+ p->nCoreId = p->nLocalApicId & mask;
+}
+
+
+uint nv::physicalProcessorCount() {
+ // Counts the distinct processor ids reported by the logical processors available to this process.
+ uint lpc = logicalProcessorCount();
+
+ // Get info about each logical processor.
+ for (uint i = 0; i < lpc; i++) {
+ // Make sure thread doesn't change processor while we gather its data.
+ lockThreadToProcessor(i);
+
+ gatherProcessorData(&LogicalProcessorMap[i]);
+ }
+
+ unlockThreadToProcessor(); // Note: this resets the thread to the full process mask, not to its previous affinity.
+
+ memset(PhysProcIds, 0, sizeof(PhysProcIds));
+ for (uint i = 0; i < lpc; i++) {
+ const uint id = (uint)LogicalProcessorMap[i].nProcId;
+ if (id < MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS) PhysProcIds[id]++; // Ids beyond the table are ignored instead of written out of bounds.
+ }
+ uint pc = 0;
+ for (uint i = 0; i < (MAX_NUMBER_OF_PHYSICAL_PROCESSORS + MAX_NUMBER_OF_IOAPICS); i++) {
+ if (PhysProcIds[i] != 0) {
+ pc++;
+ }
+ }
+
+ return pc;
+}
+
+#else
+
+uint nv::physicalProcessorCount() {
+ // @@ No physical-core detection outside NV_OS_WIN32 yet; report the logical processor count instead.
+ return processorCount();
+}
+
+#endif
\ No newline at end of file
diff --git a/src/nvthread/nvthread.h b/src/nvthread/nvthread.h
index c246b57..392bf7d 100644
--- a/src/nvthread/nvthread.h
+++ b/src/nvthread/nvthread.h
@@ -82,13 +82,16 @@ BOOL WINAPI SwitchToThread(void);
namespace nv
{
+ //void initThreadingSystemInfo();
+
// Reentrant.
- uint hardwareThreadCount();
+ uint processorCount();
+ uint logicalProcessorCount();
+ uint physicalProcessorCount();
- // Not thread-safe. Use from main thread only.
- void initWorkers();
- void shutWorkers();
- void setWorkerFunction(void * func);
+ // Locks the current thread to the given logical processor index.
+ void lockThreadToProcessor(int idx);
+ void unlockThreadToProcessor();
uint threadId();
diff --git a/src/nvtt/CompressorDXT5_RGBM.cpp b/src/nvtt/CompressorDXT5_RGBM.cpp
index 21d4b06..99dd412 100755
--- a/src/nvtt/CompressorDXT5_RGBM.cpp
+++ b/src/nvtt/CompressorDXT5_RGBM.cpp
@@ -17,7 +17,7 @@
using namespace nv;
-static uint atomic_counter = 0;
+//static uint atomic_counter = 0;
float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_weights[16], float min_m, BlockDXT5 * output) {
@@ -87,10 +87,15 @@ float nv::compress_dxt5_rgbm(const Vector4 input_colors[16], const float input_w
m = (m - min_m) / (1 - min_m);
+#if 0
+ // IC: This does indeed happen. What does that mean? The best choice of m is above the available range. If this happened too often it would make sense to scale m in
+ // the pixel shader to allow for more accurate reconstruction. However, that scaling would reduce the precision over the [0-1] range. I haven't measured how much
+ // error is introduced by the clamping vs. how much the error would change with the increased range.
if (m > 1.0f) {
uint counter = atomicIncrement(&atomic_counter);
printf("It happens %u times!", counter);
}
+#endif
M.alpha[i] = U8(ftoi_round(saturate(m) * 255.0f));
M.weights[i] = input_weights[i];
diff --git a/src/nvtt/CompressorRGB.cpp b/src/nvtt/CompressorRGB.cpp
index 1ef7327..7578883 100644
--- a/src/nvtt/CompressorRGB.cpp
+++ b/src/nvtt/CompressorRGB.cpp
@@ -193,7 +193,7 @@ namespace
F.field.biasedexponent = 0xFF;
F.field.mantissa = M << (23 - 6);
-
+ return F.value;
#if 0
// X Channel (6-bit mantissa)
Mantissa = pSource->xm;
@@ -228,9 +228,7 @@ namespace
Result[0] = ((Exponent + 112) << 23) | (Mantissa << 17);
}
- }
#endif
-
}
// https://www.opengl.org/registry/specs/EXT/texture_shared_exponent.txt
diff --git a/src/nvtt/Surface.cpp b/src/nvtt/Surface.cpp
index aa612ee..869f0f1 100644
--- a/src/nvtt/Surface.cpp
+++ b/src/nvtt/Surface.cpp
@@ -52,12 +52,12 @@ using namespace nvtt;
namespace
{
// 1 -> 1, 2 -> 2, 3 -> 2, 4 -> 4, 5 -> 4, ...
- static uint previousPowerOfTwo(const uint v)
+ static inline uint previousPowerOfTwo(uint v)
{
return nextPowerOfTwo(v + 1) / 2;
}
- static uint nearestPowerOfTwo(const uint v)
+ static inline uint nearestPowerOfTwo(uint v)
{
const uint np2 = nextPowerOfTwo(v);
const uint pp2 = previousPowerOfTwo(v);
@@ -72,6 +72,31 @@ namespace
}
}
+ static inline uint nextMultipleOfFour(uint v)
+ {
+ return ((v + 3u) / 4u) * 4u; // 0 -> 0, 1 -> 4, 4 -> 4, 5 -> 8, ...
+ }
+ static inline uint previousMultipleOfFour(uint v)
+ {
+ return v - (v % 4u); // 3 -> 0, 4 -> 4, 7 -> 4, 8 -> 8, ...
+ }
+
+ static inline uint nearestMultipleOfFour(uint v)
+ {
+ const uint below = previousMultipleOfFour(v);
+ const uint above = nextMultipleOfFour(v);
+ // Pick the closer neighbour; an exact tie (v % 4 == 2) rounds up.
+ if (v - below < above - v)
+ {
+ return below;
+ }
+ else
+ {
+ return above;
+ }
+ }
+
+
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
@@ -225,6 +250,24 @@ void nv::getTargetExtent(int * width, int * height, int * depth, int maxExtent,
h = previousPowerOfTwo(h);
d = previousPowerOfTwo(d);
}
+ else if (roundMode == RoundMode_ToNextMultipleOfFour)
+ {
+ w = nextMultipleOfFour(w);
+ h = nextMultipleOfFour(h);
+ d = nextMultipleOfFour(d);
+ }
+ else if (roundMode == RoundMode_ToNearestMultipleOfFour) // Must test the "nearest" mode; repeating the "next" mode here would make this branch unreachable.
+ {
+ w = nearestMultipleOfFour(w);
+ h = nearestMultipleOfFour(h);
+ d = nearestMultipleOfFour(d);
+ }
+ else if (roundMode == RoundMode_ToPreviousMultipleOfFour)
+ {
+ w = previousMultipleOfFour(w);
+ h = previousMultipleOfFour(h);
+ d = previousMultipleOfFour(d);
+ }
*width = w;
*height = h;
diff --git a/src/nvtt/nvtt.h b/src/nvtt/nvtt.h
index 8724d42..3e261b6 100644
--- a/src/nvtt/nvtt.h
+++ b/src/nvtt/nvtt.h
@@ -240,6 +240,9 @@ namespace nvtt
RoundMode_ToNextPowerOfTwo,
RoundMode_ToNearestPowerOfTwo,
RoundMode_ToPreviousPowerOfTwo,
+ RoundMode_ToNextMultipleOfFour, // (New in NVTT 2.1)
+ RoundMode_ToNearestMultipleOfFour, // (New in NVTT 2.1)
+ RoundMode_ToPreviousMultipleOfFour, // (New in NVTT 2.1)
};
// Alpha mode.
diff --git a/src/nvtt/tools/compress.cpp b/src/nvtt/tools/compress.cpp
index d83241d..86979a1 100644
--- a/src/nvtt/tools/compress.cpp
+++ b/src/nvtt/tools/compress.cpp
@@ -641,8 +641,8 @@ int main(int argc, char *argv[])
//compressionOptions.setPixelFormat(10, 10, 10, 2);
// DXGI_FORMAT_R11G11B10_FLOAT
- compressionOptions.setPixelType(nvtt::PixelType_Float);
- compressionOptions.setPixelFormat(11, 11, 10, 0);
+ //compressionOptions.setPixelType(nvtt::PixelType_Float);
+ //compressionOptions.setPixelFormat(11, 11, 10, 0);
}
}
else if (format == nvtt::Format_BC6)