Tag 2.0.2 release.

castano 16 years ago
parent 34ae5bcb6f
commit 0294c4ad93

@ -1,8 +1,3 @@
NVIDIA Texture Tools version 2.1.0
* CTX1 CUDA compressor.
* DXT1n CUDA compressor.
* Support alpha premultiplication by Charles Nicholson. See issue 30.
NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim.
* Fix indexMirror error reported by Chris Lambert.

@ -2,7 +2,7 @@
--------------------------------------------------------------------------------
NVIDIA Texture Tools
README.txt
Version 2.1
Version 2.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

@ -1 +1 @@
2.1.0
2.0.2

Binary file not shown.

Binary file not shown.

@ -278,6 +278,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

@ -277,6 +277,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

@ -277,6 +277,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvmath\Plane.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
@ -295,6 +299,10 @@
RelativePath="..\..\..\src\nvmath\Matrix.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Plane.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Vector.h"
>

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0
PRODUCTVERSION 2,1,0,0
FILEVERSION 2,0,2,0
PRODUCTVERSION 2,0,2,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0"
VALUE "FileVersion", "2, 0, 2, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0"
VALUE "ProductVersion", "2, 0, 2, 0"
END
END
BLOCK "VarFileInfo"

@ -691,7 +691,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -701,7 +701,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -711,7 +711,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -721,7 +721,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

@ -1,11 +1,11 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SUBDIRS(nvcore)
SUBDIRS(nvmath)
SUBDIRS(nvimage)
SUBDIRS(nvtt)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
# OpenGL
INCLUDE(FindOpenGL)
IF(OPENGL_FOUND)
@ -84,7 +84,6 @@ ELSE(PNG_FOUND)
ENDIF(PNG_FOUND)
# TIFF
SET(TIFF_NAMES libtiff)
INCLUDE(FindTIFF)
IF(TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")

@ -1,147 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_ALGORITHMS_H
#define NV_CORE_ALGORITHMS_H
namespace nv
{
/// Return the maximum of two values.
template <typename T>
inline const T & max(const T & a, const T & b)
{
//return std::max(a, b);
if( a < b ) {
return b;
}
return a;
}
/// Return the minimum of two values.
template <typename T>
inline const T & min(const T & a, const T & b)
{
//return std::min(a, b);
if( b < a ) {
return b;
}
return a;
}
/// Clamp between two values.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
// @@ Swap should be implemented here.
#if 0
// This does not use swap, but copies, in some cases swaps are much faster than copies!
// Container should implement operator[], and size()
template <class Container, class T>
void insertionSort(Container<T> & container)
{
const uint n = container.size();
for (uint i=1; i < n; ++i)
{
T value = container[i];
uint j = i;
while (j > 0 && container[j-1] > value)
{
container[j] = container[j-1];
--j;
}
if (i != j)
{
container[j] = value;
}
}
}
template <class Container, class T>
void quickSort(Container<T> & container)
{
quickSort(container, 0, container.count());
}
{
/* threshhold for transitioning to insertion sort */
while (n > 12) {
int c01,c12,c,m,i,j;
/* compute median of three */
m = n >> 1;
c = p[0] > p[m];
c01 = c;
c = &p[m] > &p[n-1];
c12 = c;
/* if 0 >= mid >= end, or 0 < mid < end, then use mid */
if (c01 != c12) {
/* otherwise, we'll need to swap something else to middle */
int z;
c = p[0] < p[n-1];
/* 0>mid && mid<n: 0>n => n; 0<n => 0 */
/* 0<mid && mid>n: 0>n => 0; 0<n => n */
z = (c == c12) ? 0 : n-1;
swap(p[z], p[m]);
}
/* now p[m] is the median-of-three */
/* swap it to the beginning so it won't move around */
swap(p[0], p[m]);
/* partition loop */
i=1;
j=n-1;
for(;;) {
/* handling of equality is crucial here */
/* for sentinels & efficiency with duplicates */
for (;;++i) {
c = p[i] > p[0];
if (!c) break;
}
a = &p[0];
for (;;--j) {
b=&p[j];
c = p[j] > p[0]
if (!c) break;
}
/* make sure we haven't crossed */
if (i >= j) break;
swap(p[i], p[j]);
++i;
--j;
}
/* recurse on smaller side, iterate on larger */
if (j < (n-i)) {
quickSort(p, j);
p = p+i;
n = n-i;
}
else {
quickSort(p+i, n-i);
n = j;
}
}
insertionSort();
}
#endif // 0
} // nv namespace
#endif // NV_CORE_ALGORITHMS_H

@ -80,13 +80,13 @@ public:
/// Clear all the bits.
void clearAll()
{
memset(m_bitArray.mutableBuffer(), 0, m_bitArray.size());
memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size());
}
/// Set all the bits.
void setAll()
{
memset(m_bitArray.mutableBuffer(), 0xFF, m_bitArray.size());
memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size());
}
/// Toggle all the bits.

@ -3,13 +3,7 @@ ADD_SUBDIRECTORY(poshlib)
SET(CORE_SRCS
nvcore.h
DefsGnucDarwin.h
DefsGnucLinux.h
DefsGnucWin32.h
DefsVcWin32.h
Ptr.h
RefCounted.h
RefCounted.cpp
BitArray.h
Memory.h
Memory.cpp
@ -25,10 +19,7 @@ SET(CORE_SRCS
TextWriter.h
TextWriter.cpp
Radix.h
Radix.cpp
CpuInfo.h
CpuInfo.cpp
Algorithms.h)
Radix.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

@ -19,7 +19,6 @@ Do not use memmove in insert & remove, use copy ctors instead.
#include <nvcore/nvcore.h>
#include <nvcore/Memory.h>
#include <nvcore/Debug.h>
//#include <nvcore/Stream.h>
#include <string.h> // memmove
#include <new> // for placement new
@ -71,10 +70,40 @@ namespace nv
{
// Templates
/// Return the maximum of two values.
template <typename T>
inline const T & max(const T & a, const T & b)
{
//return std::max(a, b);
if( a < b ) {
return b;
}
return a;
}
/// Return the minimum of two values.
template <typename T>
inline const T & min(const T & a, const T & b)
{
//return std::min(a, b);
if( b < a ) {
return b;
}
return a;
}
/// Clamp between two values.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/// Swap two values.
template <typename T>
inline void swap(T & a, T & b)
{
//return std::swap(a, b);
T temp = a;
a = b;
b = temp;
@ -105,6 +134,16 @@ namespace nv
uint operator()(uint x) const { return x; }
};
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for(typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
/** Return the next power of two.
* @see http://graphics.stanford.edu/~seander/bithacks.html
@ -115,7 +154,7 @@ namespace nv
inline uint nextPowerOfTwo( uint x )
{
nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction.
#if 1 // On modern CPUs this is as fast as using the bsr instruction.
x--;
x |= x >> 1;
x |= x >> 2;
@ -138,6 +177,15 @@ namespace nv
return (n & (n-1)) == 0;
}
/// Simple iterator interface.
template <typename T>
struct Iterator
{
virtual void advance();
virtual bool isDone();
virtual T current();
};
/**
* Replacement for std::vector that is easier to debug and provides
@ -204,7 +252,7 @@ namespace nv
const T * buffer() const { return m_buffer; }
/// Get vector pointer.
T * mutableBuffer() { return m_buffer; }
T * unsecureBuffer() { return m_buffer; }
/// Is vector empty.
bool isEmpty() const { return m_size == 0; }

@ -1,88 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/CpuInfo.h>
#include <nvcore/Debug.h>
using namespace nv;
#if NV_OS_WIN32
#define _WIN32_WINNT 0x0501
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow64()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
return false;
}
}
return bIsWow64 == TRUE;
}
#endif // NV_OS_WIN32
uint CpuInfo::processorCount()
{
#if NV_OS_WIN32
SYSTEM_INFO sysInfo;
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
if (isWow64())
{
GetNativeSystemInfo(&sysInfo);
}
else
{
GetSystemInfo(&sysInfo);
}
uint count = (uint)sysInfo.dwNumberOfProcessors;
nvDebugCheck(count >= 1);
return count;
#else
return 1;
#endif
}
uint CpuInfo::coreCount()
{
return 1;
}
bool CpuInfo::hasMMX()
{
return false;
}
bool CpuInfo::hasSSE()
{
return false;
}
bool CpuInfo::hasSSE2()
{
return false;
}
bool CpuInfo::hasSSE3()
{
return false;
}

@ -1,88 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_CPUINFO_H
#define NV_CORE_CPUINFO_H
#include <nvcore/nvcore.h>
#if NV_CC_MSVC
# include <intrin.h> // __rdtsc
#endif
namespace nv
{
// CPU Information.
class CpuInfo
{
static uint processorCount();
static uint coreCount();
static bool hasMMX();
static bool hasSSE();
static bool hasSSE2();
static bool hasSSE3();
};
#if NV_CC_MSVC
#pragma intrinsic(__rdtsc)
inline uint64 rdtsc()
{
return __rdtsc();
}
#endif
#if NV_CC_GNUC
#if defined(__i386__)
inline /*volatile*/ uint64 rdtsc()
{
uint64 x;
//__asm__ volatile ("rdtsc" : "=A" (x));
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
static __inline__ uint64 rdtsc(void)
{
unsigned int hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}
#elif defined(__powerpc__)
static __inline__ uint64 rdtsc(void)
{
uint64 result=0;
unsigned long int upper, lower, tmp;
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
result = upper;
result = result<<32;
result = result|lower;
return(result);
}
#endif
#endif // NV_CC_GNUC
} // nv namespace
#endif // NV_CORE_CPUINFO_H

@ -70,6 +70,8 @@ typedef uint32 uint;
#pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup

@ -8,11 +8,6 @@
#include <stdio.h> // NULL
#define NV_DECLARE_PTR(Class) \
typedef SmartPtr<class Class> Class ## Ptr; \
typedef SmartPtr<const class Class> ClassConst ## Ptr
namespace nv
{
@ -98,23 +93,125 @@ private:
T * m_ptr;
};
#if 0
/** Reference counted base class to be used with Pointer.
*
* The only requirement of the Pointer class is that the RefCounted class implements the
* addRef and release methods.
*/
class RefCounted
{
NV_FORBID_COPY(RefCounted);
public:
/// Ctor.
RefCounted() : m_count(0), m_weak_proxy(NULL)
{
s_total_obj_count++;
}
/// Virtual dtor.
virtual ~RefCounted()
{
nvCheck( m_count == 0 );
nvCheck( s_total_obj_count > 0 );
s_total_obj_count--;
}
/// Increase reference count.
uint addRef() const
{
s_total_ref_count++;
m_count++;
return m_count;
}
/// Decrease reference count and remove when 0.
uint release() const
{
nvCheck( m_count > 0 );
s_total_ref_count--;
m_count--;
if( m_count == 0 ) {
releaseWeakProxy();
delete this;
return 0;
}
return m_count;
}
/// Get weak proxy.
WeakProxy * getWeakProxy() const
{
if (m_weak_proxy == NULL) {
m_weak_proxy = new WeakProxy;
m_weak_proxy->AddRef();
}
return m_weak_proxy;
}
/// Release the weak proxy.
void releaseWeakProxy() const
{
if (m_weak_proxy != NULL) {
m_weak_proxy->NotifyObjectDied();
m_weak_proxy->Release();
m_weak_proxy = NULL;
}
}
/** @name Debug methods: */
//@{
/// Get reference count.
int refCount() const
{
return m_count;
}
/// Get total number of objects.
static int totalObjectCount()
{
return s_total_obj_count;
}
/// Get total number of references.
static int totalReferenceCount()
{
return s_total_ref_count;
}
//@}
private:
NVCORE_API static int s_total_ref_count;
NVCORE_API static int s_total_obj_count;
mutable int m_count;
mutable WeakProxy * weak_proxy;
};
#endif
/// Smart pointer template class.
template <class BaseClass>
class SmartPtr {
class Pointer {
public:
// BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType;
typedef Pointer<BaseClass> ThisType;
/// Default ctor.
SmartPtr() : m_ptr(NULL)
Pointer() : m_ptr(NULL)
{
}
/** Other type assignment. */
template <class OtherBase>
SmartPtr( const SmartPtr<OtherBase> & tc )
Pointer( const Pointer<OtherBase> & tc )
{
m_ptr = static_cast<BaseClass *>( tc.ptr() );
if( m_ptr ) {
@ -123,7 +220,7 @@ public:
}
/** Copy ctor. */
SmartPtr( const ThisType & bc )
Pointer( const ThisType & bc )
{
m_ptr = bc.ptr();
if( m_ptr ) {
@ -131,8 +228,8 @@ public:
}
}
/** Copy cast ctor. SmartPtr(NULL) is valid. */
explicit SmartPtr( BaseClass * bc )
/** Copy cast ctor. Pointer(NULL) is valid. */
explicit Pointer( BaseClass * bc )
{
m_ptr = bc;
if( m_ptr ) {
@ -141,7 +238,7 @@ public:
}
/** Dtor. */
~SmartPtr()
~Pointer()
{
set(NULL);
}
@ -152,14 +249,14 @@ public:
/** -> operator. */
BaseClass * operator -> () const
{
nvCheck( m_ptr != NULL );
piCheck( m_ptr != NULL );
return m_ptr;
}
/** * operator. */
BaseClass & operator*() const
{
nvCheck( m_ptr != NULL );
piCheck( m_ptr != NULL );
return *m_ptr;
}
@ -175,7 +272,7 @@ public:
//@{
/** Other type assignment. */
template <class OtherBase>
void operator = ( const SmartPtr<OtherBase> & tc )
void operator = ( const Pointer<OtherBase> & tc )
{
set( static_cast<BaseClass *>(tc.ptr()) );
}
@ -198,7 +295,7 @@ public:
//@{
/** Other type equal comparation. */
template <class OtherBase>
bool operator == ( const SmartPtr<OtherBase> & other ) const
bool operator == ( const Pointer<OtherBase> & other ) const
{
return m_ptr == other.ptr();
}
@ -217,7 +314,7 @@ public:
/** Other type not equal comparation. */
template <class OtherBase>
bool operator != ( const SmartPtr<OtherBase> & other ) const
bool operator != ( const Pointer<OtherBase> & other ) const
{
return m_ptr != other.ptr();
}

@ -1,9 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "RefCounted.h"
using namespace nv;
int nv::RefCounted::s_total_ref_count = 0;
int nv::RefCounted::s_total_obj_count = 0;

@ -1,114 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_REFCOUNTED_H
#define NV_CORE_REFCOUNTED_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
namespace nv
{
/// Reference counted base class to be used with SmartPtr and WeakPtr.
class RefCounted
{
NV_FORBID_COPY(RefCounted);
public:
/// Ctor.
RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
{
s_total_obj_count++;
}
/// Virtual dtor.
virtual ~RefCounted()
{
nvCheck( m_count == 0 );
nvCheck( s_total_obj_count > 0 );
s_total_obj_count--;
}
/// Increase reference count.
uint addRef() const
{
s_total_ref_count++;
m_count++;
return m_count;
}
/// Decrease reference count and remove when 0.
uint release() const
{
nvCheck( m_count > 0 );
s_total_ref_count--;
m_count--;
if( m_count == 0 ) {
// releaseWeakProxy();
delete this;
return 0;
}
return m_count;
}
/*
/// Get weak proxy.
WeakProxy * getWeakProxy() const
{
if (m_weak_proxy == NULL) {
m_weak_proxy = new WeakProxy;
m_weak_proxy->AddRef();
}
return m_weak_proxy;
}
/// Release the weak proxy.
void releaseWeakProxy() const
{
if (m_weak_proxy != NULL) {
m_weak_proxy->NotifyObjectDied();
m_weak_proxy->Release();
m_weak_proxy = NULL;
}
}
*/
/** @name Debug methods: */
//@{
/// Get reference count.
int refCount() const
{
return m_count;
}
/// Get total number of objects.
static int totalObjectCount()
{
return s_total_obj_count;
}
/// Get total number of references.
static int totalReferenceCount()
{
return s_total_ref_count;
}
//@}
private:
NVCORE_API static int s_total_ref_count;
NVCORE_API static int s_total_obj_count;
mutable int m_count;
// mutable WeakProxy * weak_proxy;
};
} // nv namespace
#endif // NV_CORE_REFCOUNTED_H

@ -48,7 +48,7 @@ const char * TextReader::readToEnd()
m_text.reserve(size + 1);
m_text.resize(size);
m_stream->serialize(m_text.mutableBuffer(), size);
m_stream->serialize(m_text.unsecureBuffer(), size);
m_text.pushBack('\0');
return m_text.buffer();

@ -22,7 +22,6 @@
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Stream.h>
#include <nvcore/Containers.h> // swap
#include "ColorBlock.h"
#include "BlockDXT.h"

@ -1,6 +1,5 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h> // swap
#include <nvmath/Box.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/Image.h>

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (2); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;

@ -11,16 +11,16 @@ namespace nv
class Vector4;
/// Base filter class.
class NVIMAGE_CLASS Filter
class Filter
{
public:
Filter(float width);
virtual ~Filter();
NVIMAGE_API Filter(float width);
NVIMAGE_API virtual ~Filter();
float width() const { return m_width; }
float sampleDelta(float x, float scale) const;
float sampleBox(float x, float scale, int samples) const;
float sampleTriangle(float x, float scale, int samples) const;
NVIMAGE_API float width() const { return m_width; }
NVIMAGE_API float sampleDelta(float x, float scale) const;
NVIMAGE_API float sampleBox(float x, float scale, int samples) const;
NVIMAGE_API float sampleTriangle(float x, float scale, int samples) const;
virtual float evaluate(float x) const = 0;
@ -29,56 +29,56 @@ namespace nv
};
// Box filter.
class NVIMAGE_CLASS BoxFilter : public Filter
class BoxFilter : public Filter
{
public:
BoxFilter();
BoxFilter(float width);
virtual float evaluate(float x) const;
NVIMAGE_API BoxFilter();
NVIMAGE_API BoxFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Triangle (bilinear/tent) filter.
class NVIMAGE_CLASS TriangleFilter : public Filter
class TriangleFilter : public Filter
{
public:
TriangleFilter();
TriangleFilter(float width);
virtual float evaluate(float x) const;
NVIMAGE_API TriangleFilter();
NVIMAGE_API TriangleFilter(float width);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Quadratic (bell) filter.
class NVIMAGE_CLASS QuadraticFilter : public Filter
class QuadraticFilter : public Filter
{
public:
QuadraticFilter();
virtual float evaluate(float x) const;
NVIMAGE_API QuadraticFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Cubic filter from Thatcher Ulrich.
class NVIMAGE_CLASS CubicFilter : public Filter
class CubicFilter : public Filter
{
public:
CubicFilter();
virtual float evaluate(float x) const;
NVIMAGE_API CubicFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Cubic b-spline filter from Paul Heckbert.
class NVIMAGE_CLASS BSplineFilter : public Filter
class BSplineFilter : public Filter
{
public:
BSplineFilter();
virtual float evaluate(float x) const;
NVIMAGE_API BSplineFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
/// Mitchell & Netravali's two-param cubic
/// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88
class NVIMAGE_CLASS MitchellFilter : public Filter
class MitchellFilter : public Filter
{
public:
MitchellFilter();
virtual float evaluate(float x) const;
NVIMAGE_API MitchellFilter();
NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float b);
NVIMAGE_API void setParameters(float a, float b);
private:
float p0, p2, p3;
@ -86,29 +86,29 @@ namespace nv
};
// Lanczos3 filter.
class NVIMAGE_CLASS LanczosFilter : public Filter
class LanczosFilter : public Filter
{
public:
LanczosFilter();
virtual float evaluate(float x) const;
NVIMAGE_API LanczosFilter();
NVIMAGE_API virtual float evaluate(float x) const;
};
// Sinc filter.
class NVIMAGE_CLASS SincFilter : public Filter
class SincFilter : public Filter
{
public:
SincFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API SincFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
};
// Kaiser filter.
class NVIMAGE_CLASS KaiserFilter : public Filter
class KaiserFilter : public Filter
{
public:
KaiserFilter(float w);
virtual float evaluate(float x) const;
NVIMAGE_API KaiserFilter(float w);
NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float stretch);
NVIMAGE_API void setParameters(float a, float stretch);
private:
float alpha;
@ -118,12 +118,12 @@ namespace nv
/// A 1D kernel. Used to precompute filter weights.
class NVIMAGE_CLASS Kernel1
class Kernel1
{
NV_FORBID_COPY(Kernel1);
public:
Kernel1(const Filter & f, int iscale, int samples = 32);
~Kernel1();
NVIMAGE_API Kernel1(const Filter & f, int iscale, int samples = 32);
NVIMAGE_API ~Kernel1();
float valueAt(uint x) const {
nvDebugCheck(x < (uint)m_windowSize);
@ -138,7 +138,7 @@ namespace nv
return m_width;
}
void debugPrint();
NVIMAGE_API void debugPrint();
private:
int m_windowSize;
@ -148,15 +148,15 @@ namespace nv
/// A 2D kernel.
class NVIMAGE_CLASS Kernel2
class Kernel2
{
public:
Kernel2(uint width);
Kernel2(const Kernel2 & k);
~Kernel2();
NVIMAGE_API Kernel2(uint width);
NVIMAGE_API Kernel2(const Kernel2 & k);
NVIMAGE_API ~Kernel2();
void normalize();
void transpose();
NVIMAGE_API void normalize();
NVIMAGE_API void transpose();
float valueAt(uint x, uint y) const {
return m_data[y * m_windowSize + x];
@ -166,12 +166,12 @@ namespace nv
return m_windowSize;
}
void initLaplacian();
void initEdgeDetection();
void initSobel();
void initPrewitt();
NVIMAGE_API void initLaplacian();
NVIMAGE_API void initEdgeDetection();
NVIMAGE_API void initSobel();
NVIMAGE_API void initPrewitt();
void initBlendedSobel(const Vector4 & scale);
NVIMAGE_API void initBlendedSobel(const Vector4 & scale);
private:
const uint m_windowSize;
@ -180,12 +180,12 @@ namespace nv
/// A 1D polyphase kernel
class NVIMAGE_CLASS PolyphaseKernel
class PolyphaseKernel
{
NV_FORBID_COPY(PolyphaseKernel);
public:
PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
~PolyphaseKernel();
NVIMAGE_API PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
NVIMAGE_API ~PolyphaseKernel();
int windowSize() const {
return m_windowSize;
@ -205,7 +205,7 @@ namespace nv
return m_data[column * m_windowSize + x];
}
void debugPrint() const;
NVIMAGE_API void debugPrint() const;
private:
int m_windowSize;

@ -636,7 +636,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y];
@ -657,7 +657,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
float * tmp_channel = tmp_image->channel(c);
for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer());
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) {
tmp_channel[y * w + x] = tmp_column[y];

@ -3,10 +3,8 @@
#ifndef NV_IMAGE_FLOATIMAGE_H
#define NV_IMAGE_FLOATIMAGE_H
#include <stdlib.h> // abs
#include <nvcore/Debug.h>
#include <nvcore/Algorithms.h> // clamp
#include <nvcore/Containers.h> // clamp
#include <nvimage/nvimage.h>
namespace nv

@ -2,7 +2,6 @@
#include <nvcore/Debug.h>
#include <nvcore/Ptr.h>
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>

@ -162,9 +162,6 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
if (strCaseCmp(extension, ".pfm") == 0) {
return loadFloatPFM(fileName, s);
}
if (strCaseCmp(extension, ".hdr") == 0) {
return loadGridFloat(fileName, s);
}
*/
return NULL;
@ -797,7 +794,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
// Read the entire file.
Array<uint8> byte_array;
byte_array.resize(s.size());
s.serialize(byte_array.mutableBuffer(), s.size());
s.serialize(byte_array.unsecureBuffer(), s.size());
jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr;
@ -1092,8 +1089,7 @@ namespace
virtual void seekg(Imf::Int64 pos)
{
nvDebugCheck(pos >= 0 && pos < UINT_MAX);
m_stream.seek((uint)pos);
m_stream.seek(pos);
}
virtual void clear()
@ -1302,77 +1298,6 @@ bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage,
return true;
}
#pragma warning(disable : 4996)
NVIMAGE_API FloatImage * nv::ImageIO::loadGridFloat(const char * fileName, Stream & s)
{
nvCheck(s.isLoading());
nvCheck(!s.isError());
Tokenizer parser(&s);
parser.nextLine();
if (parser.token() != "ncols")
{
nvDebug("Failed to find 'ncols' token in file '%s'.\n", fileName);
return NULL;
}
parser.nextToken(true);
const int nCols = parser.token().toInt();
parser.nextToken(true);
if (parser.token() != "nrows")
{
nvDebug("Failed to find 'nrows' token in file '%s'.\n", fileName);
return NULL;
}
parser.nextToken(true);
const int nRows = parser.token().toInt();
/* There's a byte order defined in the header. We could read it. However, here we
just assume that it matches the platform's byte order.
// There is then a bunch of data that we don't care about (lat, long definitions, etc).
for (int i=0; i!=9; ++i)
parser.nextToken(true);
if (parser.token() != "byteorder")
return NULL;
parser.nextToken(true);
const Stream::ByteOrder byteOrder = (parser.token() == "LSBFIRST")? Stream::LittleEndian: Stream::BigEndian;
*/
// GridFloat comes in two files: an ASCII header which was parsed above (.hdr) and a big blob
// of binary data in a .flt file.
Path dataPath(fileName);
dataPath.stripExtension();
dataPath.append(".flt");
// Open the binary data.
FILE* file = fopen(dataPath.fileName(), "rb");
if (!file)
{
nvDebug("Failed to find GridFloat blob file '%s' corresponding to '%s'.\n", dataPath.fileName(), fileName);
return NULL;
}
// Allocate image.
AutoPtr<FloatImage> fimage(new FloatImage());
fimage->allocate(1, nCols, nRows);
float * channel = fimage->channel(0);
// The binary blob is defined to be in row-major order, containing IEEE floats.
// So we can just slurp it in. Theoretically, we ought to use the byte order.
const size_t nRead = fread((void*) channel, sizeof(float), nRows * nCols, file);
fclose(file);
return fimage.release();
}
#endif
#if 0

@ -47,15 +47,10 @@ namespace nv
NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
#endif
/*
NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
// GridFloat is a simple, open format for terrain elevation data. See http://ned.usgs.gov/Ned/faq.asp.
// Expects: 1) fileName will be an ".hdr" header file, 2) there will also exist a corresponding float data
// blob in a ".flt" file. (This is what USGS gives you.)
NVIMAGE_API FloatImage * loadGridFloat(const char * fileName, Stream & s);
*/
// NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
// NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
} // ImageIO namespace
} // nv namespace

@ -12,10 +12,6 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
@@ This code needs to be reviewed, I'm not sure it's correct.
*/
#include <string.h> // memset
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/Image.h>

@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
return;
}
}
normal = nv::normalize(normal, epsilon);
normal = ::normalize(normal, epsilon);
tangent -= normal * dot(normal, tangent);
bitangent -= normal * dot(normal, bitangent);
@ -68,8 +68,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
}
else
{
#if 0
tangent = nv::normalize(tangent, epsilon);
tangent = ::normalize(tangent, epsilon);
bitangent -= tangent * dot(tangent, bitangent);
if (length(bitangent) < epsilon)
@ -79,47 +78,11 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
}
else
{
bitangent = nv::normalize(bitangent, epsilon);
}
#else
if (length(bitangent) < epsilon)
{
bitangent = cross(tangent, normal);
nvCheck(isNormalized(bitangent));
}
else
{
tangent = nv::normalize(tangent);
bitangent = nv::normalize(bitangent);
Vector3 bisector = nv::normalize(tangent + bitangent);
Vector3 axis = cross(bisector, normal);
nvDebugCheck(isNormalized(axis, epsilon));
nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
if (dot(axis, tangent) > 0)
{
tangent = nv::normalize(bisector + axis);
bitangent = nv::normalize(bisector - axis);
}
else
{
tangent = nv::normalize(bisector - axis);
bitangent = nv::normalize(bisector + axis);
}
tangent = ::normalize(tangent, epsilon);
}
#endif
}
/*// Check vector lengths.
if (!isNormalized(normal, epsilon))
{
nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
}*/
// Check vector lengths.
nvCheck(isNormalized(normal, epsilon));
nvCheck(isNormalized(tangent, epsilon));
nvCheck(isNormalized(bitangent, epsilon));
@ -162,18 +125,9 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
bitangent = cross(normal, tangent);
}
bool Basis::isValid() const
{
if (equal(normal, Vector3(zero))) return false;
if (equal(tangent, Vector3(zero))) return false;
if (equal(bitangent, Vector3(zero))) return false;
if (equal(determinant(), 0.0f)) return false;
return true;
}
/*
/// Transform by this basis. (From this basis to object space).
Vector3 Basis::transform(Vector3::Arg v) const
{
@ -190,31 +144,30 @@ Vector3 Basis::transformT(Vector3::Arg v)
}
/// Transform by the inverse. (From object space to this basis).
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned.
/// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
Vector3 Basis::transformI(Vector3::Arg v) const
{
const float det = determinant();
nvDebugCheck(!equal(det, 0.0f, 0.0f));
nvCheck(!equalf(det, 0.0f));
const float idet = 1.0f / det;
// Rows of the inverse matrix.
Vector3 r0(
(bitangent.y() * normal.z() - bitangent.z() * normal.y()),
-(bitangent.x() * normal.z() - bitangent.z() * normal.x()),
(bitangent.x() * normal.y() - bitangent.y() * normal.x()));
Vector3 r1(
-(tangent.y() * normal.z() - tangent.z() * normal.y()),
(tangent.x() * normal.z() - tangent.z() * normal.x()),
-(tangent.x() * normal.y() - tangent.y() * normal.x()));
Vector3 r2(
(tangent.y() * bitangent.z() - tangent.z() * bitangent.y()),
-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()),
(tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet;
Vector3 r0, r1, r2;
r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
}
*/

@ -54,8 +54,7 @@ namespace nv
tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
}
bool isValid() const;
/*
// Get transform matrix for this basis.
NVMATH_API Matrix matrix() const;
@ -67,7 +66,7 @@ namespace nv
// Transform by the inverse. (From object space to this basis).
NVMATH_API Vector3 transformI(Vector3::Arg v) const;
*/
Vector3 tangent;
Vector3 bitangent;

@ -9,7 +9,6 @@
namespace nv
{
class Stream;
/// Axis Aligned Bounding Box.
class Box
@ -28,13 +27,11 @@ public:
// Cast operators.
operator const float * () const { return reinterpret_cast<const float *>(this); }
// Min corner of the box.
Vector3 minCorner() const { return m_mins; }
Vector3 & minCorner() { return m_mins; }
/// Min corner of the box.
Vector3 mins() const { return m_mins; }
// Max corner of the box.
Vector3 maxCorner() const { return m_maxs; }
Vector3 & maxCorner() { return m_maxs; }
/// Max corner of the box.
Vector3 maxs() const { return m_maxs; }
/// Clear the bounds.
void clearBounds()
@ -111,7 +108,7 @@ public:
float area() const
{
const Vector3 d = extents();
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
}
/// Get the volume of the box.
@ -121,16 +118,6 @@ public:
return 8.0f * (d.x() * d.y() * d.z());
}
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
friend Stream & operator<< (Stream & s, Box & box);
private:
Vector3 m_mins;
@ -138,6 +125,15 @@ private:
};
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace

@ -5,19 +5,13 @@ SET(MATH_SRCS
Vector.h
Matrix.h
Quaternion.h
Plane.h Plane.cpp
Box.h
Color.h
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp
Basis.h Basis.cpp
Triangle.h Triangle.cpp TriBox.cpp
Polygon.h Polygon.cpp
TypeSerialization.h TypeSerialization.cpp
Sparse.h Sparse.cpp
Solver.h Solver.cpp
KahanSum.h)
Triangle.h Triangle.cpp TriBox.cpp)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

@ -1,38 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_KAHANSUM_H
#define NV_MATH_KAHANSUM_H
#include <nvmath/nvmath.h>
namespace nv
{
class KahanSum
{
public:
KahanSum() : accum(0.0f), err(0) {};
void add(float f)
{
float compensated = f + err;
float tmp = accum + compensated;
err = accum - tmp;
err += compensated;
accum = tmp;
}
float sum() const
{
return accum;
}
private:
float accum;
float err;
};
} // nv namespace
#endif // NV_MATH_KAHANSUM_H

@ -1,168 +0,0 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#include <nvmath/Polygon.h>
#include <nvmath/Triangle.h>
#include <nvmath/Plane.h>
using namespace nv;
Polygon::Polygon()
{
}
Polygon::Polygon(const Triangle & t)
{
pointArray.resize(3);
pointArray[0] = t.v[0];
pointArray[1] = t.v[1];
pointArray[2] = t.v[2];
}
Polygon::Polygon(const Vector3 * points, uint vertexCount)
{
pointArray.resize(vertexCount);
for (uint i = 0; i < vertexCount; i++)
{
pointArray[i] = points[i];
}
}
/// Compute polygon area.
float Polygon::area() const
{
float total = 0;
const uint pointCount = pointArray.count();
for (uint i = 2; i < pointCount; i++)
{
Vector3 v1 = pointArray[i-1] - pointArray[0];
Vector3 v2 = pointArray[i] - pointArray[0];
total += 0.5f * length(cross(v1, v2));
}
return total;
}
/// Get the bounds of the polygon.
Box Polygon::bounds() const
{
Box bounds;
bounds.clearBounds();
foreach(p, pointArray)
{
bounds.addPointToBounds(pointArray[p]);
}
return bounds;
}
/// Get the plane of the polygon.
Plane Polygon::plane() const
{
// @@ Do something better than this?
Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
return Vector4(n, dot(n, pointArray[0]));
}
/// Clip polygon to box.
uint Polygon::clipTo(const Box & box)
{
const Plane posX( 1, 0, 0, box.maxCorner().x());
const Plane negX(-1, 0, 0,-box.minCorner().x());
const Plane posY( 0, 1, 0, box.maxCorner().y());
const Plane negY( 0,-1, 0,-box.minCorner().y());
const Plane posZ( 0, 0, 1, box.maxCorner().z());
const Plane negZ( 0, 0,-1,-box.minCorner().z());
if (clipTo(posX) == 0) return 0;
if (clipTo(negX) == 0) return 0;
if (clipTo(posY) == 0) return 0;
if (clipTo(negY) == 0) return 0;
if (clipTo(posZ) == 0) return 0;
if (clipTo(negZ) == 0) return 0;
return pointArray.count();
}
/// Clip polygon to plane.
uint Polygon::clipTo(const Plane & plane)
{
int count = 0;
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount + 1); // @@ Do not create copy every time.
Vector3 prevPoint = pointArray[pointCount - 1];
float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
for (uint i = 0; i < pointCount; i++)
{
const Vector3 point = pointArray[i];
float dist = dot(plane.vector(), point) - plane.offset();
// @@ Handle points on plane better.
if (dist <= 0) // interior.
{
if (prevDist > 0) // exterior
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
// Add interior point.
newPointArray.append(point);
}
else if (dist > 0 && prevDist < 0)
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
prevPoint = point;
prevDist = dist;
}
swap(pointArray, newPointArray);
return count;
}
void Polygon::removeColinearPoints()
{
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount);
for (uint i = 0 ; i < pointCount; i++)
{
int j = (i + 1) % pointCount;
int k = (i + pointCount - 1) % pointCount;
Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
if (dot(v1, v2) < 0.999)
{
newPointArray.append(pointArray[i]);
}
}
swap(pointArray, newPointArray);
}

@ -1,45 +0,0 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#ifndef NV_MATH_POLYGON_H
#define NV_MATH_POLYGON_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Box.h>
namespace nv
{
class Box;
class Plane;
class Triangle;
class Polygon
{
NV_FORBID_COPY(Polygon);
public:
Polygon();
Polygon(const Triangle & t);
Polygon(const Vector3 * points, uint vertexCount);
float area() const;
Box bounds() const;
Plane plane() const;
uint clipTo(const Box & box);
uint clipTo(const Plane & plane);
void removeColinearPoints();
private:
Array<Vector3> pointArray;
};
} // nv namespace
#endif // NV_MATH_POLYGON_H

@ -51,6 +51,7 @@ namespace nv
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
{
// @@ Efficient SIMD implementation?
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
@ -58,40 +59,6 @@ namespace nv
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
}
inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
{
return Quaternion(
+ a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.z()*b.x() + a.w()*b.y(),
+ a.x() * b.y() - a.y()*b.x() + a.w()*b.z(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
}
inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
{
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
}
inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion scale(Quaternion::Arg q, float s)
{
return scale(q.asVector(), s);
@ -155,24 +122,6 @@ namespace nv
return Quaternion(Vector4(v * s, c));
}
inline Vector3 imag(Quaternion::Arg q)
{
return q.asVector().xyz();
}
inline float real(Quaternion::Arg q)
{
return q.w();
}
/// Transform vector.
inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
{
Quaternion t = q * v * conjugate(q);
return imag(t);
}
} // nv namespace

@ -1,726 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Solver.h>
using namespace nv;
namespace
{
class Preconditioner
{
public:
// Virtual dtor.
virtual ~Preconditioner() { }
// Apply preconditioning step.
virtual void apply(const FullVector & x, FullVector & y) const = 0;
};
// Jacobi preconditioner.
class JacobiPreconditioner : public Preconditioner
{
public:
JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
{
nvCheck(M.isSquare());
for(uint x = 0; x < M.width(); x++)
{
float elem = M.getCoefficient(x, x);
nvDebugCheck( elem != 0.0f );
if (symmetric)
{
m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
}
else
{
m_inverseDiagonal[x] = 1.0f / elem;
}
}
}
void apply(const FullVector & x, FullVector & y) const
{
y *= x;
}
private:
FullVector m_inverseDiagonal;
};
} // namespace
static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
// Solve the symmetric system: At·A·x = At·b
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
const uint D = A.width();
FullVector Atb(D);
mult(Transposed, A, b, Atb);
SparseMatrix AtA(D);
mult(Transposed, A, NoTransposed, A, AtA);
SymmetricSolver(AtA, Atb, x, epsilon);
}
// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width() - lockedCount);
// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
const uint D = A.width() - lockedCount;
nvDebugCheck(D > 0);
// Compute: b - Al * xl
FullVector b_Alxl(b);
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
}
if (!isFree)
{
b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
}
}
}
// Remove locked columns from A.
SparseMatrix Af(D, A.height());
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
uint ix = column;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
if (column > lockedParameters[i]) ix--; // shift columns
}
if (isFree)
{
Af.setCoefficient(ix, y, A.getRow(y)[e].v);
}
}
}
// Remove elements from x
FullVector xf(D);
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
xf[j++] = x[i];
}
}
// Solve reduced system.
LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
// Copy results back to x.
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
x[i] = xf[j++];
}
}
}
void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.height() == A.width());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(b.dimension() == x.dimension());
// JacobiPreconditioner jacobi(A, true);
// ConjugateGradientSolver(jacobi, A, b, x, epsilon);
ConjugateGradientSolver(A, b, x, epsilon);
}
/**
* Compute the solution of the sparse linear system Ab=x using the Conjugate
* Gradient method.
*
* Solving sparse linear systems:
* (1) A·x = b
*
* The conjugate gradient algorithm solves (1) only in the case that A is
* symmetric and positive definite. It is based on the idea of minimizing the
* function
*
* (2) f(x) = 1/2·x·A·x - b·x
*
* This function is minimized when its gradient
*
* (3) df = A·x - b
*
* is zero, which is equivalent to (1). The minimization is carried out by
* generating a succession of search directions p.k and improved minimizers x.k.
* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k·p.k),
* and x.k+1 is set equal to the new point x.k + alfa.k·p.k. The p.k and x.k are
* built up in such a way that x.k+1 is also the minimizer of f over the whole
* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
* iterations you arrive at the minimizer over the entire vector space, i.e., the
* solution to (1).
*
* For a really good explanation of the method see:
*
* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
* Jonhathan Richard Shewchuk.
*
**/
/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A·x;
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = r;
copy(r, p);
delta_new = dot( r, r );
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A·p
mult(A, p, q);
// alpha = delta_new / p·q
alpha = delta_new / dot( p, q );
// x = alfa·p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A·x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alpha·q
saxpy(-alpha, q, r);
}
delta_old = delta_new;
delta_new = dot( r, r );
beta = delta_new / delta_old;
// p = r + beta·p
copy(r, p);
saxpy(beta, p, r);
}
return i;
}
// Conjugate gradient with preconditioner.
/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
FullVector s(D); // preconditioned
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A·x
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = M^-1 · r
preconditioner.apply(r, p);
//copy(r, p);
delta_new = dot(r, p);
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A·p
mult(A, p, q);
// alpha = delta_new / p·q
alpha = delta_new / dot(p, q);
// x = alfa·p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A·x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alfa·q
saxpy(-alpha, q, r);
}
// s = M^-1 · r
preconditioner.apply(r, s);
//copy(r, s);
delta_old = delta_new;
delta_new = dot( r, s );
beta = delta_new / delta_old;
// p = s + beta·p
copy(s, p);
saxpy(beta, p, s);
}
return i;
}
#if 0 // Nonsymmetric solvers
/** Bi-conjugate gradient method. */
MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 4 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha;
float beta;
DenseVector r(D);
DenseVector rtilde(D);
DenseVector p(D);
DenseVector ptilde(D);
DenseVector q(D);
DenseVector qtilde(D);
DenseVector tmp(D); // temporal vector.
// r = b - A·x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
// p = r;
p.Set( r );
// ptilde = rtilde
ptilde.Set( rtilde );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i < i_max ) {
i++;
rho_1 = DenseVectorDotProduct( r, rtilde );
if( rho_1 == 0 ) {
// method fails.
return -i;
}
if (i == 1) {
p.Set( r );
ptilde.Set( rtilde );
}
else {
beta = rho_1 / rho_2;
// p = r + beta * p;
p.Mad( r, p, beta );
// ptilde = ztilde + beta * ptilde;
ptilde.Mad( rtilde, ptilde, beta );
}
// q = A * p;
A.Product( p, q );
// qtilde = A^t * ptilde;
A.TransProduct( ptilde, qtilde );
alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
// x += alpha * p;
x.Mad( x, p, alpha );
// r -= alpha * q;
r.Mad( r, q, -alpha );
// rtilde -= alpha * qtilde;
rtilde.Mad( rtilde, qtilde, -alpha );
rho_2 = rho_1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges
return i;
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 2 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A·x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
phat.Set( p );
//Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
shat.Set( s );
//Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = D;
// const int i_max = 1000;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A·x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
//phat.Set( p );
M.Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
//printf( "--- Iteration %d: residual = %f\n", i, resid );
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
//shat.Set( s );
M.Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
#endif

@ -1,20 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SOLVER_H
#define NV_MATH_SOLVER_H
#include <nvmath/Sparse.h>
namespace nv
{
// Linear solvers.
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
} // nv namespace
#endif // NV_MATH_SOLVER_H

@ -1,831 +0,0 @@
// This code is in the public domain -- Ignacio Castaño <castanyo@yahoo.es>
#include <nvmath/Sparse.h>
#include <nvmath/KahanSum.h>
using namespace nv;
/// Ctor.
FullVector::FullVector(uint dim)
{
m_array.resize(dim);
}
/// Copy ctor.
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
{
}
/// Copy operator
const FullVector & FullVector::operator=(const FullVector & v)
{
nvCheck(dimension() == v.dimension());
m_array = v.m_array;
return *this;
}
void FullVector::fill(float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] = f;
}
}
void FullVector::operator+= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += v.m_array[i];
}
}
void FullVector::operator-= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= v.m_array[i];
}
}
void FullVector::operator*= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= v.m_array[i];
}
}
void FullVector::operator+= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += f;
}
}
void FullVector::operator-= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= f;
}
}
void FullVector::operator*= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= f;
}
}
void nv::saxpy(float a, const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] += a * x[i];
}
}
void nv::copy(const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] = x[i];
}
}
void nv::scal(float a, FullVector & x)
{
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
x[i] *= a;
}
}
float nv::dot(const FullVector & x, const FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
/*float sum = 0;
for (uint i = 0; i < dim; i++)
{
sum += x[i] * y[i];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < dim; i++)
{
kahan.add(x[i] * y[i]);
}
return kahan.sum();
}
FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
{
m_array.resize(d*d, 0.0f);
}
FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
{
m_array.resize(w*h, 0.0f);
}
FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
{
m_array = m.m_array;
}
const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
float FullMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
return m_array[y * width() + x];
}
void FullMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] = f;
}
void FullMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] += f;
}
void FullMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] *= f;
}
float FullMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
float sum = 0;
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
sum += m_array[y * count + i] * v[i];
}
return sum;
}
void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
v[i] += m_array[y * count + i];
}
}
// y = M * x
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// Multiply a row of A by a column of B.
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
{
const uint w = (TA == NoTransposed) ? A.width() : A.height();
nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
float sum = 0.0f;
for (uint k = 0; k < w; k++)
{
const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
sum += a * b;
}
return sum;
}
// C = A * B
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
C.setCoefficient(x, y, c);
}
}
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint d) : m_width(d)
{
m_array.resize(d);
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
{
m_array.resize(h);
}
SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
{
m_array = m.m_array;
}
const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
// x is column, y is row
float SparseMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x) return m_array[y][i].v;
}
return 0.0f;
}
void SparseMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v = f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v += f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v *= f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
float SparseMatrix::sumRow(uint y) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v;
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v);
}
return kahan.sum();
}
float SparseMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v * v[m_array[y][i].x];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
}
return kahan.sum();
}
void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck(y < height());
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
v[m_array[y][i].x] += alpha * m_array[y][i].v;
}
}
void SparseMatrix::clearRow(uint y)
{
nvDebugCheck( y < height() );
m_array[y].clear();
}
void SparseMatrix::scaleRow(uint y, float f)
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
m_array[y][i].v *= f;
}
}
void SparseMatrix::normalizeRow(uint y)
{
nvDebugCheck( y < height() );
float norm = 0.0f;
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
float f = m_array[y][i].v;
norm += f * f;
}
scaleRow(y, 1.0f / sqrtf(norm));
}
void SparseMatrix::clearColumn(uint x)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v = 0.0f;
break;
}
}
}
}
void SparseMatrix::scaleColumn(uint x, float f)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v *= f;
break;
}
}
}
}
const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
{
return m_array[y];
}
// y = M * x
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// dot y-row of A by x-column of B
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(x, c.x);
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(x, c.x));
}
return kahan.sum();
}
// dot y-row of A by x-row of B
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(c.x, x);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(c.x, x));
}
return kahan.sum();
}
// dot y-column of A by x-column of B
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
nvDebugCheck(A.height() == B.height());
const uint h = A.height();
/*float sum = 0.0f;
for (uint i = 0; i < h; i++)
{
sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < h; i++)
{
kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
}
return kahan.sum();
}
// C = A * B
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = beta * C.getCoefficient(x, y);
if (TA == NoTransposed && TB == NoTransposed)
{
// dot y-row of A by x-column of B.
c += alpha * dotRowColumn(y, A, x, B);
}
else if (TA == Transposed && TB == Transposed)
{
// dot y-column of A by x-row of B.
c += alpha * dotRowColumn(x, B, y, A);
}
else if (TA == Transposed && TB == NoTransposed)
{
// dot y-column of A by x-column of B.
c += alpha * dotColumnColumn(y, A, x, B);
}
else if (TA == NoTransposed && TB == Transposed)
{
// dot y-row of A by x-row of B.
c += alpha * dotRowRow(y, A, x, B);
}
if (c != 0.0f)
{
C.setCoefficient(x, y, c);
}
}
}
}
// C = At * A
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
{
// This is quite expensive...
mult(Transposed, A, NoTransposed, A, C);
}

@ -1,198 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SPARSE_H
#define NV_MATH_SPARSE_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
// Full and sparse vector and matrix classes. BLAS subset.
namespace nv
{
class FullVector;
class FullMatrix;
class SparseMatrix;
/// Fixed size vector class.
class FullVector
{
public:
FullVector(uint dim);
FullVector(const FullVector & v);
const FullVector & operator=(const FullVector & v);
uint dimension() const { return m_array.count(); }
const float & operator[]( uint index ) const { return m_array[index]; }
float & operator[] ( uint index ) { return m_array[index]; }
void fill(float f);
void operator+= (const FullVector & v);
void operator-= (const FullVector & v);
void operator*= (const FullVector & v);
void operator+= (float f);
void operator-= (float f);
void operator*= (float f);
private:
Array<float> m_array;
};
// Pseudo-BLAS interface.
NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
NVMATH_API void copy(const FullVector & x, FullVector & y);
NVMATH_API void scal(float a, FullVector & x);
NVMATH_API float dot(const FullVector & x, const FullVector & y);
enum Transpose
{
NoTransposed = 0,
Transposed = 1
};
/// Full matrix class.
class FullMatrix
{
public:
FullMatrix(uint d);
FullMatrix(uint w, uint h);
FullMatrix(const FullMatrix & m);
const FullMatrix & operator=(const FullMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_height; }
bool isSquare() const { return m_width == m_height; }
float getCoefficient(uint x, uint y) const;
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
protected:
bool isValid() const {
return m_array.size() == (m_width * m_height);
}
private:
const uint m_width;
const uint m_height;
Array<float> m_array;
};
NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
/**
* Sparse matrix class. The matrix is assumed to be sparse and to have
* very few non-zero elements, for this reason it's stored in indexed
* format. To multiply column vectors efficiently, the matrix stores
* the elements in indexed-column order, there is a list of indexed
* elements for each row of the matrix. As with the FullVector the
* dimension of the matrix is constant.
**/
class SparseMatrix
{
friend class FullMatrix;
public:
// An element of the sparse array.
struct Coefficient {
uint x; // column
float v; // value
};
public:
SparseMatrix(uint d);
SparseMatrix(uint w, uint h);
SparseMatrix(const SparseMatrix & m);
const SparseMatrix & operator=(const SparseMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_array.count(); }
bool isSquare() const { return width() == height(); }
float getCoefficient(uint x, uint y) const; // x is column, y is row
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float sumRow(uint y) const;
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
void clearRow(uint y);
void scaleRow(uint y, float f);
void normalizeRow(uint y);
void clearColumn(uint x);
void scaleColumn(uint x, float f);
const Array<Coefficient> & getRow(uint y) const;
private:
/// Number of columns.
const uint m_width;
/// Array of matrix elements.
Array< Array<Coefficient> > m_array;
};
NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
// C = At * A
NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
} // nv namespace
#endif // NV_MATH_SPARSE_H

@ -3,7 +3,6 @@
#ifndef NV_MATH_SPHERICALHARMONIC_H
#define NV_MATH_SPHERICALHARMONIC_H
#include <string.h> // memcpy
#include <nvmath/Vector.h>
namespace nv

@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
if(min>rad || max<-rad) return false;
bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
// use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions:
@ -170,7 +170,7 @@ bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const T
}
bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{
// use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions:

@ -1,82 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Stream.h>
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvmath/Quaternion.h>
#include <nvmath/Basis.h>
#include <nvmath/Box.h>
#include <nvmath/TypeSerialization.h>
using namespace nv;
Stream & nv::operator<< (Stream & s, Vector2 & v)
{
float x = v.x();
float y = v.y();
s << x << y;
if (s.isLoading())
{
v.set(x, y);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector3 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
s << x << y << z;
if (s.isLoading())
{
v.set(x, y, z);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector4 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
float w = v.w();
s << x << y << z << w;
if (s.isLoading())
{
v.set(x, y, z, w);
}
return s;
}
Stream & nv::operator<< (Stream & s, Matrix & m)
{
return s;
}
Stream & nv::operator<< (Stream & s, Quaternion & q)
{
return s << q.asVector();
}
Stream & nv::operator<< (Stream & s, Basis & basis)
{
return s << basis.tangent << basis.bitangent << basis.normal;
}
Stream & nv::operator<< (Stream & s, Box & box)
{
return s << box.m_mins << box.m_maxs;
}

@ -1,32 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_TYPESERIALIZATION_H
#define NV_MATH_TYPESERIALIZATION_H
#include <nvmath/nvmath.h>
namespace nv
{
class Stream;
class Vector2;
class Vector3;
class Vector4;
class Matrix;
class Quaternion;
struct Basis;
class Box;
NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
NVMATH_API Stream & operator<< (Stream & s, Box & obj);
} // nv namespace
#endif // NV_MATH_TYPESERIALIZATION_H

@ -4,7 +4,7 @@
#define NV_MATH_VECTOR_H
#include <nvmath/nvmath.h>
#include <nvcore/Algorithms.h> // min, max
#include <nvcore/Containers.h> // min, max
namespace nv
{
@ -71,7 +71,6 @@ public:
const Vector2 & xy() const;
scalar component(uint idx) const;
void setComponent(uint idx, scalar f);
const scalar * ptr() const;
@ -240,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
inline scalar Vector3::component(uint idx) const
{
nvDebugCheck(idx < 3);
if (idx == 0) return m_x;
if (idx == 1) return m_y;
if (idx == 2) return m_z;
if (idx == 0) return x();
if (idx == 1) return y();
if (idx == 2) return z();
nvAssume(false);
return 0.0f;
}
inline void Vector3::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 3);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
else if (idx == 2) m_z = f;
}
inline const scalar * Vector3::ptr() const
{
return &m_x;
@ -486,35 +477,6 @@ inline scalar length(Vector2::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector2::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
nvDebugCheck(!isZero(l, epsilon));
Vector2 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -633,11 +595,6 @@ inline scalar length(Vector3::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector3::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
@ -759,11 +716,6 @@ inline scalar length(Vector4::Arg v)
return sqrtf(length_squared(v));
}
inline scalar inverse_length(Vector4::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);

@ -136,11 +136,6 @@ inline float lerp(float f0, float f1, float t)
return f0 * s + f1 * t;
}
inline float square(float f)
{
return f * f;
}
} // nv
#endif // NV_MATH_H

@ -79,9 +79,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)

@ -56,7 +56,7 @@ namespace
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
if (format == Format_DXT1 || format == Format_DXT1a) {
return 8;
}
else if (format == Format_DXT3) {
@ -71,9 +71,6 @@ namespace
else if (format == Format_BC5) {
return 16;
}
else if (format == Format_CTX1) {
return 8;
}
return 0;
}
@ -348,7 +345,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
@ -369,10 +366,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
@ -429,7 +422,7 @@ bool Compressor::Private::compressMipmaps(uint f, const InputOptions::Private &
quantizeMipmap(mipmap, compressionOptions);
compressMipmap(mipmap, inputOptions, compressionOptions, outputOptions);
compressMipmap(mipmap, compressionOptions, outputOptions);
// Compute extents of next mipmap:
w = max(1U, w / 2);
@ -478,11 +471,6 @@ bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Privat
// Convert linear float image to fixed image ready for compression.
mipmap.toFixedImage(inputOptions);
if (inputOptions.premultiplyAlpha)
{
premultiplyAlphaMipmap(mipmap, inputOptions);
}
return true;
}
@ -587,29 +575,6 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
}
void Compressor::Private::premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
nvDebugCheck(mipmap.asFixedImage() != NULL);
Image * image = mipmap.asMutableFixedImage();
const uint w = image->width();
const uint h = image->height();
const uint count = w * h;
for (uint i = 0; i < count; ++i)
{
Color32 c = image->pixel(i);
c.r = (uint(c.r) * uint(c.a)) >> 8;
c.g = (uint(c.g) * uint(c.a)) >> 8;
c.b = (uint(c.b) * uint(c.a)) >> 8;
image->pixel(i) = c;
}
}
// Process an input image: Convert to normal map, normalize, or convert to linear space.
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
@ -689,7 +654,7 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
// Compress the given mipmap.
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{
const Image * image = mipmap.asFixedImage();
@ -725,7 +690,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1(image, compressionOptions, outputOptions);
cuda->compressDXT1(image, outputOptions, compressionOptions);
}
else
{
@ -752,18 +717,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
}
}
}
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, compressionOptions, outputOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
@ -775,7 +728,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT3(image, inputOptions, compressionOptions, outputOptions);
cuda->compressDXT3(image, outputOptions, compressionOptions);
}
else
{
@ -794,7 +747,7 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT5(image, inputOptions, compressionOptions, outputOptions);
cuda->compressDXT5(image, outputOptions, compressionOptions);
}
else
{
@ -821,18 +774,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{
compressBC5(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, compressionOptions, outputOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
return true;
}

@ -58,10 +58,9 @@ namespace nvtt
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
bool compressMipmap(const Mipmap & mipmap, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;
public:

@ -23,8 +23,6 @@
#include <string.h> // memcpy
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvcore/Memory.h>
#include "nvtt.h"
@ -120,8 +118,6 @@ void InputOptions::reset()
m.maxExtent = 0;
m.roundMode = RoundMode_None;
m.premultiplyAlpha = false;
}
@ -309,19 +305,6 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
//m.linearTransform.setRow(channel, w);
}
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
{
nvCheck(x >= 0 && x < 3);
nvCheck(y >= 0 && y < 3);
nvCheck(z >= 0 && z < 3);
nvCheck(w >= 0 && w < 3);
// m.xswizzle = x;
// m.yswizzle = y;
// m.zswizzle = z;
// m.wswizzle = w;
}
void InputOptions::setMaxExtents(int e)
{
nvDebugCheck(e > 0);
@ -333,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
m.roundMode = mode;
}
void InputOptions::setPremultiplyAlpha(bool b)
{
m.premultiplyAlpha = b;
}
void InputOptions::Private::computeTargetExtents() const
{

@ -78,8 +78,6 @@ namespace nvtt
uint maxExtent;
RoundMode roundMode;
bool premultiplyAlpha;
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
mutable uint targetWidth;
mutable uint targetHeight;
@ -91,7 +89,7 @@ namespace nvtt
int realMipmapCount() const;
const nv::Image * image(uint face, uint mipmap) const;
const nv::Image * image(uint idx) const;
const nv::Image * image(uint idx) const;
};

@ -21,8 +21,6 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>

@ -205,43 +205,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float2 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
}
}
////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand
@ -258,26 +221,6 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
return v;
}
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
return v;
}
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 255.0f);
v.y = rintf(__saturatef(v.y) * 255.0f);
*w = ((ushort)v.x << 8) | ((ushort)v.y);
v.x *= 1.0f / 255.0f;
v.y *= 1.0f / 255.0f;
return v;
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations
@ -521,114 +464,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
}
*/
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable3[bits & 3] * colors[i];
akku += prods3[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 4.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 4.0f) * (e.x + e.y);
}
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 8-8 color and expand...
a = roundAndExpand88(a, start);
b = roundAndExpand88(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations
@ -757,67 +592,6 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
}
*/
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
__shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError;
}
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
@ -852,39 +626,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
errors[idx] = bestError;
}
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
////////////////////////////////////////////////////////////////////////////////
@ -996,11 +737,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
result[bid].y = indices;
}
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
const int bid = blockIdx.x;
@ -1092,61 +828,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
}
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
/*
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
@ -1356,13 +1037,3 @@ extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint *
{
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}

File diff suppressed because it is too large Load Diff

@ -39,11 +39,9 @@ namespace nv
bool isValid() const;
void compressDXT1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
private:

@ -87,64 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
@ -301,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance);
}
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[3] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H

@ -1,60 +0,0 @@
#include "nvtt_experimental.h"
struct NvttImage
{
NvttImage() :
m_constant(false),
m_image(NULL),
m_floatImage(NULL)
{
}
~NvttImage()
{
if (m_constant && m_image) m_image->unwrap();
delete m_image;
delete m_floatImage;
}
bool m_constant;
Image * m_image;
FloatImage * m_floatImage;
};
NvttImage * nvttCreateImage()
{
return new NvttImage();
}
void nvttDestroyImage(NvttImage * img)
{
delete img;
}
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
{
nvCheck(img != NULL);
if (format == NVTT_InputFormat_BGRA_8UB)
{
img->m_constant = false;
img->m_image->allocate(w, h);
memcpy(img->m_image->pixels(), data, w * h * 4);
}
else
{
nvCheck(false);
}
}
void nvttCompressImage(NvttImage * img, NvttFormat format)
{
nvCheck(img != NULL);
// @@ Invoke appropriate compressor.
}
#endif // NVTT_EXPERIMENTAL_H

@ -1,55 +0,0 @@
#ifndef NVTT_EXPERIMENTAL_H
#define NVTT_EXPERIMENTAL_H
#include <nvtt/nvtt.h>
typedef struct NvttImage NvttImage;
NvttImage * nvttCreateImage();
void nvttDestroyImage(NvttImage * img);
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data);
void nvttCompressImage(NvttImage * img, NvttFormat format);
// How to control the compression parameters?
// Using many arguments:
// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
// Using existing compression option class:
// compressionOptions = nvttCreateCompressionOptions();
// nvttSetCompressionOptionsFormat(compressionOptions, format);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
// ...
// nvttCompressImage(img, compressionOptions);
// Using thread local context state:
// void nvttSetCompressionFormat(format);
// void nvttSetCompressionQuality(quality);
// void nvttSetCompressionColorWeights(r, g, b, a);
// ...
// nvttCompressImage(img);
// Using thread local context state, but with GL style function arguments:
// nvttCompressorParameteri(NVTT_FORMAT, format);
// nvttCompressorParameteri(NVTT_QUALITY, quality);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
// ...
// nvttCompressImage(img);
// How do we get the compressed output?
// - Using callbacks. (via new entrypoints, or through outputOptions)
// - Return it explicitely from nvttCompressImage.
// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)'
#endif // NVTT_EXPERIMENTAL_H

@ -83,9 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
};
/// Quality modes.
@ -157,7 +154,6 @@ namespace nvtt
{
ColorTransform_None,
ColorTransform_Linear,
ColorTransform_Swizzle
};
/// Extents rounding mode.
@ -222,14 +218,10 @@ namespace nvtt
// Set color transforms. @@ Not implemented!
NVTT_API void setColorTransform(ColorTransform t);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
// Set resizing options.
NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode);
// Set whether or not to premultiply color by alpha
NVTT_API void setPremultiplyAlpha(bool b);
};

@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}

@ -140,7 +140,6 @@ int main(int argc, char *argv[])
bool silent = false;
bool bc1n = false;
nvtt::Format format = nvtt::Format_BC1;
bool premultiplyAlpha = false;
const char * externalCompressor = NULL;
@ -174,10 +173,6 @@ int main(int argc, char *argv[])
{
noMipmaps = true;
}
else if (strcmp("-premula", argv[i]) == 0)
{
premultiplyAlpha = true;
}
// Compression options.
else if (strcmp("-fast", argv[i]) == 0)
@ -271,8 +266,7 @@ int main(int argc, char *argv[])
printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n");
printf(" -premula \tPremultiply alpha into color channel.\n\n");
printf(" -nomips \tDisable mipmap generation.\n\n");
printf("Compression options:\n");
printf(" -fast \tFast compression.\n");
@ -379,11 +373,6 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false);
}
if (premultiplyAlpha)
{
inputOptions.setPremultiplyAlpha(true);
inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
}
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format);

@ -31,145 +31,41 @@
#include "cmdline.h"
#include <time.h> // clock
int main(int argc, char *argv[])
{
MyAssertHandler assertHandler;
MyMessageHandler messageHandler;
bool forcenormal = false;
bool mipmaps = false;
bool faces = false;
nv::Path input;
nv::Path output;
// Parse arguments.
for (int i = 1; i < argc; i++)
{
if (strcmp("-forcenormal", argv[i]) == 0)
{
forcenormal = true;
}
else if (strcmp("-mipmaps", argv[i]) == 0)
{
mipmaps = true;
}
else if (strcmp("-faces", argv[i]) == 0)
{
faces = true;
}
else if (argv[i][0] != '-')
{
input = argv[i];
if (i+1 < argc && argv[i+1][0] != '-')
{
output = argv[i+1];
}
else
{
output.copy(input.str());
output.stripExtension();
output.append(".tga");
}
break;
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
if (input.isNull())
if (argc != 2)
{
printf("usage: nvdecompress [options] infile [outfile]\n\n");
printf("Note: the .tga extension is forced on outfile\n\n");
printf("Input options:\n");
printf(" -forcenormal \tThe input image is a normal map.\n");
printf(" -mipmaps \tDecompress all mipmaps.\n");
printf(" -faces \tDecompress all faces.\n");
return 1;
}
// Load surface.
nv::DirectDrawSurface dds(input);
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str());
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nvdecompress 'ddsfile'\n\n");
return 1;
}
if (!dds.isSupported() || dds.isTexture3D())
// Load surface.
nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid())
{
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1;
}
uint faceCount;
if (dds.isTexture2D())
{
faceCount = 1;
}
else
{
nvCheck(dds.isTextureCube());
faceCount = 6;
}
uint mipmapCount = dds.mipmapCount();
nv::Path name(argv[1]);
name.stripExtension();
name.append(".tga");
clock_t start = clock();
// apply arguments
if (forcenormal)
{
dds.setNormalFlag(true);
}
if (!faces)
{
faceCount = 1;
}
if (!mipmaps)
{
mipmapCount = 1;
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
printf("Error opening '%s' for writting\n", name.str());
return 1;
}
nv::Image mipmap;
nv::Path name;
// strip extension, we force the tga extension
output.stripExtension();
// extract faces and mipmaps
for (uint f = 0; f < faceCount; f++)
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m);
// set output filename, if we are doing faces and/or mipmaps
name.copy(output);
if (faces) name.appendFormat("_face%d", f);
if (mipmaps) name.appendFormat("_mipmap%d", m);
name.append(".tga");
nv::StdOutputStream stream(name.str());
if (stream.isError()) {
fprintf(stderr, "Error opening '%s' for writting\n", name.str());
return 1;
}
nv::ImageIO::saveTGA(stream, &mipmap);
}
}
// @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
nv::Image img;
dds.mipmap(&img, 0, 0); // get first image
nv::ImageIO::saveTGA(stream, &img);
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0;
}

Loading…
Cancel
Save