43 Commits
2.0.0 ... 2.0.2

Author SHA1 Message Date
1df4bb6980 Fix changelog. 2008-04-17 09:04:57 +00:00
0294c4ad93 Tag 2.0.2 release. 2008-04-17 08:59:21 +00:00
34ae5bcb6f Merge 2.0 branch fixes. 2008-04-17 07:17:46 +00:00
fe130a9906 Add DXT1a single color compressor. 2008-04-17 07:00:51 +00:00
bade8e5e09 Merge private branch. 2008-04-17 07:00:19 +00:00
141a05edf4 Merge private branch. 2008-04-17 06:59:29 +00:00
7d3facd81a Merge private branch. 2008-04-17 06:59:13 +00:00
17a4f765fb Merge private branch. 2008-04-17 06:58:43 +00:00
cb91740591 Merge private branch. 2008-04-17 06:58:18 +00:00
d10295fbf6 Use DXT1a single color compressor. 2008-04-17 06:55:26 +00:00
fa5e1f5a07 Add single color DXT1a compressor. 2008-04-17 06:54:29 +00:00
9d47e100f1 Add better support for the DX10 DDS formats. 2008-04-11 23:58:41 +00:00
db1b30ee4b Update changelog. 2008-04-11 22:04:59 +00:00
4c759f999c Integrate decompressor tool improvements submitted by Amorilia. 2008-04-11 22:03:42 +00:00
299ad176fc Add experimental image based interface. 2008-04-11 08:06:15 +00:00
5070cc98d3 Do not use constructor that initializes POD types. 2008-04-11 06:50:36 +00:00
133ebfb282 Remove unused parameter warnings.
Do not compile tokenizer; it's not being used, and does not work on win64 yet.
2008-04-09 09:06:19 +00:00
ebe8054728 Cache HAVE_* variables so that they can be edited through the cmake gui. 2008-04-06 05:59:13 +00:00
aa14653d96 Do not cache CUDA_FOUND variable. 2008-04-06 05:54:53 +00:00
389adb5368 Update change log, merge 2.0 changes, add attributions. 2008-03-27 04:45:11 +00:00
bd3314f4af Add inputOptions argument to compressors, so that they can access alpha mode. 2008-03-27 04:28:17 +00:00
065c5f0689 Cleanup simple compressors. Move code from FastCompress to QuickCompress. 2008-03-20 01:39:02 +00:00
cc8656f12b Update project files. 2008-03-14 08:42:24 +00:00
d2384cf47f Remove unused methods. 2008-03-14 08:40:48 +00:00
aff59c22b8 remove unused compressors 2008-03-14 08:40:11 +00:00
59be16d40a Remove unused fitting code. 2008-03-14 08:39:03 +00:00
b7a724448b Remove unnecesary dependency. 2008-03-14 07:32:59 +00:00
259e7c58fd Merge Viktor Linder patch into 2.0 and trunk.
Fixes RGB modes with less than 32 bpp.
2008-03-11 21:22:54 +00:00
307c8b99ee Add support for premultiplied alpha. Patch by Charles Nicholson. 2008-03-07 00:41:03 +00:00
6b933c4f62 Fix post-build command. Copy headers to include/nvtt/. 2008-03-06 20:28:43 +00:00
fd1ac3c61f Add quotes around post build command arguments. Reported by Richard Sim. 2008-03-05 23:26:12 +00:00
65aa7e1eaa Add interface for swizzle color xform. 2008-03-05 22:35:16 +00:00
f5ae4c1a9a Fix indexMirror error reported by Chris Lambert. 2008-03-05 19:42:45 +00:00
75c09220c8 Fix Image copy ctor bug reported by Richard Sim. 2008-03-05 19:11:41 +00:00
9f4b4bd532 Update comments about hole filling algorithms. 2008-03-04 00:13:44 +00:00
bce983f39e Add post build command to copy header files. 2008-02-28 22:07:08 +00:00
ff93ad41cb Fix end of lines. 2008-02-28 21:45:46 +00:00
56c7771100 Fix end of lines. 2008-02-28 21:45:26 +00:00
ccced843e3 Use smaller allocations to prevent errors.
Check for allocation errors.
2008-02-28 21:45:04 +00:00
dafe2b8841 Hide copy ctor and operator to prevent compiler warnings.
Wrap pimpl using NVTT_DECLARE_PIMPL macro.
2008-02-28 21:14:40 +00:00
e3e7fcb226 Check cuda errors to find out whether the cuda context initialization succeeded. 2008-02-28 17:52:32 +00:00
970395fba8 Fix osx build. 2008-02-28 17:02:29 +00:00
8a24a93e2f Disable CUDA when memory allocations fail. 2008-02-28 16:06:27 +00:00
50 changed files with 1348 additions and 3811 deletions

View File

@ -1,11 +1,18 @@
NVIDIA Texture Tools version 2.1.0
* CTX1 CUDA compressor.
* DXT1n CUDA compressor.
NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim.
* Fix indexMirror error reported by Chris Lambert.
* Fix vc8 post build command, reported by Richard Sim.
* Fix RGBA modes with less than 32 bpp by Viktor Linder.
* Fix alpha decompression by Amorilia. See issue 40.
* Avoid default-initialized constructors for POD types, reported by Jim Tilander.
* Add single color compresor for DXT1a.
* Set swizzle code to ATI2 files. See issue 41.
NVIDIA Texture Tools version 2.0.1
* Fix memory leaks.
* Pre-allocate device memory for CUDA compressor.
* Add single color compressor.
* Add single color compressor. Thanks to Amir Ebrahimi.
* Better CUDA error checking.
NVIDIA Texture Tools version 2.0.0
* Fixed PSNR formula in nvimgdiff.

View File

@ -2,7 +2,7 @@
--------------------------------------------------------------------------------
NVIDIA Texture Tools
README.txt
Version 2.1
Version 2.0
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------

View File

@ -1 +1 @@
2.1.0
2.0.2

View File

@ -46,9 +46,9 @@ FIND_LIBRARY (CUDA_RUNTIME_LIBRARY
DOC "The CUDA runtime library")
IF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_FOUND 1 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
SET (CUDA_FOUND TRUE)
ELSE (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_FOUND 0 CACHE STRING "Set to 1 if CUDA is found, 0 otherwise")
SET (CUDA_FOUND FALSE)
ENDIF (CUDA_INCLUDE_PATH AND CUDA_RUNTIME_LIBRARY)
SET (CUDA_LIBRARIES ${CUDA_RUNTIME_LIBRARY})

0
gnuwin32/bin/libpng12.dll Executable file → Normal file
View File

BIN
gnuwin32/lib/libpng.a Normal file

Binary file not shown.

BIN
gnuwin32/lib/libz.a Normal file

Binary file not shown.

View File

@ -278,6 +278,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

View File

@ -277,6 +277,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17"
/>
<Tool

View File

@ -278,11 +278,7 @@
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvmath\Eigen.cpp"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Fitting.cpp"
RelativePath="..\..\..\src\nvmath\Plane.cpp"
>
</File>
</Filter>
@ -299,18 +295,14 @@
RelativePath="..\..\..\src\nvmath\Color.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Eigen.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Fitting.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Matrix.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Plane.h"
>
</File>
<File
RelativePath="..\..\..\src\nvmath\Vector.h"
>

View File

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -53,8 +53,8 @@ END
//
VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0
PRODUCTVERSION 2,1,0,0
FILEVERSION 2,0,2,0
PRODUCTVERSION 2,0,2,0
FILEFLAGSMASK 0x17L
#ifdef _DEBUG
FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN
VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0"
VALUE "FileVersion", "2, 0, 2, 0"
VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0"
VALUE "ProductVersion", "2, 0, 2, 0"
END
END
BLOCK "VarFileInfo"

View File

@ -96,6 +96,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -258,6 +260,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -420,6 +424,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -578,6 +584,8 @@
/>
<Tool
Name="VCPostBuildEventTool"
Description="Copying header files..."
CommandLine="xcopy /y /f /i &quot;$(SolutionDir)\..\..\src\nvtt\nvtt*.h&quot; &quot;$(SolutionDir)\$(ConfigurationName).$(PlatformName)\include\nvtt\&quot;"
/>
</Configuration>
<Configuration
@ -683,7 +691,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -693,7 +701,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -703,7 +711,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>
@ -713,7 +721,7 @@
>
<Tool
Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj"
/>

View File

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -50,6 +50,7 @@ ENDIF(CG_FOUND)
# CUDA
INCLUDE(${NV_CMAKE_DIR}/FindCUDA.cmake)
IF(CUDA_FOUND)
SET(HAVE_CUDA ${CUDA_FOUND} CACHE BOOL "Set to TRUE if CUDA is found, FALSE otherwise")
MESSAGE(STATUS "Looking for CUDA - found")
ELSE(CUDA_FOUND)
MESSAGE(STATUS "Looking for CUDA - not found")
@ -58,7 +59,7 @@ ENDIF(CUDA_FOUND)
# Maya
INCLUDE(${NV_CMAKE_DIR}/FindMaya.cmake)
IF(MAYA_FOUND)
SET(HAVE_MAYA MAYA_FOUND)
SET(HAVE_MAYA ${MAYA_FOUND} CACHE BOOL "Set to TRUE if Maya is found, FALSE otherwise")
MESSAGE(STATUS "Looking for Maya - found")
ELSE(MAYA_FOUND)
MESSAGE(STATUS "Looking for Maya - not found")
@ -67,7 +68,7 @@ ENDIF(MAYA_FOUND)
# JPEG
INCLUDE(FindJPEG)
IF(JPEG_FOUND)
SET(HAVE_JPEG JPEG_FOUND)
SET(HAVE_JPEG ${JPEG_FOUND} CACHE BOOL "Set to TRUE if JPEG is found, FALSE otherwise")
MESSAGE(STATUS "Looking for JPEG - found")
ELSE(JPEG_FOUND)
MESSAGE(STATUS "Looking for JPEG - not found")
@ -76,7 +77,7 @@ ENDIF(JPEG_FOUND)
# PNG
INCLUDE(FindPNG)
IF(PNG_FOUND)
SET(HAVE_PNG PNG_FOUND)
SET(HAVE_PNG ${PNG_FOUND} CACHE BOOL "Set to TRUE if PNG is found, FALSE otherwise")
MESSAGE(STATUS "Looking for PNG - found")
ELSE(PNG_FOUND)
MESSAGE(STATUS "Looking for PNG - not found")
@ -85,7 +86,7 @@ ENDIF(PNG_FOUND)
# TIFF
INCLUDE(FindTIFF)
IF(TIFF_FOUND)
SET(HAVE_TIFF TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")
MESSAGE(STATUS "Looking for TIFF - found")
ELSE(TIFF_FOUND)
MESSAGE(STATUS "Looking for TIFF - not found")
@ -94,7 +95,7 @@ ENDIF(TIFF_FOUND)
# OpenEXR
INCLUDE(${NV_CMAKE_DIR}/FindOpenEXR.cmake)
IF(OPENEXR_FOUND)
SET(HAVE_OPENEXR OPENEXR_FOUND)
SET(HAVE_OPENEXR ${OPENEXR_FOUND} CACHE BOOL "Set to TRUE if OpenEXR is found, FALSE otherwise")
MESSAGE(STATUS "Looking for OpenEXR - found")
ELSE(OPENEXR_FOUND)
MESSAGE(STATUS "Looking for OpenEXR - not found")

View File

@ -18,8 +18,6 @@ SET(CORE_SRCS
TextReader.cpp
TextWriter.h
TextWriter.cpp
Tokenizer.h
Tokenizer.cpp
Radix.h
Radix.cpp)

View File

@ -446,7 +446,7 @@ namespace nv
// Call default constructors
for( i = old_size; i < new_size; i++ ) {
new(m_buffer+i) T(); // placement new
new(m_buffer+i) T; // placement new
}
}

View File

@ -74,7 +74,9 @@ namespace
// TODO write minidump
static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS *pExceptionInfo ) {
static LONG WINAPI nvTopLevelFilter( struct _EXCEPTION_POINTERS * pExceptionInfo)
{
NV_UNUSED(pExceptionInfo);
/* BOOL (WINAPI * Dump) (HANDLE, DWORD, HANDLE, MINIDUMP_TYPE, PMINIDUMP_EXCEPTION_INFORMATION, PMINIDUMP_USER_STREAM_INFORMATION, PMINIDUMP_CALLBACK_INFORMATION );
AutoString dbghelp_path(512);

View File

@ -18,6 +18,8 @@ void * nv::mem::malloc(size_t size)
void * nv::mem::malloc(size_t size, const char * file, int line)
{
NV_UNUSED(file);
NV_UNUSED(line);
return ::malloc(size);
}

View File

@ -307,15 +307,6 @@ void ColorBlock::boundsRangeAlpha(Color32 * start, Color32 * end) const
}
void ColorBlock::bestFitRange(Color32 * start, Color32 * end) const
{
nvDebugCheck(start != NULL);
nvDebugCheck(end != NULL);
Vector3 axis = bestFitLine().direction();
computeRange(axis, start, end);
}
/// Sort colors by abosolute value in their 16 bit representation.
void ColorBlock::sortColorsByAbsoluteValue()
{
@ -393,19 +384,6 @@ void ColorBlock::sortColors(const Vector3 & axis)
}
/// Get least squares line that best approxiamtes the points of the color block.
Line3 ColorBlock::bestFitLine() const
{
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
pointArray.append(Vector3(m_color[i].r, m_color[i].g, m_color[i].b));
}
return Fit::bestLine(pointArray);
}
/// Get the volume of the color block.
float ColorBlock::volume() const
{

View File

@ -4,7 +4,6 @@
#define NV_IMAGE_COLORBLOCK_H
#include <nvmath/Color.h>
#include <nvmath/Fitting.h> // Line3
namespace nv
{
@ -33,16 +32,13 @@ namespace nv
void luminanceRange(Color32 * start, Color32 * end) const;
void boundsRange(Color32 * start, Color32 * end) const;
void boundsRangeAlpha(Color32 * start, Color32 * end) const;
void bestFitRange(Color32 * start, Color32 * end) const;
void sortColorsByAbsoluteValue();
void computeRange(const Vector3 & axis, Color32 * start, Color32 * end) const;
void sortColors(const Vector3 & axis);
Line3 bestFitLine() const;
float volume() const;
Line3 diameterLine() const;
// Accessors
const Color32 * colors() const;

View File

@ -54,6 +54,10 @@ namespace
static const uint FOURCC_ATI1 = MAKEFOURCC('A', 'T', 'I', '1');
static const uint FOURCC_ATI2 = MAKEFOURCC('A', 'T', 'I', '2');
static const uint FOURCC_A2XY = MAKEFOURCC('A', '2', 'X', 'Y');
static const uint FOURCC_DX10 = MAKEFOURCC('D', 'X', '1', '0');
// 32 bit RGB formats.
static const uint D3DFMT_R8G8B8 = 20;
static const uint D3DFMT_A8R8G8B8 = 21;
@ -253,6 +257,144 @@ namespace
D3D10_RESOURCE_DIMENSION_TEXTURE3D = 4,
};
const char * getDxgiFormatString(DXGI_FORMAT dxgiFormat)
{
#define CASE(format) case DXGI_FORMAT_##format: return #format
switch(dxgiFormat)
{
CASE(UNKNOWN);
CASE(R32G32B32A32_TYPELESS);
CASE(R32G32B32A32_FLOAT);
CASE(R32G32B32A32_UINT);
CASE(R32G32B32A32_SINT);
CASE(R32G32B32_TYPELESS);
CASE(R32G32B32_FLOAT);
CASE(R32G32B32_UINT);
CASE(R32G32B32_SINT);
CASE(R16G16B16A16_TYPELESS);
CASE(R16G16B16A16_FLOAT);
CASE(R16G16B16A16_UNORM);
CASE(R16G16B16A16_UINT);
CASE(R16G16B16A16_SNORM);
CASE(R16G16B16A16_SINT);
CASE(R32G32_TYPELESS);
CASE(R32G32_FLOAT);
CASE(R32G32_UINT);
CASE(R32G32_SINT);
CASE(R32G8X24_TYPELESS);
CASE(D32_FLOAT_S8X24_UINT);
CASE(R32_FLOAT_X8X24_TYPELESS);
CASE(X32_TYPELESS_G8X24_UINT);
CASE(R10G10B10A2_TYPELESS);
CASE(R10G10B10A2_UNORM);
CASE(R10G10B10A2_UINT);
CASE(R11G11B10_FLOAT);
CASE(R8G8B8A8_TYPELESS);
CASE(R8G8B8A8_UNORM);
CASE(R8G8B8A8_UNORM_SRGB);
CASE(R8G8B8A8_UINT);
CASE(R8G8B8A8_SNORM);
CASE(R8G8B8A8_SINT);
CASE(R16G16_TYPELESS);
CASE(R16G16_FLOAT);
CASE(R16G16_UNORM);
CASE(R16G16_UINT);
CASE(R16G16_SNORM);
CASE(R16G16_SINT);
CASE(R32_TYPELESS);
CASE(D32_FLOAT);
CASE(R32_FLOAT);
CASE(R32_UINT);
CASE(R32_SINT);
CASE(R24G8_TYPELESS);
CASE(D24_UNORM_S8_UINT);
CASE(R24_UNORM_X8_TYPELESS);
CASE(X24_TYPELESS_G8_UINT);
CASE(R8G8_TYPELESS);
CASE(R8G8_UNORM);
CASE(R8G8_UINT);
CASE(R8G8_SNORM);
CASE(R8G8_SINT);
CASE(R16_TYPELESS);
CASE(R16_FLOAT);
CASE(D16_UNORM);
CASE(R16_UNORM);
CASE(R16_UINT);
CASE(R16_SNORM);
CASE(R16_SINT);
CASE(R8_TYPELESS);
CASE(R8_UNORM);
CASE(R8_UINT);
CASE(R8_SNORM);
CASE(R8_SINT);
CASE(A8_UNORM);
CASE(R1_UNORM);
CASE(R9G9B9E5_SHAREDEXP);
CASE(R8G8_B8G8_UNORM);
CASE(G8R8_G8B8_UNORM);
CASE(BC1_TYPELESS);
CASE(BC1_UNORM);
CASE(BC1_UNORM_SRGB);
CASE(BC2_TYPELESS);
CASE(BC2_UNORM);
CASE(BC2_UNORM_SRGB);
CASE(BC3_TYPELESS);
CASE(BC3_UNORM);
CASE(BC3_UNORM_SRGB);
CASE(BC4_TYPELESS);
CASE(BC4_UNORM);
CASE(BC4_SNORM);
CASE(BC5_TYPELESS);
CASE(BC5_UNORM);
CASE(BC5_SNORM);
CASE(B5G6R5_UNORM);
CASE(B5G5R5A1_UNORM);
CASE(B8G8R8A8_UNORM);
CASE(B8G8R8X8_UNORM);
default:
return "UNKNOWN";
}
#undef CASE
}
const char * getD3d10ResourceDimensionString(D3D10_RESOURCE_DIMENSION resourceDimension)
{
switch(resourceDimension)
{
default:
case D3D10_RESOURCE_DIMENSION_UNKNOWN: return "UNKNOWN";
case D3D10_RESOURCE_DIMENSION_BUFFER: return "BUFFER";
case D3D10_RESOURCE_DIMENSION_TEXTURE1D: return "TEXTURE1D";
case D3D10_RESOURCE_DIMENSION_TEXTURE2D: return "TEXTURE2D";
case D3D10_RESOURCE_DIMENSION_TEXTURE3D: return "TEXTURE3D";
}
}
} // namespace
namespace nv
@ -390,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision
this->reserved[10] = (2 << 16) | (0 << 8) | (2); // major.minor.revision
this->pf.size = 32;
this->pf.flags = 0;
@ -494,7 +636,16 @@ void DDSHeader::setFourCC(uint8 c0, uint8 c1, uint8 c2, uint8 c3)
// set fourcc pixel format.
this->pf.flags = DDPF_FOURCC;
this->pf.fourcc = MAKEFOURCC(c0, c1, c2, c3);
this->pf.bitcount = 0;
if (this->pf.fourcc == FOURCC_ATI2)
{
this->pf.bitcount = FOURCC_A2XY;
}
else
{
this->pf.bitcount = 0;
}
this->pf.rmask = 0;
this->pf.gmask = 0;
this->pf.bmask = 0;
@ -530,9 +681,9 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
nvCheck(bitcount > 0 && bitcount <= 32);
// Align to 8.
if (bitcount < 8) bitcount = 8;
else if (bitcount < 16) bitcount = 16;
else if (bitcount < 24) bitcount = 24;
if (bitcount <= 8) bitcount = 8;
else if (bitcount <= 16) bitcount = 16;
else if (bitcount <= 24) bitcount = 24;
else bitcount = 32;
this->pf.fourcc = 0; //findD3D9Format(bitcount, rmask, gmask, bmask, amask);
@ -545,7 +696,8 @@ void DDSHeader::setPixelFormat(uint bitcount, uint rmask, uint gmask, uint bmask
void DDSHeader::setDX10Format(uint format)
{
this->pf.flags = 0;
//this->pf.flags = 0;
this->pf.fourcc = FOURCC_DX10;
this->header10.dxgiFormat = format;
}
@ -593,7 +745,8 @@ void DDSHeader::swapBytes()
bool DDSHeader::hasDX10Header() const
{
return this->pf.flags == 0;
return this->pf.fourcc == FOURCC_DX10; // @@ This is according to AMD
//return this->pf.flags == 0; // @@ This is according to MS
}
@ -623,7 +776,7 @@ bool DirectDrawSurface::isValid() const
return false;
}
const uint required = (DDSD_WIDTH|DDSD_HEIGHT|DDSD_CAPS|DDSD_PIXELFORMAT);
const uint required = (DDSD_WIDTH|DDSD_HEIGHT/*|DDSD_CAPS|DDSD_PIXELFORMAT*/);
if( (header.flags & required) != required ) {
return false;
}
@ -643,40 +796,46 @@ bool DirectDrawSurface::isSupported() const
{
nvDebugCheck(isValid());
if (header.pf.flags & DDPF_FOURCC)
if (header.hasDX10Header())
{
if (header.pf.fourcc != FOURCC_DXT1 &&
header.pf.fourcc != FOURCC_DXT2 &&
header.pf.fourcc != FOURCC_DXT3 &&
header.pf.fourcc != FOURCC_DXT4 &&
header.pf.fourcc != FOURCC_DXT5 &&
header.pf.fourcc != FOURCC_RXGB &&
header.pf.fourcc != FOURCC_ATI1 &&
header.pf.fourcc != FOURCC_ATI2)
{
// Unknown fourcc code.
return false;
}
}
else if (header.pf.flags & DDPF_RGB)
{
// All RGB formats are supported now.
}
else
{
return false;
}
if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES)
{
// Cubemaps must contain all faces.
return false;
}
if (isTexture3D())
{
// @@ 3D textures not supported yet.
return false;
if (header.pf.flags & DDPF_FOURCC)
{
if (header.pf.fourcc != FOURCC_DXT1 &&
header.pf.fourcc != FOURCC_DXT2 &&
header.pf.fourcc != FOURCC_DXT3 &&
header.pf.fourcc != FOURCC_DXT4 &&
header.pf.fourcc != FOURCC_DXT5 &&
header.pf.fourcc != FOURCC_RXGB &&
header.pf.fourcc != FOURCC_ATI1 &&
header.pf.fourcc != FOURCC_ATI2)
{
// Unknown fourcc code.
return false;
}
}
else if (header.pf.flags & DDPF_RGB)
{
// All RGB formats are supported now.
}
else
{
return false;
}
if (isTextureCube() && (header.caps.caps2 & DDSCAPS2_CUBEMAP_ALL_FACES) != DDSCAPS2_CUBEMAP_ALL_FACES)
{
// Cubemaps must contain all faces.
return false;
}
if (isTexture3D())
{
// @@ 3D textures not supported yet.
return false;
}
}
return true;
@ -712,16 +871,40 @@ uint DirectDrawSurface::depth() const
else return 1;
}
bool DirectDrawSurface::isTexture1D() const
{
nvDebugCheck(isValid());
if (header.hasDX10Header())
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE1D;
}
return false;
}
bool DirectDrawSurface::isTexture2D() const
{
nvDebugCheck(isValid());
return !isTexture3D() && !isTextureCube();
if (header.hasDX10Header())
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE2D;
}
else
{
return !isTexture3D() && !isTextureCube();
}
}
bool DirectDrawSurface::isTexture3D() const
{
nvDebugCheck(isValid());
return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0;
if (header.hasDX10Header())
{
return header.header10.resourceDimension == D3D10_RESOURCE_DIMENSION_TEXTURE3D;
}
else
{
return (header.caps.caps2 & DDSCAPS2_VOLUME) != 0;
}
}
bool DirectDrawSurface::isTextureCube() const
@ -730,6 +913,12 @@ bool DirectDrawSurface::isTextureCube() const
return (header.caps.caps2 & DDSCAPS2_CUBEMAP) != 0;
}
void DirectDrawSurface::setNormalFlag(bool b)
{
nvDebugCheck(isValid());
header.setNormalFlag(b);
}
void DirectDrawSurface::mipmap(Image * img, uint face, uint mipmap)
{
nvDebugCheck(isValid());
@ -780,7 +969,13 @@ void DirectDrawSurface::readLinearImage(Image * img)
uint byteCount = (header.pf.bitcount + 7) / 8;
if (header.pf.amask != 0)
// set image format: RGB or ARGB
// alpha channel exists if and only if the alpha mask is non-zero
if (header.pf.amask == 0)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
@ -808,7 +1003,20 @@ void DirectDrawSurface::readBlockImage(Image * img)
{
nvDebugCheck(stream != NULL);
nvDebugCheck(img != NULL);
// set image format: RGB or ARGB
if (header.pf.fourcc == FOURCC_RXGB ||
header.pf.fourcc == FOURCC_ATI1 ||
header.pf.fourcc == FOURCC_ATI2 ||
header.pf.flags & DDPF_NORMAL)
{
img->setFormat(Image::Format_RGB);
}
else
{
img->setFormat(Image::Format_ARGB);
}
const uint w = img->width();
const uint h = img->height();
@ -1044,8 +1252,23 @@ void DirectDrawSurface::printInfo() const
if (header.pf.flags & DDPF_ALPHAPREMULT) printf("\t\tDDPF_ALPHAPREMULT\n");
if (header.pf.flags & DDPF_NORMAL) printf("\t\tDDPF_NORMAL\n");
printf("\tFourCC: '%c%c%c%c'\n", ((header.pf.fourcc >> 0) & 0xFF), ((header.pf.fourcc >> 8) & 0xFF), ((header.pf.fourcc >> 16) & 0xFF), ((header.pf.fourcc >> 24) & 0xFF));
printf("\tBit count: %d\n", header.pf.bitcount);
printf("\tFourCC: '%c%c%c%c'\n",
((header.pf.fourcc >> 0) & 0xFF),
((header.pf.fourcc >> 8) & 0xFF),
((header.pf.fourcc >> 16) & 0xFF),
((header.pf.fourcc >> 24) & 0xFF));
if ((header.pf.fourcc & DDPF_FOURCC) && (header.pf.bitcount != 0))
{
printf("\tSwizzle: '%c%c%c%c'\n",
(header.pf.bitcount >> 0) & 0xFF,
(header.pf.bitcount >> 8) & 0xFF,
(header.pf.bitcount >> 16) & 0xFF,
(header.pf.bitcount >> 24) & 0xFF);
}
else
{
printf("\tBit count: %d\n", header.pf.bitcount);
}
printf("\tRed mask: 0x%.8X\n", header.pf.rmask);
printf("\tGreen mask: 0x%.8X\n", header.pf.gmask);
printf("\tBlue mask: 0x%.8X\n", header.pf.bmask);
@ -1076,11 +1299,11 @@ void DirectDrawSurface::printInfo() const
printf("\tCaps 3: 0x%.8X\n", header.caps.caps3);
printf("\tCaps 4: 0x%.8X\n", header.caps.caps4);
if (header.pf.flags == 0)
if (header.hasDX10Header())
{
printf("DX10 Header:\n");
printf("\tDXGI Format: %u\n", header.header10.dxgiFormat);
printf("\tResource dimension: %u\n", header.header10.resourceDimension);
printf("\tDXGI Format: %u (%s)\n", header.header10.dxgiFormat, getDxgiFormatString((DXGI_FORMAT)header.header10.dxgiFormat));
printf("\tResource dimension: %u (%s)\n", header.header10.resourceDimension, getD3d10ResourceDimensionString((D3D10_RESOURCE_DIMENSION)header.header10.resourceDimension));
printf("\tMisc flag: %u\n", header.header10.miscFlag);
printf("\tArray size: %u\n", header.header10.arraySize);
}

View File

@ -119,9 +119,12 @@ namespace nv
uint width() const;
uint height() const;
uint depth() const;
bool isTexture1D() const;
bool isTexture2D() const;
bool isTexture3D() const;
bool isTextureCube() const;
void setNormalFlag(bool b);
void mipmap(Image * img, uint f, uint m);
// void mipmap(FloatImage * img, uint f, uint m);

View File

@ -228,12 +228,12 @@ inline uint FloatImage::indexMirror(int x, int y) const
{
x = abs(x);
while (x >= m_width) {
x = m_width + m_width - x - 2;
x = abs(m_width + m_width - x - 2);
}
y = abs(y);
while (y >= m_height) {
y = m_height + m_height - y - 2;
y = abs(m_height + m_height - y - 2);
}
return index(x, y);

View File

@ -296,7 +296,7 @@ static bool downsample(const FloatImage * src, const BitMap * srcMask, const Flo
return true;
}
// This is the filter used in the Lumigraph paper. The Unreal engine uses something similar.
// This is the filter used in the Lumigraph paper.
void nv::fillPullPush(FloatImage * img, const BitMap * bmap)
{
nvCheck(img != NULL);
@ -644,8 +644,8 @@ struct LocalPixels
// This is a cubic extrapolation filter from Charles Bloom (DoPixelSeamFix).
void nv::fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
// This is a quadratic extrapolation filter from Charles Bloom (DoPixelSeamFix). Used with his permission.
void nv::fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex /*= -1*/)
{
nvCheck(passCount > 0);
nvCheck(img != NULL);

View File

@ -89,7 +89,7 @@ namespace nv
NVIMAGE_API void fillPullPush(FloatImage * img, const BitMap * bmap);
NVIMAGE_API void fillExtrapolate(int passCount, FloatImage * img, BitMap * bmap);
NVIMAGE_API void fillCubicExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
NVIMAGE_API void fillQuadraticExtrapolate(int passCount, FloatImage * img, BitMap * bmap, int coverageIndex = -1);
} // nv namespace

View File

@ -15,7 +15,7 @@ Image::Image() : m_width(0), m_height(0), m_format(Format_RGB), m_data(NULL)
{
}
Image::Image(const Image & img)
Image::Image(const Image & img) : m_data(NULL)
{
allocate(img.m_width, img.m_height);
m_format = img.m_format;

View File

@ -7,8 +7,6 @@ SET(MATH_SRCS
Quaternion.h
Box.h
Color.h
Eigen.h Eigen.cpp
Fitting.h Fitting.cpp
Montecarlo.h Montecarlo.cpp
Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp

View File

@ -1,533 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Eigen.h"
using namespace nv;
static const float EPS = 0.00001f;
static const int MAX_ITER = 100;
static void semi_definite_symmetric_eigen(const float *mat, int n, float *eigen_vec, float *eigen_val);
// Use power method to find the first eigenvector.
// http://www.miislita.com/information-retrieval-tutorial/matrix-tutorial-3-eigenvalues-eigenvectors.html
Vector3 nv::firstEigenVector(float matrix[6])
{
// Number of iterations. @@ Use a variable number of iterations.
const int NUM = 8;
Vector3 v(1, 1, 1);
for(int i = 0; i < NUM; i++) {
float x = v.x() * matrix[0] + v.y() * matrix[1] + v.z() * matrix[2];
float y = v.x() * matrix[1] + v.y() * matrix[3] + v.z() * matrix[4];
float z = v.x() * matrix[2] + v.y() * matrix[4] + v.z() * matrix[5];
float norm = max(max(x, y), z);
float iv = 1.0f / norm;
if (norm == 0.0f) {
return Vector3(zero);
}
v.set(x*iv, y*iv, z*iv);
}
return v;
}
/// Solve eigen system.
void Eigen::solve() {
semi_definite_symmetric_eigen(matrix, N, eigen_vec, eigen_val);
}
/// Solve eigen system.
void Eigen3::solve() {
// @@ Use lengyel code that seems to be more optimized.
#if 1
float v[3*3];
semi_definite_symmetric_eigen(matrix, 3, v, eigen_val);
eigen_vec[0].set(v[0], v[1], v[2]);
eigen_vec[1].set(v[3], v[4], v[5]);
eigen_vec[2].set(v[6], v[7], v[8]);
#else
const int maxSweeps = 32;
const float epsilon = 1.0e-10f;
float m11 = matrix[0]; // m(0,0);
float m12 = matrix[1]; // m(0,1);
float m13 = matrix[2]; // m(0,2);
float m22 = matrix[3]; // m(1,1);
float m23 = matrix[4]; // m(1,2);
float m33 = matrix[5]; // m(2,2);
//r.SetIdentity();
eigen_vec[0].set(1, 0, 0);
eigen_vec[1].set(0, 1, 0);
eigen_vec[2].set(0, 0, 1);
for (int a = 0; a < maxSweeps; a++)
{
// Exit if off-diagonal entries small enough
if ((fabs(m12) < epsilon) && (fabs(m13) < epsilon) && (fabs(m23) < epsilon))
{
break;
}
// Annihilate (1,2) entry
if (m12 != 0.0f)
{
float u = (m22 - m11) * 0.5f / m12;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m11 -= t * m12;
m22 += t * m12;
m12 = 0.0f;
float temp = c * m13 - s * m23;
m23 = s * m13 + c * m23;
m13 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].x - s * eigen_vec[i].y;
eigen_vec[i].y = s * eigen_vec[i].x + c * eigen_vec[i].y;
eigen_vec[i].x = temp;
}
}
// Annihilate (1,3) entry
if (m13 != 0.0f)
{
float u = (m33 - m11) * 0.5f / m13;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m11 -= t * m13;
m33 += t * m13;
m13 = 0.0f;
float temp = c * m12 - s * m23;
m23 = s * m12 + c * m23;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].x - s * eigen_vec[i].z;
eigen_vec[i].z = s * eigen_vec[i].x + c * eigen_vec[i].z;
eigen_vec[i].x = temp;
}
}
// Annihilate (2,3) entry
if (m23 != 0.0f)
{
float u = (m33 - m22) * 0.5f / m23;
float u2 = u * u;
float u2p1 = u2 + 1.0f;
float t = (u2p1 != u2) ? ((u < 0.0f) ? -1.0f : 1.0f) * (sqrt(u2p1) - fabs(u)) : 0.5f / u;
float c = 1.0f / sqrt(t * t + 1.0f);
float s = c * t;
m22 -= t * m23;
m33 += t * m23;
m23 = 0.0f;
float temp = c * m12 - s * m13;
m13 = s * m12 + c * m13;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * eigen_vec[i].y - s * eigen_vec[i].z;
eigen_vec[i].z = s * eigen_vec[i].y + c * eigen_vec[i].z;
eigen_vec[i].y = temp;
}
}
}
eigen_val[0] = m11;
eigen_val[1] = m22;
eigen_val[2] = m33;
#endif
}
/*---------------------------------------------------------------------------
Functions
---------------------------------------------------------------------------*/
/** @@ I don't remember where did I get this function.
* computes the eigen values and eigen vectors
* of a semi definite symmetric matrix
*
* - matrix is stored in column symmetric storage, i.e.
* matrix = { m11, m12, m22, m13, m23, m33, m14, m24, m34, m44 ... }
* size = n(n+1)/2
*
* - eigen_vectors (return) = { v1, v2, v3, ..., vn } where vk = vk0, vk1, ..., vkn
* size = n^2, must be allocated by caller
*
* - eigen_values (return) are in decreasing order
* size = n, must be allocated by caller
*/
void semi_definite_symmetric_eigen(
const float *mat, int n, float *eigen_vec, float *eigen_val
) {
float *a,*v;
float a_norm,a_normEPS,thr,thr_nn;
int nb_iter = 0;
int jj;
int i,j,k,ij,ik,l,m,lm,mq,lq,ll,mm,imv,im,iq,ilv,il,nn;
int *index;
float a_ij,a_lm,a_ll,a_mm,a_im,a_il;
float a_lm_2;
float v_ilv,v_imv;
float x;
float sinx,sinx_2,cosx,cosx_2,sincos;
float delta;
// Number of entries in mat
nn = (n*(n+1))/2;
// Step 1: Copy mat to a
a = new float[nn];
for( ij=0; ij<nn; ij++ ) {
a[ij] = mat[ij];
}
// Ugly Fortran-porting trick: indices for a are between 1 and n
a--;
// Step 2 : Init diagonalization matrix as the unit matrix
v = new float[n*n];
ij = 0;
for( i=0; i<n; i++ ) {
for( j=0; j<n; j++ ) {
if( i==j ) {
v[ij++] = 1.0;
} else {
v[ij++] = 0.0;
}
}
}
// Ugly Fortran-porting trick: indices for v are between 1 and n
v--;
// Step 3 : compute the weight of the non diagonal terms
ij = 1 ;
a_norm = 0.0;
for( i=1; i<=n; i++ ) {
for( j=1; j<=i; j++ ) {
if( i!=j ) {
a_ij = a[ij];
a_norm += a_ij*a_ij;
}
ij++;
}
}
if( a_norm != 0.0 ) {
a_normEPS = a_norm*EPS;
thr = a_norm ;
// Step 4 : rotations
while( thr > a_normEPS && nb_iter < MAX_ITER ) {
nb_iter++;
thr_nn = thr / nn;
for( l=1 ; l< n; l++ ) {
for( m=l+1; m<=n; m++ ) {
// compute sinx and cosx
lq = (l*l-l)/2;
mq = (m*m-m)/2;
lm = l+mq;
a_lm = a[lm];
a_lm_2 = a_lm*a_lm;
if( a_lm_2 < thr_nn ) {
continue ;
}
ll = l+lq;
mm = m+mq;
a_ll = a[ll];
a_mm = a[mm];
delta = a_ll - a_mm;
if( delta == 0.0f ) {
x = - PI/4 ;
} else {
x = - atanf( (a_lm+a_lm) / delta ) / 2.0f ;
}
sinx = sinf(x);
cosx = cosf(x);
sinx_2 = sinx*sinx;
cosx_2 = cosx*cosx;
sincos = sinx*cosx;
// rotate L and M columns
ilv = n*(l-1);
imv = n*(m-1);
for( i=1; i<=n;i++ ) {
if( (i!=l) && (i!=m) ) {
iq = (i*i-i)/2;
if( i<m ) {
im = i + mq;
} else {
im = m + iq;
}
a_im = a[im];
if( i<l ) {
il = i + lq;
} else {
il = l + iq;
}
a_il = a[il];
a[il] = a_il*cosx - a_im*sinx;
a[im] = a_il*sinx + a_im*cosx;
}
ilv++;
imv++;
v_ilv = v[ilv];
v_imv = v[imv];
v[ilv] = cosx*v_ilv - sinx*v_imv;
v[imv] = sinx*v_ilv + cosx*v_imv;
}
x = a_lm*sincos; x+=x;
a[ll] = a_ll*cosx_2 + a_mm*sinx_2 - x;
a[mm] = a_ll*sinx_2 + a_mm*cosx_2 + x;
a[lm] = 0.0;
thr = fabs( thr - a_lm_2 );
}
}
}
}
// Step 5: index conversion and copy eigen values
// back from Fortran to C++
a++;
for( i=0; i<n; i++ ) {
k = i + (i*(i+1))/2;
eigen_val[i] = a[k];
}
delete[] a;
// Step 6: sort the eigen values and eigen vectors
index = new int[n];
for( i=0; i<n; i++ ) {
index[i] = i;
}
for( i=0; i<(n-1); i++ ) {
x = eigen_val[i];
k = i;
for( j=i+1; j<n; j++ ) {
if( x < eigen_val[j] ) {
k = j;
x = eigen_val[j];
}
}
eigen_val[k] = eigen_val[i];
eigen_val[i] = x;
jj = index[k];
index[k] = index[i];
index[i] = jj;
}
// Step 7: save the eigen vectors
v++; // back from Fortran to to C++
ij = 0;
for( k=0; k<n; k++ ) {
ik = index[k]*n;
for( i=0; i<n; i++ ) {
eigen_vec[ij++] = v[ik++];
}
}
delete[] v ;
delete[] index;
return;
}
//_________________________________________________________
// Eric Lengyel code:
// http://www.terathon.com/code/linear.html
#if 0
const float epsilon = 1.0e-10F;
const int maxSweeps = 32;
struct Matrix3D
{
float n[3][3];
float& operator()(int i, int j)
{
return (n[j][i]);
}
const float& operator()(int i, int j) const
{
return (n[j][i]);
}
void SetIdentity(void)
{
n[0][0] = n[1][1] = n[2][2] = 1.0F;
n[0][1] = n[0][2] = n[1][0] = n[1][2] = n[2][0] = n[2][1] = 0.0F;
}
};
void CalculateEigensystem(const Matrix3D& m, float *lambda, Matrix3D& r)
{
float m11 = m(0,0);
float m12 = m(0,1);
float m13 = m(0,2);
float m22 = m(1,1);
float m23 = m(1,2);
float m33 = m(2,2);
r.SetIdentity();
for (int a = 0; a < maxSweeps; a++)
{
// Exit if off-diagonal entries small enough
if ((Fabs(m12) < epsilon) && (Fabs(m13) < epsilon) &&
(Fabs(m23) < epsilon)) break;
// Annihilate (1,2) entry
if (m12 != 0.0F)
{
float u = (m22 - m11) * 0.5F / m12;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m11 -= t * m12;
m22 += t * m12;
m12 = 0.0F;
float temp = c * m13 - s * m23;
m23 = s * m13 + c * m23;
m13 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,0) - s * r(i,1);
r(i,1) = s * r(i,0) + c * r(i,1);
r(i,0) = temp;
}
}
// Annihilate (1,3) entry
if (m13 != 0.0F)
{
float u = (m33 - m11) * 0.5F / m13;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m11 -= t * m13;
m33 += t * m13;
m13 = 0.0F;
float temp = c * m12 - s * m23;
m23 = s * m12 + c * m23;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,0) - s * r(i,2);
r(i,2) = s * r(i,0) + c * r(i,2);
r(i,0) = temp;
}
}
// Annihilate (2,3) entry
if (m23 != 0.0F)
{
float u = (m33 - m22) * 0.5F / m23;
float u2 = u * u;
float u2p1 = u2 + 1.0F;
float t = (u2p1 != u2) ?
((u < 0.0F) ? -1.0F : 1.0F) * (sqrt(u2p1) - fabs(u)) : 0.5F / u;
float c = 1.0F / sqrt(t * t + 1.0F);
float s = c * t;
m22 -= t * m23;
m33 += t * m23;
m23 = 0.0F;
float temp = c * m12 - s * m13;
m13 = s * m12 + c * m13;
m12 = temp;
for (int i = 0; i < 3; i++)
{
float temp = c * r(i,1) - s * r(i,2);
r(i,2) = s * r(i,1) + c * r(i,2);
r(i,1) = temp;
}
}
}
lambda[0] = m11;
lambda[1] = m22;
lambda[2] = m33;
}
#endif

View File

@ -1,140 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_EIGEN_H
#define NV_MATH_EIGEN_H
#include <nvcore/Containers.h> // swap
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
// Compute first eigen vector using the power method.
Vector3 firstEigenVector(float matrix[6]);
/// Generic eigen-solver.
class Eigen
{
public:
/// Ctor.
Eigen(uint n) : N(n)
{
uint size = n * (n + 1) / 2;
matrix = new float[size];
eigen_vec = new float[N*N];
eigen_val = new float[N];
}
/// Dtor.
~Eigen()
{
delete [] matrix;
delete [] eigen_vec;
delete [] eigen_val;
}
NVMATH_API void solve();
/// Matrix accesor.
float & operator()(uint x, uint y)
{
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Matrix const accessor.
float operator()(uint x, uint y) const
{
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
Vector3 eigenVector3(uint i) const
{
nvCheck(3 == N);
nvCheck(i < N);
return Vector3(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2]);
}
Vector4 eigenVector4(uint i) const
{
nvCheck(4 == N);
nvCheck(i < N);
return Vector4(eigen_vec[i*N+0], eigen_vec[i*N+1], eigen_vec[i*N+2], eigen_vec[i*N+3]);
}
float eigenValue(uint i) const
{
nvCheck(i < N);
return eigen_val[i];
}
private:
const uint N;
float * matrix;
float * eigen_vec;
float * eigen_val;
};
/// 3x3 eigen-solver.
/// Based on Eric Lengyel's code:
/// http://www.terathon.com/code/linear.html
class Eigen3
{
public:
/** Ctor. */
Eigen3() {}
NVMATH_API void solve();
/// Matrix accesor.
float & operator()(uint x, uint y)
{
nvDebugCheck( x < 3 && y < 3 );
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Matrix const accessor.
float operator()(uint x, uint y) const
{
nvDebugCheck( x < 3 && y < 3 );
if( x > y ) {
swap(x, y);
}
return matrix[y * (y + 1) / 2 + x];
}
/// Get ith eigen vector.
Vector3 eigenVector(uint i) const
{
nvCheck(i < 3);
return eigen_vec[i];
}
/** Get ith eigen value. */
float eigenValue(uint i) const
{
nvCheck(i < 3);
return eigen_val[i];
}
private:
float matrix[3+2+1];
Vector3 eigen_vec[3];
float eigen_val[3];
};
} // nv namespace
#endif // NV_MATH_EIGEN_H

View File

@ -1,134 +0,0 @@
// License: Wild Magic License Version 3
// http://geometrictools.com/License/WildMagic3License.pdf
#include "Fitting.h"
#include "Eigen.h"
using namespace nv;
/** Fit a 3d line to the given set of points.
*
* Based on code from:
* http://geometrictools.com/
*/
Line3 Fit::bestLine(const Array<Vector3> & pointArray)
{
nvDebugCheck(pointArray.count() > 0);
Line3 line;
const uint pointCount = pointArray.count();
const float inv_num = 1.0f / pointCount;
// compute the mean of the points
Vector3 center(zero);
for(uint i = 0; i < pointCount; i++) {
center += pointArray[i];
}
line.setOrigin(center * inv_num);
// compute the covariance matrix of the points
float covariance[6] = {0, 0, 0, 0, 0, 0};
for(uint i = 0; i < pointCount; i++) {
Vector3 diff = pointArray[i] - line.origin();
covariance[0] += diff.x() * diff.x();
covariance[1] += diff.x() * diff.y();
covariance[2] += diff.x() * diff.z();
covariance[3] += diff.y() * diff.y();
covariance[4] += diff.y() * diff.z();
covariance[5] += diff.z() * diff.z();
}
line.setDirection(normalizeSafe(firstEigenVector(covariance), Vector3(zero), 0.0f));
// @@ This variant is from David Eberly... I'm not sure how that works.
/*sum_xx *= inv_num;
sum_xy *= inv_num;
sum_xz *= inv_num;
sum_yy *= inv_num;
sum_yz *= inv_num;
sum_zz *= inv_num;
// set up the eigensolver
Eigen3 ES;
ES(0,0) = sum_yy + sum_zz;
ES(0,1) = -sum_xy;
ES(0,2) = -sum_xz;
ES(1,1) = sum_xx + sum_zz;
ES(1,2) = -sum_yz;
ES(2,2) = sum_xx + sum_yy;
// compute eigenstuff, smallest eigenvalue is in last position
ES.solve();
line.setDirection(ES.eigenVector(2));
nvCheck( isNormalized(line.direction()) );
*/
return line;
}
/** Fit a 3d plane to the given set of points.
*
* Based on code from:
* http://geometrictools.com/
*/
Vector4 Fit::bestPlane(const Array<Vector3> & pointArray)
{
Vector3 center(zero);
const uint pointCount = pointArray.count();
const float inv_num = 1.0f / pointCount;
// compute the mean of the points
for(uint i = 0; i < pointCount; i++) {
center += pointArray[i];
}
center *= inv_num;
// compute the covariance matrix of the points
float sum_xx = 0.0f;
float sum_xy = 0.0f;
float sum_xz = 0.0f;
float sum_yy = 0.0f;
float sum_yz = 0.0f;
float sum_zz = 0.0f;
for(uint i = 0; i < pointCount; i++) {
Vector3 diff = pointArray[i] - center;
sum_xx += diff.x() * diff.x();
sum_xy += diff.x() * diff.y();
sum_xz += diff.x() * diff.z();
sum_yy += diff.y() * diff.y();
sum_yz += diff.y() * diff.z();
sum_zz += diff.z() * diff.z();
}
sum_xx *= inv_num;
sum_xy *= inv_num;
sum_xz *= inv_num;
sum_yy *= inv_num;
sum_yz *= inv_num;
sum_zz *= inv_num;
// set up the eigensolver
Eigen3 ES;
ES(0,0) = sum_yy + sum_zz;
ES(0,1) = -sum_xy;
ES(0,2) = -sum_xz;
ES(1,1) = sum_xx + sum_zz;
ES(1,2) = -sum_yz;
ES(2,2) = sum_xx + sum_yy;
// compute eigenstuff, greatest eigenvalue is in first position
ES.solve();
Vector3 normal = ES.eigenVector(0);
nvCheck(isNormalized(normal));
float offset = dot(normal, center);
return Vector4(normal, offset);
}

View File

@ -1,78 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_FITTING_H
#define NV_MATH_FITTING_H
#include <nvmath/Vector.h>
namespace nv
{
/// 3D Line.
struct Line3
{
/// Ctor.
Line3() : m_origin(zero), m_direction(zero)
{
}
/// Copy ctor.
Line3(const Line3 & l) : m_origin(l.m_origin), m_direction(l.m_direction)
{
}
/// Ctor.
Line3(Vector3::Arg o, Vector3::Arg d) : m_origin(o), m_direction(d)
{
}
/// Normalize the line.
void normalize()
{
m_direction = nv::normalize(m_direction);
}
/// Project a point onto the line.
Vector3 projectPoint(Vector3::Arg point) const
{
nvDebugCheck(isNormalized(m_direction));
Vector3 v = point - m_origin;
return m_origin + m_direction * dot(m_direction, v);
}
/// Compute distance to line.
float distanceToPoint(Vector3::Arg point) const
{
nvDebugCheck(isNormalized(m_direction));
Vector3 v = point - m_origin;
Vector3 l = v - m_direction * dot(m_direction, v);
return length(l);
}
const Vector3 & origin() const { return m_origin; }
void setOrigin(Vector3::Arg value) { m_origin = value; }
const Vector3 & direction() const { return m_direction; }
void setDirection(Vector3::Arg value) { m_direction = value; }
private:
Vector3 m_origin;
Vector3 m_direction;
};
namespace Fit
{
NVMATH_API Line3 bestLine(const Array<Vector3> & pointArray);
NVMATH_API Vector4 bestPlane(const Array<Vector3> & pointArray);
} // Fit namespace
} // nv namespace
#endif // _PI_MATHLIB_FITTING_H_

17
src/nvmath/Plane.cpp Normal file
View File

@ -0,0 +1,17 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "Plane.h"
#include "Matrix.h"
namespace nv
{
Plane transformPlane(const Matrix& m, Plane::Arg p)
{
Vector3 newVec = transformVector(m, p.vector());
Vector3 ptInPlane = p.offset() * p.vector();
ptInPlane = transformPoint(m, ptInPlane);
return Plane(newVec, ptInPlane);
}
}

77
src/nvmath/Plane.h Normal file
View File

@ -0,0 +1,77 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_PLANE_H
#define NV_MATH_PLANE_H
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
namespace nv
{
class Matrix;
class NVMATH_CLASS Plane
{
public:
typedef Plane const & Arg;
Plane();
Plane(float x, float y, float z, float w);
Plane(Vector4::Arg v);
Plane(Vector3::Arg v, float d);
Plane(Vector3::Arg normal, Vector3::Arg point);
const Plane & operator=(Plane::Arg v);
Vector3 vector() const;
scalar offset() const;
const Vector4 & asVector() const;
Vector4 & asVector();
void operator*=(scalar s);
private:
Vector4 p;
};
inline Plane::Plane() {}
inline Plane::Plane(float x, float y, float z, float w) : p(x, y, z, w) {}
inline Plane::Plane(Vector4::Arg v) : p(v) {}
inline Plane::Plane(Vector3::Arg v, float d) : p(v, d) {}
inline Plane::Plane(Vector3::Arg normal, Vector3::Arg point) : p(normal, dot(normal, point)) {}
inline const Plane & Plane::operator=(Plane::Arg v) { p = v.p; return *this; }
inline Vector3 Plane::vector() const { return p.xyz(); }
inline scalar Plane::offset() const { return p.w(); }
inline const Vector4 & Plane::asVector() const { return p; }
inline Vector4 & Plane::asVector() { return p; }
// Normalize plane.
inline Plane normalize(Plane::Arg plane, float epsilon = NV_EPSILON)
{
const float len = length(plane.vector());
nvDebugCheck(!isZero(len, epsilon));
const float inv = 1.0f / len;
return Plane(plane.asVector() * inv);
}
// Get the distance from the given point to this plane.
inline float distance(Plane::Arg plane, Vector3::Arg point)
{
return dot(plane.vector(), point) - plane.offset();
}
inline void Plane::operator*=(scalar s)
{
scale(p, s);
}
Plane transformPlane(const Matrix&, Plane::Arg);
} // nv namespace
#endif // NV_MATH_PLANE_H

View File

@ -79,9 +79,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)

View File

@ -98,7 +98,15 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
QuickCompress::compressDXT1a(rgba, &block);
// @@ We could do better here: check for single RGB, but varying alpha.
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1a(rgba, &block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -119,7 +127,7 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -140,7 +148,8 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
compressBlock_BoundsRange(rgba, &block);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -164,8 +173,9 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n();
compressBlock_BoundsRange(rgba, &block);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -191,7 +201,7 @@ void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private
void nv::doPrecomputation()
{
static bool done = false; // @@ Stop using statics for reentrancy.
static bool done = false; // @@ Stop using statics for reentrancy. Although the worst that could happen is that this stuff is precomputed multiple times.
if (!done)
{
@ -256,10 +266,16 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
rgba.init(image, x, y);
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kDxt1|squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, squish::kDxt1);
fit.Compress(&block);
}
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -286,7 +302,7 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
rgba.init(image, x, y);
// Compress explicit alpha.
compressBlock(rgba, &block.alpha);
QuickCompress::compressDXT3A(rgba, &block.alpha);
// Compress color.
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
@ -317,14 +333,13 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
rgba.init(image, x, y);
// Compress alpha.
uint error;
if (compressionOptions.quality == Quality_Highest)
{
error = compressBlock_BruteForce(rgba, &block.alpha);
compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
error = compressBlock_Iterative(rgba, &block.alpha);
QuickCompress::compressDXT5A(rgba, &block.alpha);
}
// Compress color.
@ -359,10 +374,13 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
rgba.swizzleDXT5n();
// Compress X.
uint error = compressBlock_Iterative(rgba, &block.alpha);
if (compressionOptions.quality == Quality_Highest)
{
error = compressBlock_BruteForce(rgba, &block.alpha);
compressBlock_BruteForce(rgba, &block.alpha);
}
else
{
QuickCompress::compressDXT5A(rgba, &block.alpha);
}
// Compress Y.
@ -384,23 +402,19 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
ColorBlock rgba;
AlphaBlockDXT5 block;
uint totalError = 0;
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
//error = compressBlock_BoundsRange(rgba, &block);
uint error = compressBlock_Iterative(rgba, &block);
if (compressionOptions.quality == Quality_Highest)
{
// Try brute force algorithm.
error = compressBlock_BruteForce(rgba, &block);
compressBlock_BruteForce(rgba, &block);
}
else
{
QuickCompress::compressDXT5A(rgba, &block);
}
totalError += error;
if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -429,18 +443,15 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
ycolor.init(image, x, y);
ycolor.splatY();
// @@ Compute normal error, instead of separate xy errors.
uint xerror, yerror;
if (compressionOptions.quality == Quality_Highest)
{
xerror = compressBlock_BruteForce(xcolor, &block.x);
yerror = compressBlock_BruteForce(ycolor, &block.y);
compressBlock_BruteForce(xcolor, &block.x);
compressBlock_BruteForce(ycolor, &block.y);
}
else
{
xerror = compressBlock_Iterative(xcolor, &block.x);
yerror = compressBlock_Iterative(ycolor, &block.y);
QuickCompress::compressDXT5A(xcolor, &block.x);
QuickCompress::compressDXT5A(ycolor, &block.y);
}
if (outputOptions.outputHandler != NULL) {

View File

@ -115,12 +115,18 @@ void nv::compressRGB(const Image * image, const OutputOptions::Private & outputO
c |= PixelFormat::convert(src[x].b, 8, bsize) << bshift;
c |= PixelFormat::convert(src[x].a, 8, asize) << ashift;
// Output one byte at a time. @@ Not tested... Does this work on LE and BE?
// Output one byte at a time.
for (uint i = 0; i < byteCount; i++)
{
*(dst + x * byteCount) = (c >> (i * 8)) & 0xFF;
*(dst + x * byteCount + i) = (c >> (i * 8)) & 0xFF;
}
}
// Zero padding.
for (uint x = w; x < pitch; x++)
{
*(dst + x) = 0;
}
}
if (outputOptions.outputHandler != NULL)

View File

@ -56,7 +56,7 @@ namespace
static int blockSize(Format format)
{
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) {
if (format == Format_DXT1 || format == Format_DXT1a) {
return 8;
}
else if (format == Format_DXT3) {
@ -71,9 +71,6 @@ namespace
else if (format == Format_BC5) {
return 16;
}
else if (format == Format_CTX1) {
return 8;
}
return 0;
}
@ -215,6 +212,12 @@ Compressor::Compressor() : m(*new Compressor::Private())
if (m.cudaEnabled)
{
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
@ -235,6 +238,12 @@ void Compressor::enableCudaAcceleration(bool enable)
if (m.cudaEnabled && m.cuda == NULL)
{
m.cuda = new CudaCompressor();
if (!m.cuda->isValid())
{
m.cudaEnabled = false;
m.cuda = NULL;
}
}
}
@ -329,14 +338,14 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
if (compressionOptions.format == Format_RGBA)
{
header.setPitch(4 * inputOptions.targetWidth);
header.setPitch(computePitch(inputOptions.targetWidth, compressionOptions.bitcount));
header.setPixelFormat(compressionOptions.bitcount, compressionOptions.rmask, compressionOptions.gmask, compressionOptions.bmask, compressionOptions.amask);
}
else
{
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) {
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
@ -357,10 +366,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
}
// Swap bytes if necessary.
@ -712,18 +717,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
}
}
}
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3)
{
if (compressionOptions.quality == Quality_Fastest)
@ -781,18 +774,6 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const Compressio
{
compressBC5(image, outputOptions, compressionOptions);
}
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, outputOptions, compressionOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
return true;
}

File diff suppressed because it is too large Load Diff

View File

@ -38,40 +38,37 @@ namespace nv
// Color compression:
// Compressor that uses the extremes of the luminance axis.
void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the best fit axis.
void compressBlock_BestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
@ -80,7 +77,7 @@ namespace nv
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace

View File

@ -288,62 +288,219 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b);
}*/
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
namespace
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
uint idx = block->index(i);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
swap(alpha0, alpha1);
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
// Flip indices:
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
} // namespace
namespace
{
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
}
static void optimizeAlpha8(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
float alpha;
if (idx < 2) alpha = 1.0f - idx;
else alpha = (8.0f - idx) / 7.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * rgba.color(i).a;
betax_sum += beta * rgba.color(i).a;
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 < alpha1)
{
block->setIndex(i, 0);
swap(alpha0, alpha1);
// Flip indices:
for (int i = 0; i < 16; i++)
{
uint idx = block->index(i);
if (idx < 2) block->setIndex(i, 1 - idx);
else block->setIndex(i, 9 - idx);
}
}
else if (alpha0 == alpha1)
{
for (int i = 0; i < 16; i++)
{
block->setIndex(i, 0);
}
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
/*
static void optimizeAlpha6(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
float alpha2_sum = 0;
float beta2_sum = 0;
float alphabeta_sum = 0;
float alphax_sum = 0;
float betax_sum = 0;
for (int i = 0; i < 16; i++)
{
uint8 x = rgba.color(i).a;
if (x == 0 || x == 255) continue;
uint bits = block->index(i);
if (bits == 6 || bits == 7) continue;
float alpha;
if (bits == 0) alpha = 1.0f;
else if (bits == 1) alpha = 0.0f;
else alpha = (6.0f - block->index(i)) / 5.0f;
float beta = 1 - alpha;
alpha2_sum += alpha * alpha;
beta2_sum += beta * beta;
alphabeta_sum += alpha * beta;
alphax_sum += alpha * x;
betax_sum += beta * x;
}
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
uint alpha0 = uint(min(max(a, 0.0f), 255.0f));
uint alpha1 = uint(min(max(b, 0.0f), 255.0f));
if (alpha0 > alpha1)
{
swap(alpha0, alpha1);
}
block->alpha0 = alpha0;
block->alpha1 = alpha1;
}
*/
static bool sameIndices(const AlphaBlockDXT5 & block0, const AlphaBlockDXT5 & block1)
{
const uint64 mask = ~uint64(0xFFFF);
return (block0.u | mask) == (block1.u | mask);
}
} // namespace
@ -360,10 +517,10 @@ void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
@ -398,6 +555,20 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
void QuickCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a == 0)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
}
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{
if (!rgba.hasAlpha())
@ -436,66 +607,6 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
}
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
@ -558,6 +669,7 @@ void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
// @@ Round instead of truncate. When rounding take into account bit expansion.
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
@ -582,9 +694,49 @@ void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
compressDXT3A(rgba, &dxtBlock->alpha);
}
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
// @@ TODO
uint8 alpha0 = 0;
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
AlphaBlockDXT5 block;
block.alpha0 = alpha0 - (alpha0 - alpha1) / 34;
block.alpha1 = alpha1 + (alpha0 - alpha1) / 34;
uint besterror = computeAlphaIndices(rgba, &block);
AlphaBlockDXT5 bestblock = block;
while(true)
{
optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block);
if (error >= besterror)
{
// No improvement, stop.
break;
}
if (sameIndices(block, bestblock))
{
bestblock = block;
break;
}
besterror = error;
bestblock = block;
};
// Copy best block to result;
*dxtBlock = bestblock;
}
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock)

View File

@ -37,8 +37,9 @@ namespace nv
namespace QuickCompress
{
void compressDXT1(const Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);

View File

@ -205,43 +205,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
}
}
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float2 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
}
}
////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand
@ -258,26 +221,6 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
return v;
}
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
return v;
}
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 255.0f);
v.y = rintf(__saturatef(v.y) * 255.0f);
*w = ((ushort)v.x << 8) | ((ushort)v.y);
v.x *= 1.0f / 255.0f;
v.y *= 1.0f / 255.0f;
return v;
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations
@ -521,114 +464,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
}
*/
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable3[bits & 3] * colors[i];
akku += prods3[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 4.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 4.0f) * (e.x + e.y);
}
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 8-8 color and expand...
a = roundAndExpand88(a, start);
b = roundAndExpand88(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations
@ -757,67 +592,6 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
}
*/
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
__shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError;
}
__device__ void evalLevel4Permutations(const float3 * colors, const float * weights, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
@ -852,39 +626,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
errors[idx] = bestError;
}
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
////////////////////////////////////////////////////////////////////////////////
@ -996,11 +737,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
result[bid].y = indices;
}
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{
const int bid = blockIdx.x;
@ -1012,10 +748,10 @@ __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
ushort color0 = (OMatch5[r][0] << 11) | (OMatch6[g][0] << 5) | OMatch5[b][0];
ushort color1 = (OMatch5[r][1] << 11) | (OMatch6[g][1] << 5) | OMatch5[b][1];
if (color0 < color1)
{
result[bid].x = (color0 << 16) | color1;
result[bid].y = 0xffffffff;
if (color0 < color1)
{
result[bid].x = (color0 << 16) | color1;
result[bid].y = 0xffffffff;
}
else
{
@ -1092,61 +828,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
}
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
/*
__device__ float computeError(const float weights[16], uchar a0, uchar a1)
{
@ -1356,13 +1037,3 @@ extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint *
{
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}

File diff suppressed because it is too large Load Diff

View File

@ -37,11 +37,11 @@ namespace nv
CudaCompressor();
~CudaCompressor();
bool isValid() const;
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressDXT1n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressCTX1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
private:

View File

@ -87,64 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z;
}
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b)
{
return a.x * b.x + a.y * b.y + a.z * b.z;
@ -301,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance);
}
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[3] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H

View File

@ -49,6 +49,14 @@
#define NVTT_VERSION 200
#define NVTT_DECLARE_PIMPL(Class) \
private: \
Class(const Class &); \
void operator=(const Class &); \
public: \
struct Private; \
Private & m
// Public interface.
namespace nvtt
@ -75,9 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
};
/// Quality modes.
@ -92,6 +97,8 @@ namespace nvtt
/// Compression options. This class describes the desired compression format and other compression settings.
struct CompressionOptions
{
NVTT_DECLARE_PIMPL(CompressionOptions);
NVTT_API CompressionOptions();
NVTT_API ~CompressionOptions();
@ -107,10 +114,6 @@ namespace nvtt
NVTT_API void setPixelFormat(unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void setQuantization(bool colorDithering, bool alphaDithering, bool binaryAlpha, int alphaThreshold = 127);
//private:
struct Private;
Private & m;
};
@ -173,6 +176,8 @@ namespace nvtt
/// Input options. Specify format and layout of the input texture.
struct InputOptions
{
NVTT_DECLARE_PIMPL(InputOptions);
NVTT_API InputOptions();
NVTT_API ~InputOptions();
@ -217,10 +222,6 @@ namespace nvtt
// Set resizing options.
NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode);
//private:
struct Private;
Private & m;
};
@ -261,6 +262,8 @@ namespace nvtt
/// the compressor to the user.
struct OutputOptions
{
NVTT_DECLARE_PIMPL(OutputOptions);
NVTT_API OutputOptions();
NVTT_API ~OutputOptions();
@ -272,16 +275,14 @@ namespace nvtt
NVTT_API void setOutputHandler(OutputHandler * outputHandler);
NVTT_API void setErrorHandler(ErrorHandler * errorHandler);
NVTT_API void setOutputHeader(bool outputHeader);
//private:
struct Private;
Private & m;
};
/// Texture compressor.
struct Compressor
{
NVTT_DECLARE_PIMPL(Compressor);
NVTT_API Compressor();
NVTT_API ~Compressor();
@ -293,10 +294,6 @@ namespace nvtt
// Estimate the size of compressing the input with the given options.
NVTT_API int estimateSize(const InputOptions & inputOptions, const CompressionOptions & compressionOptions) const;
//private:
struct Private;
Private & m;
};

View File

@ -207,7 +207,6 @@ NVTT_API void nvttDestroyCompressionOptions(NvttCompressionOptions * compression
NVTT_API void nvttSetCompressionOptionsFormat(NvttCompressionOptions * compressionOptions, NvttFormat format);
NVTT_API void nvttSetCompressionOptionsQuality(NvttCompressionOptions * compressionOptions, NvttQuality quality);
NVTT_API void nvttSetCompressionOptionsColorWeights(NvttCompressionOptions * compressionOptions, float red, float green, float blue, float alpha);
NVTT_API void nvttEnableCompressionOptionsCudaCompression(NvttCompressionOptions * compressionOptions, NvttBoolean enable);
NVTT_API void nvttSetCompressionOptionsPixelFormat(NvttCompressionOptions * compressionOptions, unsigned int bitcount, unsigned int rmask, unsigned int gmask, unsigned int bmask, unsigned int amask);
NVTT_API void nvttSetCompressionOptionsQuantization(NvttCompressionOptions * compressionOptions, NvttBoolean colorDithering, NvttBoolean alphaDithering, NvttBoolean binaryAlpha, int alphaThreshold);

View File

@ -50,6 +50,16 @@ public:
return *this;
}
Vec4( const float * v )
{
union { vector float v; float c[4]; } u;
u.c[0] = v[0];
u.c[1] = v[1];
u.c[2] = v[2];
u.c[3] = v[3];
m_v = u.v;
}
Vec4( float x, float y, float z, float w )
{
union { vector float v; float c[4]; } u;

View File

@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}