53 Commits
2.0.2 ... 2.0.3

Author SHA1 Message Date
92f730457c Set executable property to dll 2008-09-10 22:08:43 +00:00
90be3fa28d Tag 2.0.3 release. 2008-06-09 19:15:52 +00:00
55e7d3dec4 Delete outdated comment. 2008-06-03 06:34:18 +00:00
b5e373b734 Compile NVTT under G5/leopard 2008-05-23 22:22:09 +00:00
6ce542b5c0 Fix error in cmake file. 2008-05-23 06:51:02 +00:00
58e5f6534f Print version number in copyright statement. 2008-05-22 21:48:40 +00:00
eda4786ca6 Update version number. 2008-05-22 21:48:19 +00:00
29a720bf82 Do not print text. 2008-05-22 21:32:09 +00:00
1120f83f7d Fix errors. 2008-05-22 21:31:44 +00:00
c38c3dc584 Fix win32 buid errors and warnings due to FreeImage. 2008-05-22 21:31:15 +00:00
7d3d0ede9d Update projects to use FreeImage. 2008-05-22 21:30:34 +00:00
bc66a7ad74 Update cmake files. 2008-05-22 19:47:52 +00:00
ca8d17abf5 Reorganize external libs. 2008-05-22 19:47:23 +00:00
10e79dac9d Update changelog. 2008-05-21 19:22:43 +00:00
e068964423 Fix EXR loading code. Issue 45. Fix provided by alastairpatrick. 2008-05-21 19:17:27 +00:00
ea340443d9 Add -mipfilter command line option, per request of Noel Llopis. 2008-05-19 20:10:05 +00:00
fb2b0cb38c Fix bug reported by Noel Llopis.
Make sure FreeImage declarations are not used when FreeImage is not available.
2008-05-19 18:23:42 +00:00
c01566cd2f Add support for FreeImage in nvimage.
Add support for floating point input images in nvtt.
2008-05-15 09:47:55 +00:00
47bdab8e27 Define FREEIMAGE_LIBRARIES. 2008-05-15 09:46:45 +00:00
70267fda15 Add support for input floating point images. Patch provided by Jim Tilander. See issue 27. 2008-05-15 06:18:24 +00:00
aebcea412c Search for freeimage. 2008-05-15 06:17:46 +00:00
bccdcd49da Use standard gram schmidt 2008-05-15 06:04:17 +00:00
68e9f05794 Add freeimage cmake file. 2008-05-15 05:57:11 +00:00
0f186e688f Remove 'virtual' from non virtual methods. 2008-05-08 21:15:05 +00:00
38e9652d64 Remove executable flag. 2008-05-08 18:20:55 +00:00
f08114c1b5 Whops, check fix for vc9. 2008-05-08 18:18:53 +00:00
7b9f891f92 Update vc9 projects. 2008-05-07 21:58:50 +00:00
787c9bb8fb Add vc9 projects. 2008-05-07 21:55:33 +00:00
42220b981e Add vc9 folder 2008-05-07 21:45:48 +00:00
70331a37fd Refactor compression functions, group them into class methods. 2008-05-06 23:55:19 +00:00
2ffc4cd7ad Ups, checked in the file before saving it. 2008-05-06 23:34:46 +00:00
4ba8e87a38 Ups, checked in file before saving it. 2008-05-06 23:34:13 +00:00
d440d68aa8 Update ChangeLog. 2008-05-06 23:22:04 +00:00
48f61dbfc0 Add support for linear and swizzle transforms. Fixes issue 4. 2008-05-06 23:21:39 +00:00
94c3fa75a8 Add comments indicating where to perform linear color transforms. 2008-05-06 22:01:23 +00:00
c562af6d9b Integrate YCoCg color space conversion by Jim Tilander. 2008-05-06 21:49:10 +00:00
a889f2fda6 Add support for alpha modes in the CUDA compressors. 2008-05-06 20:04:05 +00:00
d855d0461b Add single color checks to CUDA compressors.
Use optimized bitmap table for CTX compressor.
2008-05-06 19:52:27 +00:00
246f2a409a Add cpp file to project so that a library is built. 2008-05-06 19:48:43 +00:00
6a6b3edce1 factorial optimization suggested by pponywong. 2008-05-06 06:37:06 +00:00
52b3bc9437 Update project files. Remove fast compressor, add optimal compressor. 2008-04-29 22:34:09 +00:00
f6ab357b09 Add missing files to project. 2008-04-29 22:33:42 +00:00
ce3a65c03e Fix operator= in String class. 2008-04-29 22:32:12 +00:00
8d9bf5c0b3 Fix after refactoring CPU compressors. Changes were not tested with CUDA enabled. 2008-04-29 22:31:55 +00:00
ab5265e642 Remove declaration of method that was removed. 2008-04-28 08:39:24 +00:00
fd1d5e41c7 Add missing files! 2008-04-28 06:22:26 +00:00
3980d5dc21 Update changelog. 2008-04-26 09:17:16 +00:00
15e7125b4b Check for single color blocks in all compressors. 2008-04-26 09:16:56 +00:00
921ee354c0 Remove legacy compressors.
Add iteration count parameter to iterative alpha compressor.
Add optimal compressors.
2008-04-26 08:02:30 +00:00
e3f7e303e4 Use FLT_MAX instead of INFINITE. The latter not supported in msvc. 2008-04-20 06:01:50 +00:00
1df69495fc Precompute fast cluster fit factors, and store as static const.
nvtt is completely reentrant now. Fixes issue 37.
cleanup interface of cuda compressors.
2008-04-18 08:49:32 +00:00
91eb30667f Add TLS class wrapper.
Fix AutoPtr operator=.
Fix typo.
2008-04-17 18:39:01 +00:00
6db5cffca6 Fix changelog. 2008-04-17 09:28:38 +00:00
79 changed files with 2586 additions and 6035 deletions

View File

@ -1,7 +1,8 @@
NVIDIA Texture Tools version 2.1.0 NVIDIA Texture Tools version 2.0.3
* CTX1 CUDA compressor. * More accurate DXT3 compressor. Fixes issue 38.
* DXT1n CUDA compressor. * Remove legacy compressors. Fix issue 34.
* Support alpha premultiplication by Charles Nicholson. See issue 30. * Check for single color in all compressors. Fixes issue 43.
* Fix error in fast downsample filter, reported by Noel Llopis.
NVIDIA Texture Tools version 2.0.2 NVIDIA Texture Tools version 2.0.2
* Fix copy ctor error reported by Richard Sim. * Fix copy ctor error reported by Richard Sim.
@ -10,7 +11,6 @@ NVIDIA Texture Tools version 2.0.2
* Fix RGBA modes with less than 32 bpp by Viktor Linder. * Fix RGBA modes with less than 32 bpp by Viktor Linder.
* Fix alpha decompression by Amorilia. See issue 40. * Fix alpha decompression by Amorilia. See issue 40.
* Avoid default-initialized constructors for POD types, reported by Jim Tilander. * Avoid default-initialized constructors for POD types, reported by Jim Tilander.
* Improved decompressor tool submitted by Amorilia. See issue 41.
* Add single color compresor for DXT1a. * Add single color compresor for DXT1a.
* Set swizzle code to ATI2 files. See issue 41. * Set swizzle code to ATI2 files. See issue 41.

View File

@ -2,7 +2,7 @@
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
NVIDIA Texture Tools NVIDIA Texture Tools
README.txt README.txt
Version 2.1 Version 2.0
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------
-------------------------------------------------------------------------------- --------------------------------------------------------------------------------

View File

@ -1 +1 @@
2.1.0 2.0.3

BIN
gnuwin32/lib/libpng.a Normal file

Binary file not shown.

BIN
gnuwin32/lib/libz.a Normal file

Binary file not shown.

View File

@ -278,6 +278,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib" AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17" TargetMachine="17"
/> />
<Tool <Tool

View File

@ -277,6 +277,7 @@
AdditionalDependencies="libpng.lib jpeg.lib tiff.lib" AdditionalDependencies="libpng.lib jpeg.lib tiff.lib"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe" OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
AdditionalLibraryDirectories="..\..\..\gnuwin32\lib" AdditionalLibraryDirectories="..\..\..\gnuwin32\lib"
LinkTimeCodeGeneration="1"
TargetMachine="17" TargetMachine="17"
/> />
<Tool <Tool

View File

@ -277,6 +277,10 @@
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx" Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}" UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
> >
<File
RelativePath="..\..\..\src\nvmath\Plane.cpp"
>
</File>
</Filter> </Filter>
<Filter <Filter
Name="Header Files" Name="Header Files"
@ -295,6 +299,10 @@
RelativePath="..\..\..\src\nvmath\Matrix.h" RelativePath="..\..\..\src\nvmath\Matrix.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvmath\Plane.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvmath\Vector.h" RelativePath="..\..\..\src\nvmath\Vector.h"
> >

View File

@ -3,18 +3,18 @@ Microsoft Visual Studio Solution File, Format Version 9.00
# Visual Studio 2005 # Visual Studio 2005
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvtt", "nvtt\nvtt.vcproj", "{1AEB7681-57D8-48EE-813D-5C41CC38B647}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38} {CE017322-01FC-4851-9C8B-64E9A8E26C38} = {CE017322-01FC-4851-9C8B-64E9A8E26C38}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvcompress", "nvcompress\nvcompress.vcproj", "{88079E38-83AA-4E8A-B18A-66A78D1B058B}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimage", "nvimage\nvimage.vcproj", "{4046F392-A18B-4C66-9639-3EABFFF5D531}"
@ -34,16 +34,16 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvddsinfo", "nvddsinfo\nvdd
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvdecompress", "nvdecompress\nvdecompress.vcproj", "{75A0527D-BFC9-49C3-B46B-CD1A901D5927}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvimgdiff", "nvimgdiff\nvimgdiff.vcproj", "{05A59E8B-EA70-4F22-89E8-E0927BA13064}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06} {50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531} {4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nvassemble.vcproj", "{3BC6D760-91E8-4FFB-BD0E-F86F367AD8EA}"
@ -55,18 +55,13 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvassemble", "nvassemble\nv
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}" Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "nvzoom", "nvzoom\nvzoom.vcproj", "{51999D3E-EF22-4BDD-965F-4201034D3DCE}"
ProjectSection(ProjectDependencies) = postProject ProjectSection(ProjectDependencies) = postProject
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} {F143D180-D4C4-4037-B3DE-BE89A21C8D1D} = {F143D180-D4C4-4037-B3DE-BE89A21C8D1D}
{4046F392-A18B-4C66-9639-3EABFFF5D531} = {4046F392-A18B-4C66-9639-3EABFFF5D531}
{50C465FE-B308-42BC-894D-89484482AF06} = {50C465FE-B308-42BC-894D-89484482AF06}
EndProjectSection EndProjectSection
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "Nvidia.TextureTools", "Nvidia.TextureTools\Nvidia.TextureTools.csproj", "{CAB55C39-8FA9-4912-98D9-E52669C8911D}"
EndProject EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "stress", "stress\stress.vcproj", "{317B694E-B5C1-42A6-956F-FC12B69175A6}"
ProjectSection(ProjectDependencies) = postProject
{1AEB7681-57D8-48EE-813D-5C41CC38B647} = {1AEB7681-57D8-48EE-813D-5C41CC38B647}
EndProjectSection
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU Debug (no cuda)|Any CPU = Debug (no cuda)|Any CPU
@ -319,22 +314,6 @@ Global
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Any CPU.Build.0 = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|Win32.ActiveCfg = Release|Any CPU
{CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU {CAB55C39-8FA9-4912-98D9-E52669C8911D}.Release|x64.ActiveCfg = Release|Any CPU
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug (no cuda)|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Any CPU.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|Win32.Build.0 = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Debug|x64.ActiveCfg = Debug|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release (no cuda)|x64.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Any CPU.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.ActiveCfg = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|Win32.Build.0 = Release|Win32
{317B694E-B5C1-42A6-956F-FC12B69175A6}.Release|x64.ActiveCfg = Release|Win32
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE

View File

@ -53,8 +53,8 @@ END
// //
VS_VERSION_INFO VERSIONINFO VS_VERSION_INFO VERSIONINFO
FILEVERSION 2,1,0,0 FILEVERSION 2,0,3,0
PRODUCTVERSION 2,1,0,0 PRODUCTVERSION 2,0,3,0
FILEFLAGSMASK 0x17L FILEFLAGSMASK 0x17L
#ifdef _DEBUG #ifdef _DEBUG
FILEFLAGS 0x1L FILEFLAGS 0x1L
@ -71,12 +71,12 @@ BEGIN
BEGIN BEGIN
VALUE "CompanyName", "NVIDIA Corporation" VALUE "CompanyName", "NVIDIA Corporation"
VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library" VALUE "FileDescription", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "FileVersion", "2, 1, 0, 0" VALUE "FileVersion", "2, 0, 3, 0"
VALUE "InternalName", "nvtt" VALUE "InternalName", "nvtt"
VALUE "LegalCopyright", "Copyright (C) 2007" VALUE "LegalCopyright", "Copyright (C) 2007"
VALUE "OriginalFilename", "nvtt.dll" VALUE "OriginalFilename", "nvtt.dll"
VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library" VALUE "ProductName", "NVIDIA Texture Tools Dynamic Link Library"
VALUE "ProductVersion", "2, 1, 0, 0" VALUE "ProductVersion", "2, 0, 3, 0"
END END
END END
BLOCK "VarFileInfo" BLOCK "VarFileInfo"

View File

@ -691,7 +691,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -701,7 +701,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -D_DEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/Od,/Zi,/RTC1,/MDd -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -711,7 +711,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m32 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -721,7 +721,7 @@
> >
<Tool <Tool
Name="VCCustomBuildTool" Name="VCCustomBuildTool"
CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -keep -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;" CommandLine="&quot;$(CUDA_BIN_PATH)\nvcc.exe&quot; -m64 -ccbin &quot;$(VCInstallDir)bin&quot; -c -DNDEBUG -DWIN32 -D_CONSOLE -D_MBCS -Xcompiler /EHsc,/W3,/nologo,/Wp64,/O2,/Zi,/MD -I&quot;$(CUDA_INC_PATH)&quot; -I./ -o $(IntDir)\$(InputName).obj ..\\..\\..\\src\\nvtt\\cuda\\CompressKernel.cu&#x0D;&#x0A;"
AdditionalDependencies="CudaMath.h" AdditionalDependencies="CudaMath.h"
Outputs="$(IntDir)\$(InputName).obj" Outputs="$(IntDir)\$(InputName).obj"
/> />
@ -849,10 +849,6 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp" RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.cpp"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.cpp"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\InputOptions.cpp" RelativePath="..\..\..\src\nvtt\InputOptions.cpp"
> >
@ -865,6 +861,10 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp" RelativePath="..\..\..\src\nvtt\nvtt_wrapper.cpp"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.cpp"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\OutputOptions.cpp" RelativePath="..\..\..\src\nvtt\OutputOptions.cpp"
> >
@ -911,10 +911,6 @@
RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h" RelativePath="..\..\..\src\nvtt\cuda\CudaUtils.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\FastCompressDXT.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\InputOptions.h" RelativePath="..\..\..\src\nvtt\InputOptions.h"
> >
@ -927,6 +923,10 @@
RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h" RelativePath="..\..\..\src\nvtt\nvtt_wrapper.h"
> >
</File> </File>
<File
RelativePath="..\..\..\src\nvtt\OptimalCompressDXT.h"
>
</File>
<File <File
RelativePath="..\..\..\src\nvtt\OutputOptions.h" RelativePath="..\..\..\src\nvtt\OutputOptions.h"
> >

View File

@ -1,201 +0,0 @@
<?xml version="1.0" encoding="Windows-1252"?>
<VisualStudioProject
ProjectType="Visual C++"
Version="8.00"
Name="stress"
ProjectGUID="{317B694E-B5C1-42A6-956F-FC12B69175A6}"
RootNamespace="stress"
Keyword="Win32Proj"
>
<Platforms>
<Platform
Name="Win32"
/>
</Platforms>
<ToolFiles>
</ToolFiles>
<Configurations>
<Configuration
Name="Debug|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
Optimization="0"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;_DEBUG;_CONSOLE"
MinimalRebuild="true"
BasicRuntimeChecks="3"
RuntimeLibrary="3"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="4"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="2"
GenerateDebugInformation="true"
SubSystem="1"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
<Configuration
Name="Release|Win32"
OutputDirectory="$(ConfigurationName)\$(PlatformName)"
IntermediateDirectory="$(ConfigurationName)\$(PlatformName)"
ConfigurationType="1"
CharacterSet="1"
WholeProgramOptimization="1"
>
<Tool
Name="VCPreBuildEventTool"
/>
<Tool
Name="VCCustomBuildTool"
/>
<Tool
Name="VCXMLDataGeneratorTool"
/>
<Tool
Name="VCWebServiceProxyGeneratorTool"
/>
<Tool
Name="VCMIDLTool"
/>
<Tool
Name="VCCLCompilerTool"
AdditionalIncludeDirectories="..;..\..\..\src"
PreprocessorDefinitions="WIN32;NDEBUG;_CONSOLE"
RuntimeLibrary="2"
UsePrecompiledHeader="0"
WarningLevel="3"
Detect64BitPortabilityProblems="true"
DebugInformationFormat="3"
/>
<Tool
Name="VCManagedResourceCompilerTool"
/>
<Tool
Name="VCResourceCompilerTool"
/>
<Tool
Name="VCPreLinkEventTool"
/>
<Tool
Name="VCLinkerTool"
OutputFile="$(SolutionDir)\$(ConfigurationName).$(PlatformName)\bin\$(ProjectName).exe"
LinkIncremental="1"
GenerateDebugInformation="true"
SubSystem="1"
OptimizeReferences="2"
EnableCOMDATFolding="2"
TargetMachine="1"
/>
<Tool
Name="VCALinkTool"
/>
<Tool
Name="VCManifestTool"
/>
<Tool
Name="VCXDCMakeTool"
/>
<Tool
Name="VCBscMakeTool"
/>
<Tool
Name="VCFxCopTool"
/>
<Tool
Name="VCAppVerifierTool"
/>
<Tool
Name="VCWebDeploymentTool"
/>
<Tool
Name="VCPostBuildEventTool"
/>
</Configuration>
</Configurations>
<References>
</References>
<Files>
<Filter
Name="Source Files"
Filter="cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx"
UniqueIdentifier="{4FC737F1-C7A5-4376-A066-2A32D752A2FF}"
>
<File
RelativePath="..\..\..\src\nvtt\tests\stress.cpp"
>
</File>
</Filter>
<Filter
Name="Header Files"
Filter="h;hpp;hxx;hm;inl;inc;xsd"
UniqueIdentifier="{93995380-89BD-4b04-88EB-625FBE52EBFB}"
>
</Filter>
<Filter
Name="Resource Files"
Filter="rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav"
UniqueIdentifier="{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}"
>
</Filter>
</Files>
<Globals>
</Globals>
</VisualStudioProject>

View File

@ -1,11 +1,11 @@
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
SUBDIRS(nvcore) SUBDIRS(nvcore)
SUBDIRS(nvmath) SUBDIRS(nvmath)
SUBDIRS(nvimage) SUBDIRS(nvimage)
SUBDIRS(nvtt) SUBDIRS(nvtt)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_BINARY_DIR})
# OpenGL # OpenGL
INCLUDE(FindOpenGL) INCLUDE(FindOpenGL)
IF(OPENGL_FOUND) IF(OPENGL_FOUND)
@ -84,7 +84,6 @@ ELSE(PNG_FOUND)
ENDIF(PNG_FOUND) ENDIF(PNG_FOUND)
# TIFF # TIFF
SET(TIFF_NAMES libtiff)
INCLUDE(FindTIFF) INCLUDE(FindTIFF)
IF(TIFF_FOUND) IF(TIFF_FOUND)
SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise") SET(HAVE_TIFF ${TIFF_FOUND} CACHE BOOL "Set to TRUE if TIFF is found, FALSE otherwise")

View File

@ -1,147 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_ALGORITHMS_H
#define NV_CORE_ALGORITHMS_H
namespace nv
{
/// Return the maximum of two values.
template <typename T>
inline const T & max(const T & a, const T & b)
{
//return std::max(a, b);
if( a < b ) {
return b;
}
return a;
}
/// Return the minimum of two values.
template <typename T>
inline const T & min(const T & a, const T & b)
{
//return std::min(a, b);
if( b < a ) {
return b;
}
return a;
}
/// Clamp between two values.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for (typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
// @@ Swap should be implemented here.
#if 0
// This does not use swap, but copies, in some cases swaps are much faster than copies!
// Container should implement operator[], and size()
template <class Container, class T>
void insertionSort(Container<T> & container)
{
const uint n = container.size();
for (uint i=1; i < n; ++i)
{
T value = container[i];
uint j = i;
while (j > 0 && container[j-1] > value)
{
container[j] = container[j-1];
--j;
}
if (i != j)
{
container[j] = value;
}
}
}
template <class Container, class T>
void quickSort(Container<T> & container)
{
quickSort(container, 0, container.count());
}
{
/* threshhold for transitioning to insertion sort */
while (n > 12) {
int c01,c12,c,m,i,j;
/* compute median of three */
m = n >> 1;
c = p[0] > p[m];
c01 = c;
c = &p[m] > &p[n-1];
c12 = c;
/* if 0 >= mid >= end, or 0 < mid < end, then use mid */
if (c01 != c12) {
/* otherwise, we'll need to swap something else to middle */
int z;
c = p[0] < p[n-1];
/* 0>mid && mid<n: 0>n => n; 0<n => 0 */
/* 0<mid && mid>n: 0>n => 0; 0<n => n */
z = (c == c12) ? 0 : n-1;
swap(p[z], p[m]);
}
/* now p[m] is the median-of-three */
/* swap it to the beginning so it won't move around */
swap(p[0], p[m]);
/* partition loop */
i=1;
j=n-1;
for(;;) {
/* handling of equality is crucial here */
/* for sentinels & efficiency with duplicates */
for (;;++i) {
c = p[i] > p[0];
if (!c) break;
}
a = &p[0];
for (;;--j) {
b=&p[j];
c = p[j] > p[0]
if (!c) break;
}
/* make sure we haven't crossed */
if (i >= j) break;
swap(p[i], p[j]);
++i;
--j;
}
/* recurse on smaller side, iterate on larger */
if (j < (n-i)) {
quickSort(p, j);
p = p+i;
n = n-i;
}
else {
quickSort(p+i, n-i);
n = j;
}
}
insertionSort();
}
#endif // 0
} // nv namespace
#endif // NV_CORE_ALGORITHMS_H

View File

@ -80,13 +80,13 @@ public:
/// Clear all the bits. /// Clear all the bits.
void clearAll() void clearAll()
{ {
memset(m_bitArray.mutableBuffer(), 0, m_bitArray.size()); memset(m_bitArray.unsecureBuffer(), 0, m_bitArray.size());
} }
/// Set all the bits. /// Set all the bits.
void setAll() void setAll()
{ {
memset(m_bitArray.mutableBuffer(), 0xFF, m_bitArray.size()); memset(m_bitArray.unsecureBuffer(), 0xFF, m_bitArray.size());
} }
/// Toggle all the bits. /// Toggle all the bits.

View File

@ -3,13 +3,7 @@ ADD_SUBDIRECTORY(poshlib)
SET(CORE_SRCS SET(CORE_SRCS
nvcore.h nvcore.h
DefsGnucDarwin.h
DefsGnucLinux.h
DefsGnucWin32.h
DefsVcWin32.h
Ptr.h Ptr.h
RefCounted.h
RefCounted.cpp
BitArray.h BitArray.h
Memory.h Memory.h
Memory.cpp Memory.cpp
@ -25,10 +19,7 @@ SET(CORE_SRCS
TextWriter.h TextWriter.h
TextWriter.cpp TextWriter.cpp
Radix.h Radix.h
Radix.cpp Radix.cpp)
CpuInfo.h
CpuInfo.cpp
Algorithms.h)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -19,7 +19,6 @@ Do not use memmove in insert & remove, use copy ctors instead.
#include <nvcore/nvcore.h> #include <nvcore/nvcore.h>
#include <nvcore/Memory.h> #include <nvcore/Memory.h>
#include <nvcore/Debug.h> #include <nvcore/Debug.h>
//#include <nvcore/Stream.h>
#include <string.h> // memmove #include <string.h> // memmove
#include <new> // for placement new #include <new> // for placement new
@ -71,10 +70,40 @@ namespace nv
{ {
// Templates // Templates
/// Return the maximum of two values.
template <typename T>
inline const T & max(const T & a, const T & b)
{
//return std::max(a, b);
if( a < b ) {
return b;
}
return a;
}
/// Return the minimum of two values.
template <typename T>
inline const T & min(const T & a, const T & b)
{
//return std::min(a, b);
if( b < a ) {
return b;
}
return a;
}
/// Clamp between two values.
template <typename T>
inline const T & clamp(const T & x, const T & a, const T & b)
{
return min(max(x, a), b);
}
/// Swap two values. /// Swap two values.
template <typename T> template <typename T>
inline void swap(T & a, T & b) inline void swap(T & a, T & b)
{ {
//return std::swap(a, b);
T temp = a; T temp = a;
a = b; a = b;
b = temp; b = temp;
@ -105,6 +134,16 @@ namespace nv
uint operator()(uint x) const { return x; } uint operator()(uint x) const { return x; }
}; };
/// Delete all the elements of a container.
template <typename T>
void deleteAll(T & container)
{
for(typename T::PseudoIndex i = container.start(); !container.isDone(i); container.advance(i))
{
delete container[i];
}
}
/** Return the next power of two. /** Return the next power of two.
* @see http://graphics.stanford.edu/~seander/bithacks.html * @see http://graphics.stanford.edu/~seander/bithacks.html
@ -115,7 +154,7 @@ namespace nv
inline uint nextPowerOfTwo( uint x ) inline uint nextPowerOfTwo( uint x )
{ {
nvDebugCheck( x != 0 ); nvDebugCheck( x != 0 );
#if 1 // On modern CPUs this is supposed to be as fast as using the bsr instruction. #if 1 // On modern CPUs this is as fast as using the bsr instruction.
x--; x--;
x |= x >> 1; x |= x >> 1;
x |= x >> 2; x |= x >> 2;
@ -138,6 +177,15 @@ namespace nv
return (n & (n-1)) == 0; return (n & (n-1)) == 0;
} }
/// Simple iterator interface.
template <typename T>
struct Iterator
{
virtual void advance();
virtual bool isDone();
virtual T current();
};
/** /**
* Replacement for std::vector that is easier to debug and provides * Replacement for std::vector that is easier to debug and provides
@ -204,7 +252,7 @@ namespace nv
const T * buffer() const { return m_buffer; } const T * buffer() const { return m_buffer; }
/// Get vector pointer. /// Get vector pointer.
T * mutableBuffer() { return m_buffer; } T * unsecureBuffer() { return m_buffer; }
/// Is vector empty. /// Is vector empty.
bool isEmpty() const { return m_size == 0; } bool isEmpty() const { return m_size == 0; }

View File

@ -1,88 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/CpuInfo.h>
#include <nvcore/Debug.h>
using namespace nv;
#if NV_OS_WIN32
#define _WIN32_WINNT 0x0501
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
static bool isWow64()
{
LPFN_ISWOW64PROCESS fnIsWow64Process = (LPFN_ISWOW64PROCESS)GetProcAddress(GetModuleHandle(TEXT("kernel32")), "IsWow64Process");
BOOL bIsWow64 = FALSE;
if (NULL != fnIsWow64Process)
{
if (!fnIsWow64Process(GetCurrentProcess(), &bIsWow64))
{
return false;
}
}
return bIsWow64 == TRUE;
}
#endif // NV_OS_WIN32
uint CpuInfo::processorCount()
{
#if NV_OS_WIN32
SYSTEM_INFO sysInfo;
typedef BOOL (WINAPI *LPFN_ISWOW64PROCESS) (HANDLE, PBOOL);
if (isWow64())
{
GetNativeSystemInfo(&sysInfo);
}
else
{
GetSystemInfo(&sysInfo);
}
uint count = (uint)sysInfo.dwNumberOfProcessors;
nvDebugCheck(count >= 1);
return count;
#else
return 1;
#endif
}
uint CpuInfo::coreCount()
{
return 1;
}
bool CpuInfo::hasMMX()
{
return false;
}
bool CpuInfo::hasSSE()
{
return false;
}
bool CpuInfo::hasSSE2()
{
return false;
}
bool CpuInfo::hasSSE3()
{
return false;
}

View File

@ -1,88 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_CPUINFO_H
#define NV_CORE_CPUINFO_H
#include <nvcore/nvcore.h>
#if NV_CC_MSVC
# include <intrin.h> // __rdtsc
#endif
namespace nv
{
// CPU Information.
class CpuInfo
{
static uint processorCount();
static uint coreCount();
static bool hasMMX();
static bool hasSSE();
static bool hasSSE2();
static bool hasSSE3();
};
#if NV_CC_MSVC
#pragma intrinsic(__rdtsc)
inline uint64 rdtsc()
{
return __rdtsc();
}
#endif
#if NV_CC_GNUC
#if defined(__i386__)
inline /*volatile*/ uint64 rdtsc()
{
uint64 x;
//__asm__ volatile ("rdtsc" : "=A" (x));
__asm__ volatile (".byte 0x0f, 0x31" : "=A" (x));
return x;
}
#elif defined(__x86_64__)
static __inline__ uint64 rdtsc(void)
{
unsigned int hi, lo;
__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
return ( (unsigned long long)lo)|( ((unsigned long long)hi)<<32 );
}
#elif defined(__powerpc__)
static __inline__ uint64 rdtsc(void)
{
uint64 result=0;
unsigned long int upper, lower, tmp;
__asm__ volatile(
"0: \n"
"\tmftbu %0 \n"
"\tmftb %1 \n"
"\tmftbu %2 \n"
"\tcmpw %2,%0 \n"
"\tbne 0b \n"
: "=r"(upper),"=r"(lower),"=r"(tmp)
);
result = upper;
result = result<<32;
result = result|lower;
return(result);
}
#endif
#endif // NV_CC_GNUC
} // nv namespace
#endif // NV_CORE_CPUINFO_H

View File

@ -70,6 +70,8 @@ typedef uint32 uint;
#pragma warning(disable : 4711) // function selected for automatic inlining #pragma warning(disable : 4711) // function selected for automatic inlining
#pragma warning(disable : 4725) // Pentium fdiv bug #pragma warning(disable : 4725) // Pentium fdiv bug
#pragma warning(disable : 4345) // behavior change: an object of POD type constructed with an initializer of the form () will be default-initialized
#pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged. #pragma warning(disable : 4786) // Identifier was truncated and cannot be debugged.
#pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup #pragma warning(disable : 4675) // resolved overload was found by argument-dependent lookup

View File

@ -8,11 +8,6 @@
#include <stdio.h> // NULL #include <stdio.h> // NULL
#define NV_DECLARE_PTR(Class) \
typedef SmartPtr<class Class> Class ## Ptr; \
typedef SmartPtr<const class Class> ClassConst ## Ptr
namespace nv namespace nv
{ {
@ -98,23 +93,125 @@ private:
T * m_ptr; T * m_ptr;
}; };
#if 0
/** Reference counted base class to be used with Pointer.
*
* The only requirement of the Pointer class is that the RefCounted class implements the
* addRef and release methods.
*/
class RefCounted
{
NV_FORBID_COPY(RefCounted);
public:
/// Ctor.
RefCounted() : m_count(0), m_weak_proxy(NULL)
{
s_total_obj_count++;
}
/// Virtual dtor.
virtual ~RefCounted()
{
nvCheck( m_count == 0 );
nvCheck( s_total_obj_count > 0 );
s_total_obj_count--;
}
/// Increase reference count.
uint addRef() const
{
s_total_ref_count++;
m_count++;
return m_count;
}
/// Decrease reference count and remove when 0.
uint release() const
{
nvCheck( m_count > 0 );
s_total_ref_count--;
m_count--;
if( m_count == 0 ) {
releaseWeakProxy();
delete this;
return 0;
}
return m_count;
}
/// Get weak proxy.
WeakProxy * getWeakProxy() const
{
if (m_weak_proxy == NULL) {
m_weak_proxy = new WeakProxy;
m_weak_proxy->AddRef();
}
return m_weak_proxy;
}
/// Release the weak proxy.
void releaseWeakProxy() const
{
if (m_weak_proxy != NULL) {
m_weak_proxy->NotifyObjectDied();
m_weak_proxy->Release();
m_weak_proxy = NULL;
}
}
/** @name Debug methods: */
//@{
/// Get reference count.
int refCount() const
{
return m_count;
}
/// Get total number of objects.
static int totalObjectCount()
{
return s_total_obj_count;
}
/// Get total number of references.
static int totalReferenceCount()
{
return s_total_ref_count;
}
//@}
private:
NVCORE_API static int s_total_ref_count;
NVCORE_API static int s_total_obj_count;
mutable int m_count;
mutable WeakProxy * weak_proxy;
};
#endif
/// Smart pointer template class. /// Smart pointer template class.
template <class BaseClass> template <class BaseClass>
class SmartPtr { class Pointer {
public: public:
// BaseClass must implement addRef() and release(). // BaseClass must implement addRef() and release().
typedef SmartPtr<BaseClass> ThisType; typedef Pointer<BaseClass> ThisType;
/// Default ctor. /// Default ctor.
SmartPtr() : m_ptr(NULL) Pointer() : m_ptr(NULL)
{ {
} }
/** Other type assignment. */ /** Other type assignment. */
template <class OtherBase> template <class OtherBase>
SmartPtr( const SmartPtr<OtherBase> & tc ) Pointer( const Pointer<OtherBase> & tc )
{ {
m_ptr = static_cast<BaseClass *>( tc.ptr() ); m_ptr = static_cast<BaseClass *>( tc.ptr() );
if( m_ptr ) { if( m_ptr ) {
@ -123,7 +220,7 @@ public:
} }
/** Copy ctor. */ /** Copy ctor. */
SmartPtr( const ThisType & bc ) Pointer( const ThisType & bc )
{ {
m_ptr = bc.ptr(); m_ptr = bc.ptr();
if( m_ptr ) { if( m_ptr ) {
@ -131,8 +228,8 @@ public:
} }
} }
/** Copy cast ctor. SmartPtr(NULL) is valid. */ /** Copy cast ctor. Pointer(NULL) is valid. */
explicit SmartPtr( BaseClass * bc ) explicit Pointer( BaseClass * bc )
{ {
m_ptr = bc; m_ptr = bc;
if( m_ptr ) { if( m_ptr ) {
@ -141,7 +238,7 @@ public:
} }
/** Dtor. */ /** Dtor. */
~SmartPtr() ~Pointer()
{ {
set(NULL); set(NULL);
} }
@ -152,14 +249,14 @@ public:
/** -> operator. */ /** -> operator. */
BaseClass * operator -> () const BaseClass * operator -> () const
{ {
nvCheck( m_ptr != NULL ); piCheck( m_ptr != NULL );
return m_ptr; return m_ptr;
} }
/** * operator. */ /** * operator. */
BaseClass & operator*() const BaseClass & operator*() const
{ {
nvCheck( m_ptr != NULL ); piCheck( m_ptr != NULL );
return *m_ptr; return *m_ptr;
} }
@ -175,7 +272,7 @@ public:
//@{ //@{
/** Other type assignment. */ /** Other type assignment. */
template <class OtherBase> template <class OtherBase>
void operator = ( const SmartPtr<OtherBase> & tc ) void operator = ( const Pointer<OtherBase> & tc )
{ {
set( static_cast<BaseClass *>(tc.ptr()) ); set( static_cast<BaseClass *>(tc.ptr()) );
} }
@ -198,7 +295,7 @@ public:
//@{ //@{
/** Other type equal comparation. */ /** Other type equal comparation. */
template <class OtherBase> template <class OtherBase>
bool operator == ( const SmartPtr<OtherBase> & other ) const bool operator == ( const Pointer<OtherBase> & other ) const
{ {
return m_ptr == other.ptr(); return m_ptr == other.ptr();
} }
@ -217,7 +314,7 @@ public:
/** Other type not equal comparation. */ /** Other type not equal comparation. */
template <class OtherBase> template <class OtherBase>
bool operator != ( const SmartPtr<OtherBase> & other ) const bool operator != ( const Pointer<OtherBase> & other ) const
{ {
return m_ptr != other.ptr(); return m_ptr != other.ptr();
} }

View File

@ -1,9 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include "RefCounted.h"
using namespace nv;
int nv::RefCounted::s_total_ref_count = 0;
int nv::RefCounted::s_total_obj_count = 0;

View File

@ -1,114 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_CORE_REFCOUNTED_H
#define NV_CORE_REFCOUNTED_H
#include <nvcore/nvcore.h>
#include <nvcore/Debug.h>
namespace nv
{
/// Reference counted base class to be used with SmartPtr and WeakPtr.
class RefCounted
{
NV_FORBID_COPY(RefCounted);
public:
/// Ctor.
RefCounted() : m_count(0)/*, m_weak_proxy(NULL)*/
{
s_total_obj_count++;
}
/// Virtual dtor.
virtual ~RefCounted()
{
nvCheck( m_count == 0 );
nvCheck( s_total_obj_count > 0 );
s_total_obj_count--;
}
/// Increase reference count.
uint addRef() const
{
s_total_ref_count++;
m_count++;
return m_count;
}
/// Decrease reference count and remove when 0.
uint release() const
{
nvCheck( m_count > 0 );
s_total_ref_count--;
m_count--;
if( m_count == 0 ) {
// releaseWeakProxy();
delete this;
return 0;
}
return m_count;
}
/*
/// Get weak proxy.
WeakProxy * getWeakProxy() const
{
if (m_weak_proxy == NULL) {
m_weak_proxy = new WeakProxy;
m_weak_proxy->AddRef();
}
return m_weak_proxy;
}
/// Release the weak proxy.
void releaseWeakProxy() const
{
if (m_weak_proxy != NULL) {
m_weak_proxy->NotifyObjectDied();
m_weak_proxy->Release();
m_weak_proxy = NULL;
}
}
*/
/** @name Debug methods: */
//@{
/// Get reference count.
int refCount() const
{
return m_count;
}
/// Get total number of objects.
static int totalObjectCount()
{
return s_total_obj_count;
}
/// Get total number of references.
static int totalReferenceCount()
{
return s_total_ref_count;
}
//@}
private:
NVCORE_API static int s_total_ref_count;
NVCORE_API static int s_total_obj_count;
mutable int m_count;
// mutable WeakProxy * weak_proxy;
};
} // nv namespace
#endif // NV_CORE_REFCOUNTED_H

View File

@ -48,7 +48,7 @@ const char * TextReader::readToEnd()
m_text.reserve(size + 1); m_text.reserve(size + 1);
m_text.resize(size); m_text.resize(size);
m_stream->serialize(m_text.mutableBuffer(), size); m_stream->serialize(m_text.unsecureBuffer(), size);
m_text.pushBack('\0'); m_text.pushBack('\0');
return m_text.buffer(); return m_text.buffer();

View File

@ -22,7 +22,6 @@
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Stream.h> #include <nvcore/Stream.h>
#include <nvcore/Containers.h> // swap
#include "ColorBlock.h" #include "ColorBlock.h"
#include "BlockDXT.h" #include "BlockDXT.h"

View File

@ -1,6 +1,5 @@
// This code is in the public domain -- castanyo@yahoo.es // This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Containers.h> // swap
#include <nvmath/Box.h> #include <nvmath/Box.h>
#include <nvimage/ColorBlock.h> #include <nvimage/ColorBlock.h>
#include <nvimage/Image.h> #include <nvimage/Image.h>

View File

@ -532,7 +532,7 @@ DDSHeader::DDSHeader()
// Store version information on the reserved header attributes. // Store version information on the reserved header attributes.
this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T'); this->reserved[9] = MAKEFOURCC('N', 'V', 'T', 'T');
this->reserved[10] = (0 << 16) | (9 << 8) | (5); // major.minor.revision this->reserved[10] = (2 << 16) | (0 << 8) | (3); // major.minor.revision
this->pf.size = 32; this->pf.size = 32;
this->pf.flags = 0; this->pf.flags = 0;

View File

@ -11,16 +11,16 @@ namespace nv
class Vector4; class Vector4;
/// Base filter class. /// Base filter class.
class NVIMAGE_CLASS Filter class Filter
{ {
public: public:
Filter(float width); NVIMAGE_API Filter(float width);
virtual ~Filter(); NVIMAGE_API virtual ~Filter();
float width() const { return m_width; } NVIMAGE_API float width() const { return m_width; }
float sampleDelta(float x, float scale) const; NVIMAGE_API float sampleDelta(float x, float scale) const;
float sampleBox(float x, float scale, int samples) const; NVIMAGE_API float sampleBox(float x, float scale, int samples) const;
float sampleTriangle(float x, float scale, int samples) const; NVIMAGE_API float sampleTriangle(float x, float scale, int samples) const;
virtual float evaluate(float x) const = 0; virtual float evaluate(float x) const = 0;
@ -29,56 +29,56 @@ namespace nv
}; };
// Box filter. // Box filter.
class NVIMAGE_CLASS BoxFilter : public Filter class BoxFilter : public Filter
{ {
public: public:
BoxFilter(); NVIMAGE_API BoxFilter();
BoxFilter(float width); NVIMAGE_API BoxFilter(float width);
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
// Triangle (bilinear/tent) filter. // Triangle (bilinear/tent) filter.
class NVIMAGE_CLASS TriangleFilter : public Filter class TriangleFilter : public Filter
{ {
public: public:
TriangleFilter(); NVIMAGE_API TriangleFilter();
TriangleFilter(float width); NVIMAGE_API TriangleFilter(float width);
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
// Quadratic (bell) filter. // Quadratic (bell) filter.
class NVIMAGE_CLASS QuadraticFilter : public Filter class QuadraticFilter : public Filter
{ {
public: public:
QuadraticFilter(); NVIMAGE_API QuadraticFilter();
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
// Cubic filter from Thatcher Ulrich. // Cubic filter from Thatcher Ulrich.
class NVIMAGE_CLASS CubicFilter : public Filter class CubicFilter : public Filter
{ {
public: public:
CubicFilter(); NVIMAGE_API CubicFilter();
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
// Cubic b-spline filter from Paul Heckbert. // Cubic b-spline filter from Paul Heckbert.
class NVIMAGE_CLASS BSplineFilter : public Filter class BSplineFilter : public Filter
{ {
public: public:
BSplineFilter(); NVIMAGE_API BSplineFilter();
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
/// Mitchell & Netravali's two-param cubic /// Mitchell & Netravali's two-param cubic
/// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88 /// @see "Reconstruction Filters in Computer Graphics", SIGGRAPH 88
class NVIMAGE_CLASS MitchellFilter : public Filter class MitchellFilter : public Filter
{ {
public: public:
MitchellFilter(); NVIMAGE_API MitchellFilter();
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float b); NVIMAGE_API void setParameters(float a, float b);
private: private:
float p0, p2, p3; float p0, p2, p3;
@ -86,29 +86,29 @@ namespace nv
}; };
// Lanczos3 filter. // Lanczos3 filter.
class NVIMAGE_CLASS LanczosFilter : public Filter class LanczosFilter : public Filter
{ {
public: public:
LanczosFilter(); NVIMAGE_API LanczosFilter();
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
// Sinc filter. // Sinc filter.
class NVIMAGE_CLASS SincFilter : public Filter class SincFilter : public Filter
{ {
public: public:
SincFilter(float w); NVIMAGE_API SincFilter(float w);
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
}; };
// Kaiser filter. // Kaiser filter.
class NVIMAGE_CLASS KaiserFilter : public Filter class KaiserFilter : public Filter
{ {
public: public:
KaiserFilter(float w); NVIMAGE_API KaiserFilter(float w);
virtual float evaluate(float x) const; NVIMAGE_API virtual float evaluate(float x) const;
void setParameters(float a, float stretch); NVIMAGE_API void setParameters(float a, float stretch);
private: private:
float alpha; float alpha;
@ -118,12 +118,12 @@ namespace nv
/// A 1D kernel. Used to precompute filter weights. /// A 1D kernel. Used to precompute filter weights.
class NVIMAGE_CLASS Kernel1 class Kernel1
{ {
NV_FORBID_COPY(Kernel1); NV_FORBID_COPY(Kernel1);
public: public:
Kernel1(const Filter & f, int iscale, int samples = 32); NVIMAGE_API Kernel1(const Filter & f, int iscale, int samples = 32);
~Kernel1(); NVIMAGE_API ~Kernel1();
float valueAt(uint x) const { float valueAt(uint x) const {
nvDebugCheck(x < (uint)m_windowSize); nvDebugCheck(x < (uint)m_windowSize);
@ -138,7 +138,7 @@ namespace nv
return m_width; return m_width;
} }
void debugPrint(); NVIMAGE_API void debugPrint();
private: private:
int m_windowSize; int m_windowSize;
@ -148,15 +148,15 @@ namespace nv
/// A 2D kernel. /// A 2D kernel.
class NVIMAGE_CLASS Kernel2 class Kernel2
{ {
public: public:
Kernel2(uint width); NVIMAGE_API Kernel2(uint width);
Kernel2(const Kernel2 & k); NVIMAGE_API Kernel2(const Kernel2 & k);
~Kernel2(); NVIMAGE_API ~Kernel2();
void normalize(); NVIMAGE_API void normalize();
void transpose(); NVIMAGE_API void transpose();
float valueAt(uint x, uint y) const { float valueAt(uint x, uint y) const {
return m_data[y * m_windowSize + x]; return m_data[y * m_windowSize + x];
@ -166,12 +166,12 @@ namespace nv
return m_windowSize; return m_windowSize;
} }
void initLaplacian(); NVIMAGE_API void initLaplacian();
void initEdgeDetection(); NVIMAGE_API void initEdgeDetection();
void initSobel(); NVIMAGE_API void initSobel();
void initPrewitt(); NVIMAGE_API void initPrewitt();
void initBlendedSobel(const Vector4 & scale); NVIMAGE_API void initBlendedSobel(const Vector4 & scale);
private: private:
const uint m_windowSize; const uint m_windowSize;
@ -180,12 +180,12 @@ namespace nv
/// A 1D polyphase kernel /// A 1D polyphase kernel
class NVIMAGE_CLASS PolyphaseKernel class PolyphaseKernel
{ {
NV_FORBID_COPY(PolyphaseKernel); NV_FORBID_COPY(PolyphaseKernel);
public: public:
PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32); NVIMAGE_API PolyphaseKernel(const Filter & f, uint srcLength, uint dstLength, int samples = 32);
~PolyphaseKernel(); NVIMAGE_API ~PolyphaseKernel();
int windowSize() const { int windowSize() const {
return m_windowSize; return m_windowSize;
@ -205,7 +205,7 @@ namespace nv
return m_data[column * m_windowSize + x]; return m_data[column * m_windowSize + x];
} }
void debugPrint() const; NVIMAGE_API void debugPrint() const;
private: private:
int m_windowSize; int m_windowSize;

View File

@ -376,7 +376,7 @@ FloatImage * FloatImage::fastDownSample() const
{ {
const uint n = w * h; const uint n = w * h;
if (n & 1) if ((m_width * m_height) & 1)
{ {
const float scale = 1.0f / (2 * n + 1); const float scale = 1.0f / (2 * n + 1);
@ -636,7 +636,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
float * dst_channel = dst_image->channel(c); float * dst_channel = dst_image->channel(c);
for (uint x = 0; x < w; x++) { for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer()); tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) { for (uint y = 0; y < h; y++) {
dst_channel[y * w + x] = tmp_column[y]; dst_channel[y * w + x] = tmp_column[y];
@ -657,7 +657,7 @@ FloatImage * FloatImage::downSample(const Filter & filter, uint w, uint h, WrapM
float * tmp_channel = tmp_image->channel(c); float * tmp_channel = tmp_image->channel(c);
for (uint x = 0; x < w; x++) { for (uint x = 0; x < w; x++) {
tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.mutableBuffer()); tmp_image->applyKernelVertical(ykernel, x, c, wm, tmp_column.unsecureBuffer());
for (uint y = 0; y < h; y++) { for (uint y = 0; y < h; y++) {
tmp_channel[y * w + x] = tmp_column[y]; tmp_channel[y * w + x] = tmp_column[y];

View File

@ -3,10 +3,8 @@
#ifndef NV_IMAGE_FLOATIMAGE_H #ifndef NV_IMAGE_FLOATIMAGE_H
#define NV_IMAGE_FLOATIMAGE_H #define NV_IMAGE_FLOATIMAGE_H
#include <stdlib.h> // abs
#include <nvcore/Debug.h> #include <nvcore/Debug.h>
#include <nvcore/Algorithms.h> // clamp #include <nvcore/Containers.h> // clamp
#include <nvimage/nvimage.h> #include <nvimage/nvimage.h>
namespace nv namespace nv

View File

@ -2,7 +2,6 @@
#include <nvcore/Debug.h> #include <nvcore/Debug.h>
#include <nvcore/Ptr.h> #include <nvcore/Ptr.h>
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h> #include <nvmath/Color.h>

View File

@ -162,9 +162,6 @@ FloatImage * nv::ImageIO::loadFloat(const char * fileName, Stream & s)
if (strCaseCmp(extension, ".pfm") == 0) { if (strCaseCmp(extension, ".pfm") == 0) {
return loadFloatPFM(fileName, s); return loadFloatPFM(fileName, s);
} }
if (strCaseCmp(extension, ".hdr") == 0) {
return loadGridFloat(fileName, s);
}
*/ */
return NULL; return NULL;
@ -797,7 +794,7 @@ Image * nv::ImageIO::loadJPG(Stream & s)
// Read the entire file. // Read the entire file.
Array<uint8> byte_array; Array<uint8> byte_array;
byte_array.resize(s.size()); byte_array.resize(s.size());
s.serialize(byte_array.mutableBuffer(), s.size()); s.serialize(byte_array.unsecureBuffer(), s.size());
jpeg_decompress_struct cinfo; jpeg_decompress_struct cinfo;
jpeg_error_mgr jerr; jpeg_error_mgr jerr;
@ -1092,8 +1089,7 @@ namespace
virtual void seekg(Imf::Int64 pos) virtual void seekg(Imf::Int64 pos)
{ {
nvDebugCheck(pos >= 0 && pos < UINT_MAX); m_stream.seek(pos);
m_stream.seek((uint)pos);
} }
virtual void clear() virtual void clear()
@ -1302,77 +1298,6 @@ bool nv::ImageIO::saveFloatPFM(const char * fileName, const FloatImage * fimage,
return true; return true;
} }
#pragma warning(disable : 4996)
NVIMAGE_API FloatImage * nv::ImageIO::loadGridFloat(const char * fileName, Stream & s)
{
nvCheck(s.isLoading());
nvCheck(!s.isError());
Tokenizer parser(&s);
parser.nextLine();
if (parser.token() != "ncols")
{
nvDebug("Failed to find 'ncols' token in file '%s'.\n", fileName);
return NULL;
}
parser.nextToken(true);
const int nCols = parser.token().toInt();
parser.nextToken(true);
if (parser.token() != "nrows")
{
nvDebug("Failed to find 'nrows' token in file '%s'.\n", fileName);
return NULL;
}
parser.nextToken(true);
const int nRows = parser.token().toInt();
/* There's a byte order defined in the header. We could read it. However, here we
just assume that it matches the platform's byte order.
// There is then a bunch of data that we don't care about (lat, long definitions, etc).
for (int i=0; i!=9; ++i)
parser.nextToken(true);
if (parser.token() != "byteorder")
return NULL;
parser.nextToken(true);
const Stream::ByteOrder byteOrder = (parser.token() == "LSBFIRST")? Stream::LittleEndian: Stream::BigEndian;
*/
// GridFloat comes in two files: an ASCII header which was parsed above (.hdr) and a big blob
// of binary data in a .flt file.
Path dataPath(fileName);
dataPath.stripExtension();
dataPath.append(".flt");
// Open the binary data.
FILE* file = fopen(dataPath.fileName(), "rb");
if (!file)
{
nvDebug("Failed to find GridFloat blob file '%s' corresponding to '%s'.\n", dataPath.fileName(), fileName);
return NULL;
}
// Allocate image.
AutoPtr<FloatImage> fimage(new FloatImage());
fimage->allocate(1, nCols, nRows);
float * channel = fimage->channel(0);
// The binary blob is defined to be in row-major order, containing IEEE floats.
// So we can just slurp it in. Theoretically, we ought to use the byte order.
const size_t nRead = fread((void*) channel, sizeof(float), nRows * nCols, file);
fclose(file);
return fimage.release();
}
#endif #endif
#if 0 #if 0

View File

@ -47,15 +47,10 @@ namespace nv
NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components); NVIMAGE_API bool saveFloatEXR(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
#endif #endif
/*
NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
// GridFloat is a simple, open format for terrain elevation data. See http://ned.usgs.gov/Ned/faq.asp. // NVIMAGE_API FloatImage * loadFloatPFM(const char * fileName, Stream & s);
// Expects: 1) fileName will be an ".hdr" header file, 2) there will also exist a corresponding float data // NVIMAGE_API bool saveFloatPFM(const char * fileName, const FloatImage * fimage, uint base_component, uint num_components);
// blob in a ".flt" file. (This is what USGS gives you.)
NVIMAGE_API FloatImage * loadGridFloat(const char * fileName, Stream & s);
*/
} // ImageIO namespace } // ImageIO namespace
} // nv namespace } // nv namespace

View File

@ -12,10 +12,6 @@ http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
@@ This code needs to be reviewed, I'm not sure it's correct. @@ This code needs to be reviewed, I'm not sure it's correct.
*/ */
#include <string.h> // memset
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h> #include <nvmath/Color.h>
#include <nvimage/Image.h> #include <nvimage/Image.h>

View File

@ -49,7 +49,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
return; return;
} }
} }
normal = nv::normalize(normal, epsilon); normal = ::normalize(normal, epsilon);
tangent -= normal * dot(normal, tangent); tangent -= normal * dot(normal, tangent);
bitangent -= normal * dot(normal, bitangent); bitangent -= normal * dot(normal, bitangent);
@ -68,8 +68,7 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
} }
else else
{ {
#if 0 tangent = ::normalize(tangent, epsilon);
tangent = nv::normalize(tangent, epsilon);
bitangent -= tangent * dot(tangent, bitangent); bitangent -= tangent * dot(tangent, bitangent);
if (length(bitangent) < epsilon) if (length(bitangent) < epsilon)
@ -79,47 +78,11 @@ void Basis::robustOrthonormalize(float epsilon /*= NV_EPSILON*/)
} }
else else
{ {
bitangent = nv::normalize(bitangent, epsilon); tangent = ::normalize(tangent, epsilon);
} }
#else
if (length(bitangent) < epsilon)
{
bitangent = cross(tangent, normal);
nvCheck(isNormalized(bitangent));
}
else
{
tangent = nv::normalize(tangent);
bitangent = nv::normalize(bitangent);
Vector3 bisector = nv::normalize(tangent + bitangent);
Vector3 axis = cross(bisector, normal);
nvDebugCheck(isNormalized(axis, epsilon));
nvDebugCheck(equal(dot(axis, tangent), -dot(axis, bitangent), epsilon));
if (dot(axis, tangent) > 0)
{
tangent = nv::normalize(bisector + axis);
bitangent = nv::normalize(bisector - axis);
}
else
{
tangent = nv::normalize(bisector - axis);
bitangent = nv::normalize(bisector + axis);
}
}
#endif
} }
/*// Check vector lengths. // Check vector lengths.
if (!isNormalized(normal, epsilon))
{
nvDebug("%f %f %f\n", normal.x(), normal.y(), normal.z());
nvDebug("%f %f %f\n", tangent.x(), tangent.y(), tangent.z());
nvDebug("%f %f %f\n", bitangent.x(), bitangent.y(), bitangent.z());
}*/
nvCheck(isNormalized(normal, epsilon)); nvCheck(isNormalized(normal, epsilon));
nvCheck(isNormalized(tangent, epsilon)); nvCheck(isNormalized(tangent, epsilon));
nvCheck(isNormalized(bitangent, epsilon)); nvCheck(isNormalized(bitangent, epsilon));
@ -162,18 +125,9 @@ void Basis::buildFrameForDirection(Vector3::Arg d)
bitangent = cross(normal, tangent); bitangent = cross(normal, tangent);
} }
bool Basis::isValid() const
{
if (equal(normal, Vector3(zero))) return false;
if (equal(tangent, Vector3(zero))) return false;
if (equal(bitangent, Vector3(zero))) return false;
if (equal(determinant(), 0.0f)) return false;
return true;
}
/*
/// Transform by this basis. (From this basis to object space). /// Transform by this basis. (From this basis to object space).
Vector3 Basis::transform(Vector3::Arg v) const Vector3 Basis::transform(Vector3::Arg v) const
{ {
@ -190,31 +144,30 @@ Vector3 Basis::transformT(Vector3::Arg v)
} }
/// Transform by the inverse. (From object space to this basis). /// Transform by the inverse. (From object space to this basis).
/// @note Uses Cramer's rule so the inverse is not accurate if the basis is ill-conditioned. /// @note Uses Kramer's rule so the inverse is not accurate if the basis is ill-conditioned.
Vector3 Basis::transformI(Vector3::Arg v) const Vector3 Basis::transformI(Vector3::Arg v) const
{ {
const float det = determinant(); const float det = determinant();
nvDebugCheck(!equal(det, 0.0f, 0.0f)); nvCheck(!equalf(det, 0.0f));
const float idet = 1.0f / det; const float idet = 1.0f / det;
// Rows of the inverse matrix. // Rows of the inverse matrix.
Vector3 r0( Vector3 r0, r1, r2;
(bitangent.y() * normal.z() - bitangent.z() * normal.y()), r0.x = (bitangent.y() * normal.z() - bitangent.z() * normal.y()) * idet;
-(bitangent.x() * normal.z() - bitangent.z() * normal.x()), r0.y = -(bitangent.x() * normal.z() - bitangent.z() * normal.x()) * idet;
(bitangent.x() * normal.y() - bitangent.y() * normal.x())); r0.z = (bitangent.x() * normal.y() - bitangent.y() * normal.x()) * idet;
Vector3 r1( r1.x = -(tangent.y() * normal.z() - tangent.z() * normal.y()) * idet;
-(tangent.y() * normal.z() - tangent.z() * normal.y()), r1.y = (tangent.x() * normal.z() - tangent.z() * normal.x()) * idet;
(tangent.x() * normal.z() - tangent.z() * normal.x()), r1.z = -(tangent.x() * normal.y() - tangent.y() * normal.x()) * idet;
-(tangent.x() * normal.y() - tangent.y() * normal.x()));
Vector3 r2( r2.x = (tangent.y() * bitangent.z() - tangent.z() * bitangent.y()) * idet;
(tangent.y() * bitangent.z() - tangent.z() * bitangent.y()), r2.y = -(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()) * idet;
-(tangent.x() * bitangent.z() - tangent.z() * bitangent.x()), r2.z = (tangent.x() * bitangent.y() - tangent.y() * bitangent.x()) * idet;
(tangent.x() * bitangent.y() - tangent.y() * bitangent.x()));
return Vector3(dot(v, r0), dot(v, r1), dot(v, r2)) * idet; return Vector3(dot(v, r0), dot(v, r1), dot(v, r2));
} }
*/

View File

@ -54,8 +54,7 @@ namespace nv
tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y(); tangent.z() * bitangent.x() * normal.y() - tangent.x() * bitangent.z() * normal.y();
} }
bool isValid() const; /*
// Get transform matrix for this basis. // Get transform matrix for this basis.
NVMATH_API Matrix matrix() const; NVMATH_API Matrix matrix() const;
@ -67,7 +66,7 @@ namespace nv
// Transform by the inverse. (From object space to this basis). // Transform by the inverse. (From object space to this basis).
NVMATH_API Vector3 transformI(Vector3::Arg v) const; NVMATH_API Vector3 transformI(Vector3::Arg v) const;
*/
Vector3 tangent; Vector3 tangent;
Vector3 bitangent; Vector3 bitangent;

View File

@ -9,7 +9,6 @@
namespace nv namespace nv
{ {
class Stream;
/// Axis Aligned Bounding Box. /// Axis Aligned Bounding Box.
class Box class Box
@ -28,13 +27,11 @@ public:
// Cast operators. // Cast operators.
operator const float * () const { return reinterpret_cast<const float *>(this); } operator const float * () const { return reinterpret_cast<const float *>(this); }
// Min corner of the box. /// Min corner of the box.
Vector3 minCorner() const { return m_mins; } Vector3 mins() const { return m_mins; }
Vector3 & minCorner() { return m_mins; }
// Max corner of the box. /// Max corner of the box.
Vector3 maxCorner() const { return m_maxs; } Vector3 maxs() const { return m_maxs; }
Vector3 & maxCorner() { return m_maxs; }
/// Clear the bounds. /// Clear the bounds.
void clearBounds() void clearBounds()
@ -111,7 +108,7 @@ public:
float area() const float area() const
{ {
const Vector3 d = extents(); const Vector3 d = extents();
return 8.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z()); return 4.0f * (d.x()*d.y() + d.x()*d.z() + d.y()*d.z());
} }
/// Get the volume of the box. /// Get the volume of the box.
@ -121,16 +118,6 @@ public:
return 8.0f * (d.x() * d.y() * d.z()); return 8.0f * (d.x() * d.y() * d.z());
} }
/// Return true if the box contains the given point.
bool contains(Vector3::Arg p) const
{
return
m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z();
}
friend Stream & operator<< (Stream & s, Box & box);
private: private:
Vector3 m_mins; Vector3 m_mins;
@ -138,6 +125,15 @@ private:
}; };
/*
/// Point inside box test.
inline bool pointInsideBox(const Box & b, Vector3::Arg p) const
{
return (m_mins.x() < p.x() && m_mins.y() < p.y() && m_mins.z() < p.z() &&
m_maxs.x() > p.x() && m_maxs.y() > p.y() && m_maxs.z() > p.z());
}
*/
} // nv namespace } // nv namespace

View File

@ -5,19 +5,13 @@ SET(MATH_SRCS
Vector.h Vector.h
Matrix.h Matrix.h
Quaternion.h Quaternion.h
Plane.h Plane.cpp
Box.h Box.h
Color.h Color.h
Montecarlo.h Montecarlo.cpp Montecarlo.h Montecarlo.cpp
Random.h Random.cpp Random.h Random.cpp
SphericalHarmonic.h SphericalHarmonic.cpp SphericalHarmonic.h SphericalHarmonic.cpp
Basis.h Basis.cpp Basis.h Basis.cpp
Triangle.h Triangle.cpp TriBox.cpp Triangle.h Triangle.cpp TriBox.cpp)
Polygon.h Polygon.cpp
TypeSerialization.h TypeSerialization.cpp
Sparse.h Sparse.cpp
Solver.h Solver.cpp
KahanSum.h)
INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR})

View File

@ -1,38 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_KAHANSUM_H
#define NV_MATH_KAHANSUM_H
#include <nvmath/nvmath.h>
namespace nv
{
class KahanSum
{
public:
KahanSum() : accum(0.0f), err(0) {};
void add(float f)
{
float compensated = f + err;
float tmp = accum + compensated;
err = accum - tmp;
err += compensated;
accum = tmp;
}
float sum() const
{
return accum;
}
private:
float accum;
float err;
};
} // nv namespace
#endif // NV_MATH_KAHANSUM_H

View File

@ -1,168 +0,0 @@
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
#include <nvmath/Polygon.h>
#include <nvmath/Triangle.h>
#include <nvmath/Plane.h>
using namespace nv;
Polygon::Polygon()
{
}
Polygon::Polygon(const Triangle & t)
{
pointArray.resize(3);
pointArray[0] = t.v[0];
pointArray[1] = t.v[1];
pointArray[2] = t.v[2];
}
Polygon::Polygon(const Vector3 * points, uint vertexCount)
{
pointArray.resize(vertexCount);
for (uint i = 0; i < vertexCount; i++)
{
pointArray[i] = points[i];
}
}
/// Compute polygon area.
float Polygon::area() const
{
float total = 0;
const uint pointCount = pointArray.count();
for (uint i = 2; i < pointCount; i++)
{
Vector3 v1 = pointArray[i-1] - pointArray[0];
Vector3 v2 = pointArray[i] - pointArray[0];
total += 0.5f * length(cross(v1, v2));
}
return total;
}
/// Get the bounds of the polygon.
Box Polygon::bounds() const
{
Box bounds;
bounds.clearBounds();
foreach(p, pointArray)
{
bounds.addPointToBounds(pointArray[p]);
}
return bounds;
}
/// Get the plane of the polygon.
Plane Polygon::plane() const
{
// @@ Do something better than this?
Vector3 n = cross(pointArray[1] - pointArray[0], pointArray[2] - pointArray[0]);
return Vector4(n, dot(n, pointArray[0]));
}
/// Clip polygon to box.
uint Polygon::clipTo(const Box & box)
{
const Plane posX( 1, 0, 0, box.maxCorner().x());
const Plane negX(-1, 0, 0,-box.minCorner().x());
const Plane posY( 0, 1, 0, box.maxCorner().y());
const Plane negY( 0,-1, 0,-box.minCorner().y());
const Plane posZ( 0, 0, 1, box.maxCorner().z());
const Plane negZ( 0, 0,-1,-box.minCorner().z());
if (clipTo(posX) == 0) return 0;
if (clipTo(negX) == 0) return 0;
if (clipTo(posY) == 0) return 0;
if (clipTo(negY) == 0) return 0;
if (clipTo(posZ) == 0) return 0;
if (clipTo(negZ) == 0) return 0;
return pointArray.count();
}
/// Clip polygon to plane.
uint Polygon::clipTo(const Plane & plane)
{
int count = 0;
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount + 1); // @@ Do not create copy every time.
Vector3 prevPoint = pointArray[pointCount - 1];
float prevDist = dot(plane.vector(), prevPoint) - plane.offset();
for (uint i = 0; i < pointCount; i++)
{
const Vector3 point = pointArray[i];
float dist = dot(plane.vector(), point) - plane.offset();
// @@ Handle points on plane better.
if (dist <= 0) // interior.
{
if (prevDist > 0) // exterior
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
// Add interior point.
newPointArray.append(point);
}
else if (dist > 0 && prevDist < 0)
{
// Add segment intersection point.
Vector3 dp = point - prevPoint;
float t = dist / prevDist;
newPointArray.append(point - dp * t);
}
prevPoint = point;
prevDist = dist;
}
swap(pointArray, newPointArray);
return count;
}
void Polygon::removeColinearPoints()
{
const uint pointCount = pointArray.count();
Array<Vector3> newPointArray(pointCount);
for (uint i = 0 ; i < pointCount; i++)
{
int j = (i + 1) % pointCount;
int k = (i + pointCount - 1) % pointCount;
Vector3 v1 = normalize(pointArray[j] - pointArray[i]);
Vector3 v2 = normalize(pointArray[i] - pointArray[k]);
if (dot(v1, v2) < 0.999)
{
newPointArray.append(pointArray[i]);
}
}
swap(pointArray, newPointArray);
}

View File

@ -1,45 +0,0 @@
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
#ifndef NV_MATH_POLYGON_H
#define NV_MATH_POLYGON_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
#include <nvmath/Vector.h>
#include <nvmath/Box.h>
namespace nv
{
class Box;
class Plane;
class Triangle;
class Polygon
{
NV_FORBID_COPY(Polygon);
public:
Polygon();
Polygon(const Triangle & t);
Polygon(const Vector3 * points, uint vertexCount);
float area() const;
Box bounds() const;
Plane plane() const;
uint clipTo(const Box & box);
uint clipTo(const Plane & plane);
void removeColinearPoints();
private:
Array<Vector3> pointArray;
};
} // nv namespace
#endif // NV_MATH_POLYGON_H

View File

@ -51,6 +51,7 @@ namespace nv
inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b) inline Quaternion mul(Quaternion::Arg a, Quaternion::Arg b)
{ {
// @@ Efficient SIMD implementation?
return Quaternion( return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(), + a.x() * b.w() + a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(), - a.x() * b.z() + a.y()*b.w() + a.z()*b.x() + a.w()*b.y(),
@ -58,40 +59,6 @@ namespace nv
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w()); - a.x() * b.x() - a.y()*b.y() - a.z()*b.z() + a.w()*b.w());
} }
inline Quaternion mul(Quaternion::Arg a, Vector3::Arg b)
{
return Quaternion(
+ a.y()*b.z() - a.z()*b.y() + a.w()*b.x(),
- a.x() * b.z() + a.z()*b.x() + a.w()*b.y(),
+ a.x() * b.y() - a.y()*b.x() + a.w()*b.z(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z() );
}
inline Quaternion mul(Vector3::Arg a, Quaternion::Arg b)
{
return Quaternion(
+ a.x() * b.w() + a.y()*b.z() - a.z()*b.y(),
- a.x() * b.z() + a.y()*b.w() + a.z()*b.x(),
+ a.x() * b.y() - a.y()*b.x() + a.z()*b.w(),
- a.x() * b.x() - a.y()*b.y() - a.z()*b.z());
}
inline Quaternion operator *(Quaternion::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Quaternion::Arg a, Vector3::Arg b)
{
return mul(a, b);
}
inline Quaternion operator *(Vector3::Arg a, Quaternion::Arg b)
{
return mul(a, b);
}
inline Quaternion scale(Quaternion::Arg q, float s) inline Quaternion scale(Quaternion::Arg q, float s)
{ {
return scale(q.asVector(), s); return scale(q.asVector(), s);
@ -155,24 +122,6 @@ namespace nv
return Quaternion(Vector4(v * s, c)); return Quaternion(Vector4(v * s, c));
} }
inline Vector3 imag(Quaternion::Arg q)
{
return q.asVector().xyz();
}
inline float real(Quaternion::Arg q)
{
return q.w();
}
/// Transform vector.
inline Vector3 transform(Quaternion::Arg q, Vector3::Arg v)
{
Quaternion t = q * v * conjugate(q);
return imag(t);
}
} // nv namespace } // nv namespace

View File

@ -1,726 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvmath/Solver.h>
using namespace nv;
namespace
{
class Preconditioner
{
public:
// Virtual dtor.
virtual ~Preconditioner() { }
// Apply preconditioning step.
virtual void apply(const FullVector & x, FullVector & y) const = 0;
};
// Jacobi preconditioner.
class JacobiPreconditioner : public Preconditioner
{
public:
JacobiPreconditioner(const SparseMatrix & M, bool symmetric) : m_inverseDiagonal(M.width())
{
nvCheck(M.isSquare());
for(uint x = 0; x < M.width(); x++)
{
float elem = M.getCoefficient(x, x);
nvDebugCheck( elem != 0.0f );
if (symmetric)
{
m_inverseDiagonal[x] = 1.0f / sqrt(fabs(elem));
}
else
{
m_inverseDiagonal[x] = 1.0f / elem;
}
}
}
void apply(const FullVector & x, FullVector & y) const
{
y *= x;
}
private:
FullVector m_inverseDiagonal;
};
} // namespace
static int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
static int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon);
// Solve the symmetric system: At<41>A<EFBFBD>x = At<41>b
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width()); // @@ If height == width we could solve it directly...
const uint D = A.width();
FullVector Atb(D);
mult(Transposed, A, b, Atb);
SparseMatrix AtA(D);
mult(Transposed, A, NoTransposed, A, AtA);
SymmetricSolver(AtA, Atb, x, epsilon);
}
// See section 10.4.3 in: Mesh Parameterization: Theory and Practice, Siggraph Course Notes, August 2007
void nv::LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon/*= 1e-5f*/)
{
nvDebugCheck(A.width() == x.dimension());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(A.height() >= A.width() - lockedCount);
// @@ This is not the most efficient way of building a system with reduced degrees of freedom. It would be faster to do it on the fly.
const uint D = A.width() - lockedCount;
nvDebugCheck(D > 0);
// Compute: b - Al * xl
FullVector b_Alxl(b);
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
}
if (!isFree)
{
b_Alxl[y] -= x[column] * A.getRow(y)[e].v;
}
}
}
// Remove locked columns from A.
SparseMatrix Af(D, A.height());
for (uint y = 0; y < A.height(); y++)
{
const uint count = A.getRow(y).count();
for (uint e = 0; e < count; e++)
{
uint column = A.getRow(y)[e].x;
uint ix = column;
bool isFree = true;
for (uint i = 0; i < lockedCount; i++)
{
isFree &= (lockedParameters[i] != column);
if (column > lockedParameters[i]) ix--; // shift columns
}
if (isFree)
{
Af.setCoefficient(ix, y, A.getRow(y)[e].v);
}
}
}
// Remove elements from x
FullVector xf(D);
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
xf[j++] = x[i];
}
}
// Solve reduced system.
LeastSquaresSolver(Af, b_Alxl, xf, epsilon);
// Copy results back to x.
for (uint i = 0, j = 0; i < A.width(); i++)
{
bool isFree = true;
for (uint l = 0; l < lockedCount; l++)
{
isFree &= (lockedParameters[l] != i);
}
if (isFree)
{
x[i] = xf[j++];
}
}
}
void nv::SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon/*1e-5f*/)
{
nvDebugCheck(A.height() == A.width());
nvDebugCheck(A.height() == b.dimension());
nvDebugCheck(b.dimension() == x.dimension());
// JacobiPreconditioner jacobi(A, true);
// ConjugateGradientSolver(jacobi, A, b, x, epsilon);
ConjugateGradientSolver(A, b, x, epsilon);
}
/**
* Compute the solution of the sparse linear system Ab=x using the Conjugate
* Gradient method.
*
* Solving sparse linear systems:
* (1) A<>x = b
*
* The conjugate gradient algorithm solves (1) only in the case that A is
* symmetric and positive definite. It is based on the idea of minimizing the
* function
*
* (2) f(x) = 1/2<>x<EFBFBD>A<EFBFBD>x - b<>x
*
* This function is minimized when its gradient
*
* (3) df = A<>x - b
*
* is zero, which is equivalent to (1). The minimization is carried out by
* generating a succession of search directions p.k and improved minimizers x.k.
* At each stage a quantity alfa.k is found that minimizes f(x.k + alfa.k<>p.k),
* and x.k+1 is set equal to the new point x.k + alfa.k<>p.k. The p.k and x.k are
* built up in such a way that x.k+1 is also the minimizer of f over the whole
* vector space of directions already taken, {p.1, p.2, . . . , p.k}. After N
* iterations you arrive at the minimizer over the entire vector space, i.e., the
* solution to (1).
*
* For a really good explanation of the method see:
*
* "An Introduction to the Conjugate Gradient Method Without the Agonizing Pain",
* Jonhathan Richard Shewchuk.
*
**/
/*static*/ int ConjugateGradientSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A<>x;
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = r;
copy(r, p);
delta_new = dot( r, r );
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A<>p
mult(A, p, q);
// alpha = delta_new / p<>q
alpha = delta_new / dot( p, q );
// x = alfa<66>p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A<>x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alpha<68>q
saxpy(-alpha, q, r);
}
delta_old = delta_new;
delta_new = dot( r, r );
beta = delta_new / delta_old;
// p = r + beta<74>p
copy(r, p);
saxpy(beta, p, r);
}
return i;
}
// Conjugate gradient with preconditioner.
/*static*/ int ConjugateGradientSolver(const Preconditioner & preconditioner, const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon)
{
nvDebugCheck( A.isSquare() );
nvDebugCheck( A.width() == b.dimension() );
nvDebugCheck( A.width() == x.dimension() );
int i = 0;
const int D = A.width();
const int i_max = 4 * D; // Convergence should be linear, but in some cases, it's not.
FullVector r(D); // residual
FullVector p(D); // search direction
FullVector q(D); //
FullVector s(D); // preconditioned
float delta_0;
float delta_old;
float delta_new;
float alpha;
float beta;
// r = b - A<>x
copy(b, r);
sgemv(-1, A, x, 1, r);
// p = M^-1 <20> r
preconditioner.apply(r, p);
//copy(r, p);
delta_new = dot(r, p);
delta_0 = delta_new;
while (i < i_max && delta_new > epsilon*epsilon*delta_0)
{
i++;
// q = A<>p
mult(A, p, q);
// alpha = delta_new / p<>q
alpha = delta_new / dot(p, q);
// x = alfa<66>p + x
saxpy(alpha, p, x);
if ((i & 31) == 0) // recompute r after 32 steps
{
// r = b - A<>x
copy(b, r);
sgemv(-1, A, x, 1, r);
}
else
{
// r = r - alfa<66>q
saxpy(-alpha, q, r);
}
// s = M^-1 <20> r
preconditioner.apply(r, s);
//copy(r, s);
delta_old = delta_new;
delta_new = dot( r, s );
beta = delta_new / delta_old;
// p = s + beta<74>p
copy(s, p);
saxpy(beta, p, s);
}
return i;
}
#if 0 // Nonsymmetric solvers
/** Bi-conjugate gradient method. */
MATHLIB_API int BiConjugateGradientSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 4 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha;
float beta;
DenseVector r(D);
DenseVector rtilde(D);
DenseVector p(D);
DenseVector ptilde(D);
DenseVector q(D);
DenseVector qtilde(D);
DenseVector tmp(D); // temporal vector.
// r = b - A<>x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
// p = r;
p.Set( r );
// ptilde = rtilde
ptilde.Set( rtilde );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i < i_max ) {
i++;
rho_1 = DenseVectorDotProduct( r, rtilde );
if( rho_1 == 0 ) {
// method fails.
return -i;
}
if (i == 1) {
p.Set( r );
ptilde.Set( rtilde );
}
else {
beta = rho_1 / rho_2;
// p = r + beta * p;
p.Mad( r, p, beta );
// ptilde = ztilde + beta * ptilde;
ptilde.Mad( rtilde, ptilde, beta );
}
// q = A * p;
A.Product( p, q );
// qtilde = A^t * ptilde;
A.TransProduct( ptilde, qtilde );
alpha = rho_1 / DenseVectorDotProduct( ptilde, q );
// x += alpha * p;
x.Mad( x, p, alpha );
// r -= alpha * q;
r.Mad( r, q, -alpha );
// rtilde -= alpha * qtilde;
rtilde.Mad( rtilde, qtilde, -alpha );
rho_2 = rho_1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges
return i;
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = 2 * D;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A<>x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
phat.Set( p );
//Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
shat.Set( s );
//Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
/** Bi-conjugate gradient stabilized method. */
int BiCGSTABPrecondSolve( const SparseMatrix &A, const DenseVector &b, DenseVector &x, const IPreconditioner &M, float epsilon ) {
piDebugCheck( A.IsSquare() );
piDebugCheck( A.Width() == b.Dim() );
piDebugCheck( A.Width() == x.Dim() );
int i = 0;
const int D = A.Width();
const int i_max = D;
// const int i_max = 1000;
float resid;
float rho_1 = 0;
float rho_2 = 0;
float alpha = 0;
float beta = 0;
float omega = 0;
DenseVector p(D);
DenseVector phat(D);
DenseVector s(D);
DenseVector shat(D);
DenseVector t(D);
DenseVector v(D);
DenseVector r(D);
DenseVector rtilde(D);
DenseVector tmp(D);
// r = b - A<>x;
A.Product( x, tmp );
r.Sub( b, tmp );
// rtilde = r
rtilde.Set( r );
float normb = b.Norm();
if( normb == 0.0 ) normb = 1;
// test convergence
resid = r.Norm() / normb;
if( resid < epsilon ) {
// method converges?
return 0;
}
while( i<i_max ) {
i++;
rho_1 = DenseVectorDotProduct( rtilde, r );
if( rho_1 == 0 ) {
// method fails
return -i;
}
if( i == 1 ) {
p.Set( r );
}
else {
beta = (rho_1 / rho_2) * (alpha / omega);
// p = r + beta * (p - omega * v);
p.Mad( p, v, -omega );
p.Mad( r, p, beta );
}
//phat = M.solve(p);
//phat.Set( p );
M.Precond( &phat, p );
//v = A * phat;
A.Product( phat, v );
alpha = rho_1 / DenseVectorDotProduct( rtilde, v );
// s = r - alpha * v;
s.Mad( r, v, -alpha );
resid = s.Norm() / normb;
//printf( "--- Iteration %d: residual = %f\n", i, resid );
if( resid < epsilon ) {
// x += alpha * phat;
x.Mad( x, phat, alpha );
return i;
}
//shat = M.solve(s);
//shat.Set( s );
M.Precond( &shat, s );
//t = A * shat;
A.Product( shat, t );
omega = DenseVectorDotProduct( t, s ) / DenseVectorDotProduct( t, t );
// x += alpha * phat + omega * shat;
x.Mad( x, shat, omega );
x.Mad( x, phat, alpha );
//r = s - omega * t;
r.Mad( s, t, -omega );
rho_2 = rho_1;
resid = r.Norm() / normb;
if( resid < epsilon ) {
return i;
}
if( omega == 0 ) {
return -i; // ???
}
}
return i;
}
#endif

View File

@ -1,20 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SOLVER_H
#define NV_MATH_SOLVER_H
#include <nvmath/Sparse.h>
namespace nv
{
// Linear solvers.
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
NVMATH_API void LeastSquaresSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, const uint * lockedParameters, uint lockedCount, float epsilon = 1e-5f);
NVMATH_API void SymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
// NVMATH_API void NonSymmetricSolver(const SparseMatrix & A, const FullVector & b, FullVector & x, float epsilon = 1e-5f);
} // nv namespace
#endif // NV_MATH_SOLVER_H

View File

@ -1,831 +0,0 @@
// This code is in the public domain -- Ignacio Casta<74>o <castanyo@yahoo.es>
#include <nvmath/Sparse.h>
#include <nvmath/KahanSum.h>
using namespace nv;
/// Ctor.
FullVector::FullVector(uint dim)
{
m_array.resize(dim);
}
/// Copy ctor.
FullVector::FullVector(const FullVector & v) : m_array(v.m_array)
{
}
/// Copy operator
const FullVector & FullVector::operator=(const FullVector & v)
{
nvCheck(dimension() == v.dimension());
m_array = v.m_array;
return *this;
}
void FullVector::fill(float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] = f;
}
}
void FullVector::operator+= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += v.m_array[i];
}
}
void FullVector::operator-= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= v.m_array[i];
}
}
void FullVector::operator*= (const FullVector & v)
{
nvDebugCheck(dimension() == v.dimension());
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= v.m_array[i];
}
}
void FullVector::operator+= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] += f;
}
}
void FullVector::operator-= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] -= f;
}
}
void FullVector::operator*= (float f)
{
const uint dim = dimension();
for (uint i = 0; i < dim; i++)
{
m_array[i] *= f;
}
}
void nv::saxpy(float a, const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] += a * x[i];
}
}
void nv::copy(const FullVector & x, FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
y[i] = x[i];
}
}
void nv::scal(float a, FullVector & x)
{
const uint dim = x.dimension();
for (uint i = 0; i < dim; i++)
{
x[i] *= a;
}
}
float nv::dot(const FullVector & x, const FullVector & y)
{
nvDebugCheck(x.dimension() == y.dimension());
const uint dim = x.dimension();
/*float sum = 0;
for (uint i = 0; i < dim; i++)
{
sum += x[i] * y[i];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < dim; i++)
{
kahan.add(x[i] * y[i]);
}
return kahan.sum();
}
FullMatrix::FullMatrix(uint d) : m_width(d), m_height(d)
{
m_array.resize(d*d, 0.0f);
}
FullMatrix::FullMatrix(uint w, uint h) : m_width(w), m_height(h)
{
m_array.resize(w*h, 0.0f);
}
FullMatrix::FullMatrix(const FullMatrix & m) : m_width(m.m_width), m_height(m.m_height)
{
m_array = m.m_array;
}
const FullMatrix & FullMatrix::operator=(const FullMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
float FullMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
return m_array[y * width() + x];
}
void FullMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] = f;
}
void FullMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] += f;
}
void FullMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
m_array[y * width() + x] *= f;
}
float FullMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
float sum = 0;
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
sum += m_array[y * count + i] * v[i];
}
return sum;
}
void FullMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck( v.dimension() == width() );
nvDebugCheck( y < height() );
const uint count = v.dimension();
for (uint i = 0; i < count; i++)
{
v[i] += m_array[y * count + i];
}
}
// y = M * x
void nv::mult(const FullMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// Multiply a row of A by a column of B.
static float dot(uint j, Transpose TA, const FullMatrix & A, uint i, Transpose TB, const FullMatrix & B)
{
const uint w = (TA == NoTransposed) ? A.width() : A.height();
nvDebugCheck(w == (TB == NoTransposed) ? B.height() : A.width());
float sum = 0.0f;
for (uint k = 0; k < w; k++)
{
const float a = (TA == NoTransposed) ? A.getCoefficient(k, j) : A.getCoefficient(j, k); // @@ Move branches out of the loop?
const float b = (TB == NoTransposed) ? B.getCoefficient(i, k) : A.getCoefficient(k, i);
sum += a * b;
}
return sum;
}
// C = A * B
void nv::mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = alpha * ::dot(x, TA, A, y, TB, B) + beta * C.getCoefficient(x, y);
C.setCoefficient(x, y, c);
}
}
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint d) : m_width(d)
{
m_array.resize(d);
}
/// Ctor. Init the size of the sparse matrix.
SparseMatrix::SparseMatrix(uint w, uint h) : m_width(w)
{
m_array.resize(h);
}
SparseMatrix::SparseMatrix(const SparseMatrix & m) : m_width(m.m_width)
{
m_array = m.m_array;
}
const SparseMatrix & SparseMatrix::operator=(const SparseMatrix & m)
{
nvCheck(width() == m.width());
nvCheck(height() == m.height());
m_array = m.m_array;
return *this;
}
// x is column, y is row
float SparseMatrix::getCoefficient(uint x, uint y) const
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x) return m_array[y][i].v;
}
return 0.0f;
}
void SparseMatrix::setCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v = f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::addCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v += f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
void SparseMatrix::mulCoefficient(uint x, uint y, float f)
{
nvDebugCheck( x < width() );
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
if (m_array[y][i].x == x)
{
m_array[y][i].v *= f;
return;
}
}
Coefficient c = { x, f };
m_array[y].append( c );
}
float SparseMatrix::sumRow(uint y) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v;
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v);
}
return kahan.sum();
}
float SparseMatrix::dotRow(uint y, const FullVector & v) const
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
/*float sum = 0;
for (uint i = 0; i < count; i++)
{
sum += m_array[y][i].v * v[m_array[y][i].x];
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
kahan.add(m_array[y][i].v * v[m_array[y][i].x]);
}
return kahan.sum();
}
void SparseMatrix::madRow(uint y, float alpha, FullVector & v) const
{
nvDebugCheck(y < height());
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
v[m_array[y][i].x] += alpha * m_array[y][i].v;
}
}
void SparseMatrix::clearRow(uint y)
{
nvDebugCheck( y < height() );
m_array[y].clear();
}
void SparseMatrix::scaleRow(uint y, float f)
{
nvDebugCheck( y < height() );
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
m_array[y][i].v *= f;
}
}
void SparseMatrix::normalizeRow(uint y)
{
nvDebugCheck( y < height() );
float norm = 0.0f;
const uint count = m_array[y].count();
for (uint i = 0; i < count; i++)
{
float f = m_array[y][i].v;
norm += f * f;
}
scaleRow(y, 1.0f / sqrtf(norm));
}
void SparseMatrix::clearColumn(uint x)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v = 0.0f;
break;
}
}
}
}
void SparseMatrix::scaleColumn(uint x, float f)
{
nvDebugCheck(x < width());
for (uint y = 0; y < height(); y++)
{
const uint count = m_array[y].count();
for (uint e = 0; e < count; e++)
{
if (m_array[y][e].x == x)
{
m_array[y][e].v *= f;
break;
}
}
}
}
const Array<SparseMatrix::Coefficient> & SparseMatrix::getRow(uint y) const
{
return m_array[y];
}
// y = M * x
void nv::mult(const SparseMatrix & M, const FullVector & x, FullVector & y)
{
mult(NoTransposed, M, x, y);
}
void nv::mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y)
{
const uint w = M.width();
const uint h = M.height();
if (TM == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
y.fill(0.0f);
for (uint i = 0; i < h; i++)
{
M.madRow(i, x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = M.dotRow(i, x);
}
}
}
// y = alpha*A*x + beta*y
void nv::sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
sgemv(alpha, NoTransposed, A, x, beta, y);
}
void nv::sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y)
{
const uint w = A.width();
const uint h = A.height();
if (TA == Transposed)
{
nvDebugCheck( h == x.dimension() );
nvDebugCheck( w == y.dimension() );
for (uint i = 0; i < h; i++)
{
A.madRow(i, alpha * x[i], y);
}
}
else
{
nvDebugCheck( w == x.dimension() );
nvDebugCheck( h == y.dimension() );
for (uint i = 0; i < h; i++)
{
y[i] = alpha * A.dotRow(i, x) + beta * y[i];
}
}
}
// dot y-row of A by x-column of B
static float dotRowColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(x, c.x);
}
return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(x, c.x));
}
return kahan.sum();
}
// dot y-row of A by x-row of B
static float dotRowRow(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
const Array<SparseMatrix::Coefficient> & row = A.getRow(y);
const uint count = row.count();
/*float sum = 0.0f;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
sum += c.v * B.getCoefficient(c.x, x);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < count; i++)
{
const SparseMatrix::Coefficient & c = row[i];
kahan.add(c.v * B.getCoefficient(c.x, x));
}
return kahan.sum();
}
// dot y-column of A by x-column of B
static float dotColumnColumn(int y, const SparseMatrix & A, int x, const SparseMatrix & B)
{
nvDebugCheck(A.height() == B.height());
const uint h = A.height();
/*float sum = 0.0f;
for (uint i = 0; i < h; i++)
{
sum += A.getCoefficient(y, i) * B.getCoefficient(x, i);
}
//return sum;*/
KahanSum kahan;
for (uint i = 0; i < h; i++)
{
kahan.add(A.getCoefficient(y, i) * B.getCoefficient(x, i));
}
return kahan.sum();
}
// C = A * B
void nv::mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C)
{
mult(NoTransposed, A, NoTransposed, B, C);
}
void nv::mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C)
{
sgemm(1.0f, TA, A, TB, B, 0.0f, C);
}
// C = alpha*A*B + beta*C
void nv::sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C)
{
sgemm(alpha, NoTransposed, A, NoTransposed, B, beta, C);
}
void nv::sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C)
{
const uint w = C.width();
const uint h = C.height();
uint aw = (TA == NoTransposed) ? A.width() : A.height();
uint ah = (TA == NoTransposed) ? A.height() : A.width();
uint bw = (TB == NoTransposed) ? B.width() : B.height();
uint bh = (TB == NoTransposed) ? B.height() : B.width();
nvDebugCheck(aw == bh);
nvDebugCheck(bw == ah);
nvDebugCheck(w == bw);
nvDebugCheck(h == ah);
for (uint y = 0; y < h; y++)
{
for (uint x = 0; x < w; x++)
{
float c = beta * C.getCoefficient(x, y);
if (TA == NoTransposed && TB == NoTransposed)
{
// dot y-row of A by x-column of B.
c += alpha * dotRowColumn(y, A, x, B);
}
else if (TA == Transposed && TB == Transposed)
{
// dot y-column of A by x-row of B.
c += alpha * dotRowColumn(x, B, y, A);
}
else if (TA == Transposed && TB == NoTransposed)
{
// dot y-column of A by x-column of B.
c += alpha * dotColumnColumn(y, A, x, B);
}
else if (TA == NoTransposed && TB == Transposed)
{
// dot y-row of A by x-row of B.
c += alpha * dotRowRow(y, A, x, B);
}
if (c != 0.0f)
{
C.setCoefficient(x, y, c);
}
}
}
}
// C = At * A
void nv::sqm(const SparseMatrix & A, SparseMatrix & C)
{
// This is quite expensive...
mult(Transposed, A, NoTransposed, A, C);
}

View File

@ -1,198 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_SPARSE_H
#define NV_MATH_SPARSE_H
#include <nvcore/Containers.h>
#include <nvmath/nvmath.h>
// Full and sparse vector and matrix classes. BLAS subset.
namespace nv
{
class FullVector;
class FullMatrix;
class SparseMatrix;
/// Fixed size vector class.
class FullVector
{
public:
FullVector(uint dim);
FullVector(const FullVector & v);
const FullVector & operator=(const FullVector & v);
uint dimension() const { return m_array.count(); }
const float & operator[]( uint index ) const { return m_array[index]; }
float & operator[] ( uint index ) { return m_array[index]; }
void fill(float f);
void operator+= (const FullVector & v);
void operator-= (const FullVector & v);
void operator*= (const FullVector & v);
void operator+= (float f);
void operator-= (float f);
void operator*= (float f);
private:
Array<float> m_array;
};
// Pseudo-BLAS interface.
NVMATH_API void saxpy(float a, const FullVector & x, FullVector & y); // y = a * x + y
NVMATH_API void copy(const FullVector & x, FullVector & y);
NVMATH_API void scal(float a, FullVector & x);
NVMATH_API float dot(const FullVector & x, const FullVector & y);
enum Transpose
{
NoTransposed = 0,
Transposed = 1
};
/// Full matrix class.
class FullMatrix
{
public:
FullMatrix(uint d);
FullMatrix(uint w, uint h);
FullMatrix(const FullMatrix & m);
const FullMatrix & operator=(const FullMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_height; }
bool isSquare() const { return m_width == m_height; }
float getCoefficient(uint x, uint y) const;
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
protected:
bool isValid() const {
return m_array.size() == (m_width * m_height);
}
private:
const uint m_width;
const uint m_height;
Array<float> m_array;
};
NVMATH_API void mult(const FullMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const FullMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const FullMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const FullMatrix & A, const FullMatrix & B, FullMatrix & C);
NVMATH_API void mult(Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, FullMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const FullMatrix & A, const FullMatrix & B, float beta, FullMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const FullMatrix & A, Transpose TB, const FullMatrix & B, float beta, FullMatrix & C);
/**
* Sparse matrix class. The matrix is assumed to be sparse and to have
* very few non-zero elements, for this reason it's stored in indexed
* format. To multiply column vectors efficiently, the matrix stores
* the elements in indexed-column order, there is a list of indexed
* elements for each row of the matrix. As with the FullVector the
* dimension of the matrix is constant.
**/
class SparseMatrix
{
friend class FullMatrix;
public:
// An element of the sparse array.
struct Coefficient {
uint x; // column
float v; // value
};
public:
SparseMatrix(uint d);
SparseMatrix(uint w, uint h);
SparseMatrix(const SparseMatrix & m);
const SparseMatrix & operator=(const SparseMatrix & m);
uint width() const { return m_width; }
uint height() const { return m_array.count(); }
bool isSquare() const { return width() == height(); }
float getCoefficient(uint x, uint y) const; // x is column, y is row
void setCoefficient(uint x, uint y, float f);
void addCoefficient(uint x, uint y, float f);
void mulCoefficient(uint x, uint y, float f);
float sumRow(uint y) const;
float dotRow(uint y, const FullVector & v) const;
void madRow(uint y, float alpha, FullVector & v) const;
void clearRow(uint y);
void scaleRow(uint y, float f);
void normalizeRow(uint y);
void clearColumn(uint x);
void scaleColumn(uint x, float f);
const Array<Coefficient> & getRow(uint y) const;
private:
/// Number of columns.
const uint m_width;
/// Array of matrix elements.
Array< Array<Coefficient> > m_array;
};
NVMATH_API void mult(const SparseMatrix & M, const FullVector & x, FullVector & y);
NVMATH_API void mult(Transpose TM, const SparseMatrix & M, const FullVector & x, FullVector & y);
// y = alpha*A*x + beta*y
NVMATH_API void sgemv(float alpha, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void sgemv(float alpha, Transpose TA, const SparseMatrix & A, const FullVector & x, float beta, FullVector & y);
NVMATH_API void mult(const SparseMatrix & A, const SparseMatrix & B, SparseMatrix & C);
NVMATH_API void mult(Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, SparseMatrix & C);
// C = alpha*A*B + beta*C
NVMATH_API void sgemm(float alpha, const SparseMatrix & A, const SparseMatrix & B, float beta, SparseMatrix & C);
NVMATH_API void sgemm(float alpha, Transpose TA, const SparseMatrix & A, Transpose TB, const SparseMatrix & B, float beta, SparseMatrix & C);
// C = At * A
NVMATH_API void sqm(const SparseMatrix & A, SparseMatrix & C);
} // nv namespace
#endif // NV_MATH_SPARSE_H

View File

@ -3,7 +3,6 @@
#ifndef NV_MATH_SPHERICALHARMONIC_H #ifndef NV_MATH_SPHERICALHARMONIC_H
#define NV_MATH_SPHERICALHARMONIC_H #define NV_MATH_SPHERICALHARMONIC_H
#include <string.h> // memcpy
#include <nvmath/Vector.h> #include <nvmath/Vector.h>
namespace nv namespace nv

View File

@ -96,7 +96,7 @@ static bool planeBoxOverlap(Vector3::Arg normal, Vector3::Arg vert, Vector3::Arg
if(min>rad || max<-rad) return false; if(min>rad || max<-rad) return false;
bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) bool triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{ {
// use separating axis theorem to test overlap between triangle and box // use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions: // need to test for overlap in these directions:
@ -170,7 +170,7 @@ bool nv::triBoxOverlap(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const T
} }
bool nv::triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri) bool triBoxOverlapNoBounds(Vector3::Arg boxcenter, Vector3::Arg boxhalfsize, const Triangle & tri)
{ {
// use separating axis theorem to test overlap between triangle and box // use separating axis theorem to test overlap between triangle and box
// need to test for overlap in these directions: // need to test for overlap in these directions:

View File

@ -1,82 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#include <nvcore/Stream.h>
#include <nvmath/Vector.h>
#include <nvmath/Matrix.h>
#include <nvmath/Quaternion.h>
#include <nvmath/Basis.h>
#include <nvmath/Box.h>
#include <nvmath/TypeSerialization.h>
using namespace nv;
Stream & nv::operator<< (Stream & s, Vector2 & v)
{
float x = v.x();
float y = v.y();
s << x << y;
if (s.isLoading())
{
v.set(x, y);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector3 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
s << x << y << z;
if (s.isLoading())
{
v.set(x, y, z);
}
return s;
}
Stream & nv::operator<< (Stream & s, Vector4 & v)
{
float x = v.x();
float y = v.y();
float z = v.z();
float w = v.w();
s << x << y << z << w;
if (s.isLoading())
{
v.set(x, y, z, w);
}
return s;
}
Stream & nv::operator<< (Stream & s, Matrix & m)
{
return s;
}
Stream & nv::operator<< (Stream & s, Quaternion & q)
{
return s << q.asVector();
}
Stream & nv::operator<< (Stream & s, Basis & basis)
{
return s << basis.tangent << basis.bitangent << basis.normal;
}
Stream & nv::operator<< (Stream & s, Box & box)
{
return s << box.m_mins << box.m_maxs;
}

View File

@ -1,32 +0,0 @@
// This code is in the public domain -- castanyo@yahoo.es
#ifndef NV_MATH_TYPESERIALIZATION_H
#define NV_MATH_TYPESERIALIZATION_H
#include <nvmath/nvmath.h>
namespace nv
{
class Stream;
class Vector2;
class Vector3;
class Vector4;
class Matrix;
class Quaternion;
struct Basis;
class Box;
NVMATH_API Stream & operator<< (Stream & s, Vector2 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector3 & obj);
NVMATH_API Stream & operator<< (Stream & s, Vector4 & obj);
NVMATH_API Stream & operator<< (Stream & s, Matrix & obj);
NVMATH_API Stream & operator<< (Stream & s, Quaternion & obj);
NVMATH_API Stream & operator<< (Stream & s, Basis & obj);
NVMATH_API Stream & operator<< (Stream & s, Box & obj);
} // nv namespace
#endif // NV_MATH_TYPESERIALIZATION_H

View File

@ -4,7 +4,7 @@
#define NV_MATH_VECTOR_H #define NV_MATH_VECTOR_H
#include <nvmath/nvmath.h> #include <nvmath/nvmath.h>
#include <nvcore/Algorithms.h> // min, max #include <nvcore/Containers.h> // min, max
namespace nv namespace nv
{ {
@ -71,7 +71,6 @@ public:
const Vector2 & xy() const; const Vector2 & xy() const;
scalar component(uint idx) const; scalar component(uint idx) const;
void setComponent(uint idx, scalar f);
const scalar * ptr() const; const scalar * ptr() const;
@ -240,21 +239,13 @@ inline const Vector2 & Vector3::xy() const
inline scalar Vector3::component(uint idx) const inline scalar Vector3::component(uint idx) const
{ {
nvDebugCheck(idx < 3); nvDebugCheck(idx < 3);
if (idx == 0) return m_x; if (idx == 0) return x();
if (idx == 1) return m_y; if (idx == 1) return y();
if (idx == 2) return m_z; if (idx == 2) return z();
nvAssume(false); nvAssume(false);
return 0.0f; return 0.0f;
} }
inline void Vector3::setComponent(uint idx, float f)
{
nvDebugCheck(idx < 3);
if (idx == 0) m_x = f;
else if (idx == 1) m_y = f;
else if (idx == 2) m_z = f;
}
inline const scalar * Vector3::ptr() const inline const scalar * Vector3::ptr() const
{ {
return &m_x; return &m_x;
@ -486,35 +477,6 @@ inline scalar length(Vector2::Arg v)
return sqrtf(length_squared(v)); return sqrtf(length_squared(v));
} }
inline scalar inverse_length(Vector2::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector2::Arg v, float epsilon = NV_NORMAL_EPSILON)
{
return equal(length(v), 1, epsilon);
}
inline Vector2 normalize(Vector2::Arg v, float epsilon = NV_EPSILON)
{
float l = length(v);
nvDebugCheck(!isZero(l, epsilon));
Vector2 n = scale(v, 1.0f / l);
nvDebugCheck(isNormalized(n));
return n;
}
inline Vector2 normalizeSafe(Vector2::Arg v, Vector2::Arg fallback, float epsilon = NV_EPSILON)
{
float l = length(v);
if (isZero(l, epsilon)) {
return fallback;
}
return scale(v, 1.0f / l);
}
inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON) inline bool equal(Vector2::Arg v1, Vector2::Arg v2, float epsilon = NV_EPSILON)
{ {
return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon); return equal(v1.x(), v2.x(), epsilon) && equal(v1.y(), v2.y(), epsilon);
@ -633,11 +595,6 @@ inline scalar length(Vector3::Arg v)
return sqrtf(length_squared(v)); return sqrtf(length_squared(v));
} }
inline scalar inverse_length(Vector3::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON) inline bool isNormalized(Vector3::Arg v, float epsilon = NV_NORMAL_EPSILON)
{ {
return equal(length(v), 1, epsilon); return equal(length(v), 1, epsilon);
@ -759,11 +716,6 @@ inline scalar length(Vector4::Arg v)
return sqrtf(length_squared(v)); return sqrtf(length_squared(v));
} }
inline scalar inverse_length(Vector4::Arg v)
{
return 1.0f / sqrtf(length_squared(v));
}
inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON) inline bool isNormalized(Vector4::Arg v, float epsilon = NV_NORMAL_EPSILON)
{ {
return equal(length(v), 1, epsilon); return equal(length(v), 1, epsilon);

View File

@ -136,11 +136,6 @@ inline float lerp(float f0, float f1, float t)
return f0 * s + f1 * t; return f0 * s + f1 * t;
} }
inline float square(float f)
{
return f * f;
}
} // nv } // nv
#endif // NV_MATH_H #endif // NV_MATH_H

View File

@ -13,10 +13,10 @@ SET(NVTT_SRCS
CompressDXT.cpp CompressDXT.cpp
CompressRGB.h CompressRGB.h
CompressRGB.cpp CompressRGB.cpp
FastCompressDXT.h
FastCompressDXT.cpp
QuickCompressDXT.h QuickCompressDXT.h
QuickCompressDXT.cpp QuickCompressDXT.cpp
OptimalCompressDXT.h
OptimalCompressDXT.cpp
SingleColorLookup.h SingleColorLookup.h
CompressionOptions.h CompressionOptions.h
CompressionOptions.cpp CompressionOptions.cpp
@ -79,9 +79,6 @@ TARGET_LINK_LIBRARIES(nvassemble nvcore nvmath nvimage)
ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h) ADD_EXECUTABLE(filtertest tests/filtertest.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage) TARGET_LINK_LIBRARIES(filtertest nvcore nvmath nvimage)
ADD_EXECUTABLE(stress tests/stress.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(stress nvcore nvmath nvimage nvtt)
ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h) ADD_EXECUTABLE(nvzoom tools/resize.cpp tools/cmdline.h)
TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage) TARGET_LINK_LIBRARIES(nvzoom nvcore nvmath nvimage)

View File

@ -29,8 +29,8 @@
#include "nvtt.h" #include "nvtt.h"
#include "CompressDXT.h" #include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
#include "OptimalCompressDXT.h"
#include "CompressionOptions.h" #include "CompressionOptions.h"
#include "OutputOptions.h" #include "OutputOptions.h"
@ -57,26 +57,33 @@ using namespace nv;
using namespace nvtt; using namespace nvtt;
void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & outputOptions) nv::FastCompressor::FastCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{ {
const uint w = image->width(); }
const uint h = image->height();
nv::FastCompressor::~FastCompressor()
{
}
void nv::FastCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
{
m_image = image;
m_alphaMode = alphaMode;
}
void nv::FastCompressor::compressDXT1(const OutputOptions::Private & outputOptions)
{
const uint w = m_image->width();
const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1(rgba, &block); QuickCompress::compressDXT1(rgba, &block);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -86,27 +93,19 @@ void nv::fastCompressDXT1(const Image * image, const OutputOptions::Private & ou
} }
void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & outputOptions) void nv::FastCompressor::compressDXT1a(const OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
// @@ We could do better here: check for single RGB, but varying alpha.
if (rgba.isSingleColor())
{
QuickCompress::compressDXT1a(rgba.color(0), &block);
}
else
{
QuickCompress::compressDXT1a(rgba, &block); QuickCompress::compressDXT1a(rgba, &block);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -116,17 +115,18 @@ void nv::fastCompressDXT1a(const Image * image, const OutputOptions::Private & o
} }
void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions) void nv::FastCompressor::compressDXT3(const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT3 block; BlockDXT3 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
QuickCompress::compressDXT3(rgba, &block); QuickCompress::compressDXT3(rgba, &block);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
@ -137,19 +137,19 @@ void nv::fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Privat
} }
void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions) void nv::FastCompressor::compressDXT5(const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!!
nv::compressBlock_BoundsRange(rgba, &block); QuickCompress::compressDXT5(rgba, &block, 0);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -159,23 +159,21 @@ void nv::fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Privat
} }
void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions) void nv::FastCompressor::compressDXT5n(const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
// copy X coordinate to alpha channel and Y coordinate to green channel.
rgba.swizzleDXT5n(); rgba.swizzleDXT5n();
//QuickCompress::compressDXT5(rgba, &block); // @@ Use fast version!! QuickCompress::compressDXT5(rgba, &block, 0);
nv::compressBlock_BoundsRange(rgba, &block);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -185,42 +183,28 @@ void nv::fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Priva
} }
void nv::fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions) nv::SlowCompressor::SlowCompressor() : m_image(NULL), m_alphaMode(AlphaMode_None)
{ {
// @@ TODO
// compress red channel (X)
} }
nv::SlowCompressor::~SlowCompressor()
void nv::fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions)
{ {
// @@ TODO
// compress red, green channels (X,Y)
} }
void nv::SlowCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
void nv::doPrecomputation()
{ {
static bool done = false; // @@ Stop using statics for reentrancy. Although the worst that could happen is that this stuff is precomputed multiple times. m_image = image;
m_alphaMode = alphaMode;
if (!done)
{
done = true;
squish::FastClusterFit::DoPrecomputation();
}
} }
void nv::SlowCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
void nv::compressDXT1(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
doPrecomputation();
//squish::WeightedClusterFit fit; //squish::WeightedClusterFit fit;
//squish::ClusterFit fit; //squish::ClusterFit fit;
squish::FastClusterFit fit; squish::FastClusterFit fit;
@ -229,11 +213,11 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
if (rgba.isSingleColor()) if (rgba.isSingleColor())
{ {
QuickCompress::compressDXT1(rgba.color(0), &block); OptimalCompress::compressDXT1(rgba.color(0), &block);
} }
else else
{ {
@ -250,10 +234,10 @@ void nv::compressDXT1(const Image * image, const OutputOptions::Private & output
} }
void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) void nv::SlowCompressor::compressDXT1a(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT1 block; BlockDXT1 block;
@ -264,11 +248,20 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
if (rgba.isSingleColor()) bool anyAlpha = false;
bool allAlpha = true;
for (uint i = 0; i < 16; i++)
{ {
QuickCompress::compressDXT1a(rgba.color(0), &block); if (rgba.color(i).a < 128) anyAlpha = true;
else allAlpha = false;
}
if ((!anyAlpha && rgba.isSingleColor() || allAlpha))
{
OptimalCompress::compressDXT1a(rgba.color(0), &block);
} }
else else
{ {
@ -285,29 +278,37 @@ void nv::compressDXT1a(const Image * image, const OutputOptions::Private & outpu
} }
void nv::compressDXT3(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) void nv::SlowCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT3 block; BlockDXT3 block;
squish::WeightedClusterFit fit; squish::WeightedClusterFit fit;
//squish::FastClusterFit fit;
fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z()); fit.SetMetric(compressionOptions.colorWeight.x(), compressionOptions.colorWeight.y(), compressionOptions.colorWeight.z());
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
// Compress explicit alpha. // Compress explicit alpha.
QuickCompress::compressDXT3A(rgba, &block.alpha); OptimalCompress::compressDXT3A(rgba, &block.alpha);
// Compress color. // Compress color.
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0); fit.SetColourSet(&colours, 0);
fit.Compress(&block.color); fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -316,10 +317,10 @@ void nv::compressDXT3(const Image * image, const OutputOptions::Private & output
} }
} }
void nv::compressDXT5(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) void nv::SlowCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
@ -330,12 +331,12 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
// Compress alpha. // Compress alpha.
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
compressBlock_BruteForce(rgba, &block.alpha); OptimalCompress::compressDXT5A(rgba, &block.alpha);
} }
else else
{ {
@ -343,9 +344,16 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
} }
// Compress color. // Compress color.
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), &block.color);
}
else
{
squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha); squish::ColourSet colours((uint8 *)rgba.colors(), squish::kWeightColourByAlpha);
fit.SetColourSet(&colours, 0); fit.SetColourSet(&colours, 0);
fit.Compress(&block.color); fit.Compress(&block.color);
}
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -355,28 +363,25 @@ void nv::compressDXT5(const Image * image, const OutputOptions::Private & output
} }
void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) void nv::SlowCompressor::compressDXT5n(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
BlockDXT5 block; BlockDXT5 block;
doPrecomputation();
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
// copy X coordinate to green channel and Y coordinate to alpha channel.
rgba.swizzleDXT5n(); rgba.swizzleDXT5n();
// Compress X. // Compress X.
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
compressBlock_BruteForce(rgba, &block.alpha); OptimalCompress::compressDXT5A(rgba, &block.alpha);
} }
else else
{ {
@ -384,7 +389,7 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
} }
// Compress Y. // Compress Y.
QuickCompress::compressDXT1G(rgba, &block.color); OptimalCompress::compressDXT1G(rgba, &block.color);
if (outputOptions.outputHandler != NULL) { if (outputOptions.outputHandler != NULL) {
outputOptions.outputHandler->writeData(&block, sizeof(block)); outputOptions.outputHandler->writeData(&block, sizeof(block));
@ -394,10 +399,10 @@ void nv::compressDXT5n(const Image * image, const OutputOptions::Private & outpu
} }
void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) void nv::SlowCompressor::compressBC4(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock rgba; ColorBlock rgba;
AlphaBlockDXT5 block; AlphaBlockDXT5 block;
@ -405,11 +410,11 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y); rgba.init(m_image, x, y);
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
compressBlock_BruteForce(rgba, &block); OptimalCompress::compressDXT5A(rgba, &block);
} }
else else
{ {
@ -424,10 +429,10 @@ void nv::compressBC4(const Image * image, const nvtt::OutputOptions::Private & o
} }
void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions) void nv::SlowCompressor::compressBC5(const CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{ {
const uint w = image->width(); const uint w = m_image->width();
const uint h = image->height(); const uint h = m_image->height();
ColorBlock xcolor; ColorBlock xcolor;
ColorBlock ycolor; ColorBlock ycolor;
@ -437,16 +442,16 @@ void nv::compressBC5(const Image * image, const nvtt::OutputOptions::Private & o
for (uint y = 0; y < h; y += 4) { for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) { for (uint x = 0; x < w; x += 4) {
xcolor.init(image, x, y); xcolor.init(m_image, x, y);
xcolor.splatX(); xcolor.splatX();
ycolor.init(image, x, y); ycolor.init(m_image, x, y);
ycolor.splatY(); ycolor.splatY();
if (compressionOptions.quality == Quality_Highest) if (compressionOptions.quality == Quality_Highest)
{ {
compressBlock_BruteForce(xcolor, &block.x); OptimalCompress::compressDXT5A(xcolor, &block.x);
compressBlock_BruteForce(ycolor, &block.y); OptimalCompress::compressDXT5A(ycolor, &block.y);
} }
else else
{ {

View File

@ -32,25 +32,45 @@ namespace nv
class Image; class Image;
class FloatImage; class FloatImage;
void doPrecomputation(); class FastCompressor
{
public:
FastCompressor();
~FastCompressor();
// Fast compressors. void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void fastCompressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
void fastCompressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions);
// Normal compressors. void compressDXT1(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); void compressDXT1a(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); void compressDXT3(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); void compressDXT5(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); void compressDXT5n(const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions);
void compressBC4(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); private:
void compressBC5(const Image * image, const nvtt::OutputOptions::Private & outputOptions, const nvtt::CompressionOptions::Private & compressionOptions); const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
class SlowCompressor
{
public:
SlowCompressor();
~SlowCompressor();
void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1a(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5n(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC4(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressBC5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private:
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
};
// External compressors. // External compressors.
#if defined(HAVE_S3QUANT) #if defined(HAVE_S3QUANT)

View File

@ -41,7 +41,6 @@
#include "OutputOptions.h" #include "OutputOptions.h"
#include "CompressDXT.h" #include "CompressDXT.h"
#include "FastCompressDXT.h"
#include "CompressRGB.h" #include "CompressRGB.h"
#include "cuda/CudaUtils.h" #include "cuda/CudaUtils.h"
#include "cuda/CudaCompressDXT.h" #include "cuda/CudaCompressDXT.h"
@ -56,7 +55,7 @@ namespace
static int blockSize(Format format) static int blockSize(Format format)
{ {
if (format == Format_DXT1 || format == Format_DXT1a || format == Format_DXT1n) { if (format == Format_DXT1 || format == Format_DXT1a) {
return 8; return 8;
} }
else if (format == Format_DXT3) { else if (format == Format_DXT3) {
@ -71,9 +70,6 @@ namespace
else if (format == Format_BC5) { else if (format == Format_BC5) {
return 16; return 16;
} }
else if (format == Format_CTX1) {
return 8;
}
return 0; return 0;
} }
@ -203,7 +199,7 @@ namespace nvtt
AutoPtr<FloatImage> m_floatImage; AutoPtr<FloatImage> m_floatImage;
}; };
} } // nvtt namespace
Compressor::Compressor() : m(*new Compressor::Private()) Compressor::Compressor() : m(*new Compressor::Private())
@ -348,7 +344,7 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
{ {
header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format)); header.setLinearSize(computeImageSize(inputOptions.targetWidth, inputOptions.targetHeight, inputOptions.targetDepth, compressionOptions.bitcount, compressionOptions.format));
if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a || compressionOptions.format == Format_DXT1n) { if (compressionOptions.format == Format_DXT1 || compressionOptions.format == Format_DXT1a) {
header.setFourCC('D', 'X', 'T', '1'); header.setFourCC('D', 'X', 'T', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true); if (inputOptions.isNormalMap) header.setNormalFlag(true);
} }
@ -369,10 +365,6 @@ bool Compressor::Private::outputHeader(const InputOptions::Private & inputOption
header.setFourCC('A', 'T', 'I', '2'); header.setFourCC('A', 'T', 'I', '2');
if (inputOptions.isNormalMap) header.setNormalFlag(true); if (inputOptions.isNormalMap) header.setNormalFlag(true);
} }
else if (compressionOptions.format == Format_CTX1) {
header.setFourCC('C', 'T', 'X', '1');
if (inputOptions.isNormalMap) header.setNormalFlag(true);
}
} }
// Swap bytes if necessary. // Swap bytes if necessary.
@ -478,11 +470,6 @@ bool Compressor::Private::initMipmap(Mipmap & mipmap, const InputOptions::Privat
// Convert linear float image to fixed image ready for compression. // Convert linear float image to fixed image ready for compression.
mipmap.toFixedImage(inputOptions); mipmap.toFixedImage(inputOptions);
if (inputOptions.premultiplyAlpha)
{
premultiplyAlphaMipmap(mipmap, inputOptions);
}
return true; return true;
} }
@ -587,29 +574,6 @@ void Compressor::Private::scaleMipmap(Mipmap & mipmap, const InputOptions::Priva
} }
void Compressor::Private::premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{
nvDebugCheck(mipmap.asFixedImage() != NULL);
Image * image = mipmap.asMutableFixedImage();
const uint w = image->width();
const uint h = image->height();
const uint count = w * h;
for (uint i = 0; i < count; ++i)
{
Color32 c = image->pixel(i);
c.r = (uint(c.r) * uint(c.a)) >> 8;
c.g = (uint(c.g) * uint(c.a)) >> 8;
c.b = (uint(c.b) * uint(c.a)) >> 8;
image->pixel(i) = c;
}
}
// Process an input image: Convert to normal map, normalize, or convert to linear space. // Process an input image: Convert to normal map, normalize, or convert to linear space.
void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const void Compressor::Private::processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const
{ {
@ -692,9 +656,15 @@ void Compressor::Private::quantizeMipmap(Mipmap & mipmap, const CompressionOptio
bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const
{ {
const Image * image = mipmap.asFixedImage(); const Image * image = mipmap.asFixedImage();
nvDebugCheck(image != NULL); nvDebugCheck(image != NULL);
FastCompressor fast;
fast.setImage(image, inputOptions.alphaMode);
SlowCompressor slow;
slow.setImage(image, inputOptions.alphaMode);
if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB) if (compressionOptions.format == Format_RGBA || compressionOptions.format == Format_RGB)
{ {
compressRGB(image, outputOptions, compressionOptions); compressRGB(image, outputOptions, compressionOptions);
@ -718,18 +688,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
#endif #endif
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fastCompressDXT1(image, outputOptions); fast.compressDXT1(outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->compressDXT1(image, compressionOptions, outputOptions); cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT1(compressionOptions, outputOptions);
} }
else else
{ {
compressDXT1(image, outputOptions, compressionOptions); slow.compressDXT1(compressionOptions, outputOptions);
} }
} }
} }
@ -737,49 +708,38 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fastCompressDXT1a(image, outputOptions); fast.compressDXT1a(outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
/*cuda*/compressDXT1a(image, outputOptions, compressionOptions); /*cuda*/slow.compressDXT1a(compressionOptions, outputOptions);
} }
else else
{ {
compressDXT1a(image, outputOptions, compressionOptions); slow.compressDXT1a(compressionOptions, outputOptions);
} }
} }
} }
else if (compressionOptions.format == Format_DXT1n)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressDXT1n(image, compressionOptions, outputOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
}
else if (compressionOptions.format == Format_DXT3) else if (compressionOptions.format == Format_DXT3)
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fastCompressDXT3(image, outputOptions); fast.compressDXT3(outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->compressDXT3(image, inputOptions, compressionOptions, outputOptions); cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT3(compressionOptions, outputOptions);
} }
else else
{ {
compressDXT3(image, outputOptions, compressionOptions); slow.compressDXT3(compressionOptions, outputOptions);
} }
} }
} }
@ -787,18 +747,19 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fastCompressDXT5(image, outputOptions); fast.compressDXT5(outputOptions);
} }
else else
{ {
if (cudaEnabled) if (cudaEnabled)
{ {
nvDebugCheck(cudaSupported); nvDebugCheck(cudaSupported);
cuda->compressDXT5(image, inputOptions, compressionOptions, outputOptions); cuda->setImage(image, inputOptions.alphaMode);
cuda->compressDXT5(compressionOptions, outputOptions);
} }
else else
{ {
compressDXT5(image, outputOptions, compressionOptions); slow.compressDXT5(compressionOptions, outputOptions);
} }
} }
} }
@ -806,32 +767,20 @@ bool Compressor::Private::compressMipmap(const Mipmap & mipmap, const InputOptio
{ {
if (compressionOptions.quality == Quality_Fastest) if (compressionOptions.quality == Quality_Fastest)
{ {
fastCompressDXT5n(image, outputOptions); fast.compressDXT5n(outputOptions);
} }
else else
{ {
compressDXT5n(image, outputOptions, compressionOptions); slow.compressDXT5n(compressionOptions, outputOptions);
} }
} }
else if (compressionOptions.format == Format_BC4) else if (compressionOptions.format == Format_BC4)
{ {
compressBC4(image, outputOptions, compressionOptions); slow.compressBC4(compressionOptions, outputOptions);
} }
else if (compressionOptions.format == Format_BC5) else if (compressionOptions.format == Format_BC5)
{ {
compressBC5(image, outputOptions, compressionOptions); slow.compressBC5(compressionOptions, outputOptions);
}
else if (compressionOptions.format == Format_CTX1)
{
if (cudaEnabled)
{
nvDebugCheck(cudaSupported);
cuda->compressCTX1(image, compressionOptions, outputOptions);
}
else
{
if (outputOptions.errorHandler) outputOptions.errorHandler->error(Error_UnsupportedFeature);
}
} }
return true; return true;

View File

@ -58,7 +58,6 @@ namespace nvtt
void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; void downsampleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const; void scaleMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions, uint w, uint h, uint d) const;
void premultiplyAlphaMipmap(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const; void processInputImage(Mipmap & mipmap, const InputOptions::Private & inputOptions) const;
void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const; void quantizeMipmap(Mipmap & mipmap, const CompressionOptions::Private & compressionOptions) const;
bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const; bool compressMipmap(const Mipmap & mipmap, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) const;

View File

@ -1,456 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "FastCompressDXT.h"
#if defined(__SSE2__)
#include <emmintrin.h>
#endif
#if defined(__SSE__)
#include <xmmintrin.h>
#endif
#if defined(__MMX__)
#include <mmintrin.h>
#endif
#undef __VEC__
#if defined(__VEC__)
#include <altivec.h>
#undef bool
#endif
// Online Resources:
// - http://www.jasondorie.com/ImageLib.zip
// - http://homepage.hispeed.ch/rscheidegger/dri_experimental/s3tc_index.html
// - http://www.sjbrown.co.uk/?article=dxt
using namespace nv;
#if defined(__SSE2__) && 0
// @@ TODO
typedef __m128i VectorColor;
inline static __m128i loadColor(Color32 c)
{
return ...;
}
inline static __m128i absoluteDifference(__m128i a, __m128i b)
{
return ...;
}
inline uint colorDistance(__m128i a, __m128i b)
{
return 0;
}
#elif defined(__MMX__) && 0
typedef __m64 VectorColor;
inline static __m64 loadColor(Color32 c)
{
return _mm_unpacklo_pi8(_mm_cvtsi32_si64(c), _mm_setzero_si64());
}
inline static __m64 absoluteDifference(__m64 a, __m64 b)
{
// = |a-b| or |b-a|
return _mm_or_si64(_mm_subs_pu16(a, b), _mm_subs_pu16(b, a));
}
inline uint colorDistance(__m64 a, __m64 b)
{
union {
__m64 v;
uint16 part[4];
} s;
s.v = absoluteDifference(a, b);
// @@ This is very slow!
return s.part[0] + s.part[1] + s.part[2] + s.part[3];
}
#define vectorEnd _mm_empty
#elif defined(__VEC__)
typedef vector signed int VectorColor;
inline static vector signed int loadColor(Color32 c)
{
return (vector signed int) (c.r, c.g, c.b, c.a);
}
// Get the absolute distance between the given colors.
inline static uint colorDistance(vector signed int c0, vector signed int c1)
{
int result;
vector signed int v = vec_sums(vec_abs(vec_sub(c0, c1)), (vector signed int)0);
vec_ste(vec_splat(v, 3), 0, &result);
return result;
}
inline void vectorEnd()
{
}
#else
typedef Color32 VectorColor;
inline static Color32 loadColor(Color32 c)
{
return c;
}
inline static Color32 premultiplyAlpha(Color32 c)
{
Color32 pm;
pm.r = (c.r * c.a) >> 8;
pm.g = (c.g * c.a) >> 8;
pm.b = (c.b * c.a) >> 8;
pm.a = c.a;
return pm;
}
inline static uint sqr(uint s)
{
return s*s;
}
// Get the absolute distance between the given colors.
inline static uint colorDistance(Color32 c0, Color32 c1)
{
return sqr(c0.r - c1.r) + sqr(c0.g - c1.g) + sqr(c0.b - c1.b);
//return abs(c0.r - c1.r) + abs(c0.g - c1.g) + abs(c0.b - c1.b);
}
inline void vectorEnd()
{
}
#endif
inline static uint computeIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const VectorColor vcolor0 = loadColor(palette[0]);
const VectorColor vcolor1 = loadColor(palette[1]);
const VectorColor vcolor2 = loadColor(palette[2]);
const VectorColor vcolor3 = loadColor(palette[3]);
uint indices = 0;
for(int i = 0; i < 16; i++) {
const VectorColor vcolor = loadColor(rgba.color(i));
uint d0 = colorDistance(vcolor0, vcolor);
uint d1 = colorDistance(vcolor1, vcolor);
uint d2 = colorDistance(vcolor2, vcolor);
uint d3 = colorDistance(vcolor3, vcolor);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
vectorEnd();
return indices;
}
// Compressor that uses bounding box.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block)
{
Color32 c0, c1;
rgba.boundsRange(&c1, &c0);
block->col0 = toColor16(c0);
block->col1 = toColor16(c1);
nvDebugCheck(block->col0.u > block->col1.u);
// Use 4 color mode only.
//if (block->col0.u < block->col1.u) {
// swap(block->col0.u, block->col1.u);
//}
Color32 palette[4];
block->evaluatePalette4(palette);
block->indices = computeIndices(rgba, palette);
}
// Encode DXT3 block.
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block)
{
compressBlock_BoundsRange(rgba, &block->color);
compressBlock(rgba, &block->alpha);
}
// Encode DXT3 alpha block.
void nv::compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block)
{
block->alpha0 = rgba.color(0).a >> 4;
block->alpha1 = rgba.color(1).a >> 4;
block->alpha2 = rgba.color(2).a >> 4;
block->alpha3 = rgba.color(3).a >> 4;
block->alpha4 = rgba.color(4).a >> 4;
block->alpha5 = rgba.color(5).a >> 4;
block->alpha6 = rgba.color(6).a >> 4;
block->alpha7 = rgba.color(7).a >> 4;
block->alpha8 = rgba.color(8).a >> 4;
block->alpha9 = rgba.color(9).a >> 4;
block->alphaA = rgba.color(10).a >> 4;
block->alphaB = rgba.color(11).a >> 4;
block->alphaC = rgba.color(12).a >> 4;
block->alphaD = rgba.color(13).a >> 4;
block->alphaE = rgba.color(14).a >> 4;
block->alphaF = rgba.color(15).a >> 4;
}
static uint computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
totalError += besterror;
block->setIndex(i, best);
}
return totalError;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
void nv::compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block)
{
Color32 c0, c1;
rgba.boundsRangeAlpha(&c1, &c0);
block->color.col0 = toColor16(c0);
block->color.col1 = toColor16(c1);
nvDebugCheck(block->color.col0.u > block->color.col1.u);
Color32 palette[4];
block->color.evaluatePalette4(palette);
block->color.indices = computeIndices(rgba, palette);
nvDebugCheck(c0.a <= c1.a);
block->alpha.alpha0 = c0.a;
block->alpha.alpha1 = c1.a;
computeAlphaIndices(rgba, &block->alpha);
}
uint nv::compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alpha0 = 0;
uint8 alpha1 = 255;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
alpha0 = max(alpha0, alpha);
alpha1 = min(alpha1, alpha);
}
alpha0 = alpha0 - (alpha0 - alpha1) / 32;
alpha1 = alpha1 + (alpha0 - alpha1) / 32;
AlphaBlockDXT5 block0;
block0.alpha0 = alpha0;
block0.alpha1 = alpha1;
uint error0 = computeAlphaIndices(rgba, &block0);
AlphaBlockDXT5 block1;
block1.alpha0 = alpha1;
block1.alpha1 = alpha0;
uint error1 = computeAlphaIndices(rgba, &block1);
if (error0 < error1)
{
*block = block0;
return error0;
}
else
{
*block = block1;
return error1;
}
}
uint nv::compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
block->alpha0 = maxa;
block->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, block);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
block->alpha0 = a0;
block->alpha1 = a1;
int error = computeAlphaError(rgba, block);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
block->alpha0 = besta0;
block->alpha1 = besta1;
}
return computeAlphaIndices(rgba, block);
}

View File

@ -1,84 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_FASTCOMPRESSDXT_H
#define NV_TT_FASTCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
// Color compression:
// Compressor that uses the extremes of the luminance axis.
// void compressBlock_DiameterAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses the extremes of the luminance axis.
// void compressBlock_LuminanceAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box.
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT1 * block);
// Compressor that uses bounding box and takes alpha into account.
// void compressBlock_BoundsRangeAlpha(const ColorBlock & rgba, BlockDXT1 * block);
// Simple, but slow compressor that tests all color pairs.
// void compressBlock_TestAllPairs(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force 6d search along the best fit axis.
// void compressBlock_AnalyzeBestFitAxis(const ColorBlock & rgba, BlockDXT1 * block);
// Spatial greedy search.
// void refineSolution_1dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_3dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// void refineSolution_6dSearch(const ColorBlock & rgba, BlockDXT1 * block);
// Brute force compressor for DXT5n
// void compressGreenBlock_BruteForce(const ColorBlock & rgba, BlockDXT1 * block);
// Minimize error of the endpoints.
// void optimizeEndPoints(const ColorBlock & rgba, BlockDXT1 * block);
// uint blockError(const ColorBlock & rgba, const BlockDXT1 & block);
// uint blockError(const ColorBlock & rgba, const AlphaBlockDXT5 & block);
// Alpha compression:
void compressBlock(const ColorBlock & rgba, AlphaBlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT3 * block);
void compressBlock_BoundsRange(const ColorBlock & rgba, BlockDXT5 * block);
uint compressBlock_BoundsRange(const ColorBlock & rgba, AlphaBlockDXT5 * block);
uint compressBlock_BruteForce(const ColorBlock & rgba, AlphaBlockDXT5 * block);
// uint compressBlock_Iterative(const ColorBlock & rgba, AlphaBlockDXT5 * block);
} // nv namespace
#endif // NV_TT_FASTCOMPRESSDXT_H

View File

@ -23,8 +23,6 @@
#include <string.h> // memcpy #include <string.h> // memcpy
#include <nvcore/Containers.h> // nextPowerOfTwo
#include <nvcore/Memory.h> #include <nvcore/Memory.h>
#include "nvtt.h" #include "nvtt.h"
@ -120,8 +118,6 @@ void InputOptions::reset()
m.maxExtent = 0; m.maxExtent = 0;
m.roundMode = RoundMode_None; m.roundMode = RoundMode_None;
m.premultiplyAlpha = false;
} }
@ -309,19 +305,6 @@ void InputOptions::setLinearTransform(int channel, float w0, float w1, float w2,
//m.linearTransform.setRow(channel, w); //m.linearTransform.setRow(channel, w);
} }
void InputOptions::setSwizzleTransform(int x, int y, int z, int w)
{
nvCheck(x >= 0 && x < 3);
nvCheck(y >= 0 && y < 3);
nvCheck(z >= 0 && z < 3);
nvCheck(w >= 0 && w < 3);
// m.xswizzle = x;
// m.yswizzle = y;
// m.zswizzle = z;
// m.wswizzle = w;
}
void InputOptions::setMaxExtents(int e) void InputOptions::setMaxExtents(int e)
{ {
nvDebugCheck(e > 0); nvDebugCheck(e > 0);
@ -333,10 +316,6 @@ void InputOptions::setRoundMode(RoundMode mode)
m.roundMode = mode; m.roundMode = mode;
} }
void InputOptions::setPremultiplyAlpha(bool b)
{
m.premultiplyAlpha = b;
}
void InputOptions::Private::computeTargetExtents() const void InputOptions::Private::computeTargetExtents() const
{ {

View File

@ -78,8 +78,6 @@ namespace nvtt
uint maxExtent; uint maxExtent;
RoundMode roundMode; RoundMode roundMode;
bool premultiplyAlpha;
// @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere... // @@ These are computed in nvtt::compress, so they should be mutable or stored elsewhere...
mutable uint targetWidth; mutable uint targetWidth;
mutable uint targetHeight; mutable uint targetHeight;

View File

@ -0,0 +1,368 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h>
#include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h>
#include "OptimalCompressDXT.h"
#include "SingleColorLookup.h"
using namespace nv;
using namespace OptimalCompress;
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
// Choose quantized color that produces less error. Used by DXT3 compressor.
inline static uint quantize4(uint8 a)
{
int q0 = (a >> 4) - 1;
int q1 = (a >> 4);
int q2 = (a >> 4) + 1;
q0 = (q0 << 4) | q0;
q1 = (q1 << 4) | q1;
q2 = (q2 << 4) | q2;
int d0 = abs(q0 - a);
int d1 = abs(q1 - a);
int d2 = abs(q2 - a);
if (d0 < d1 && d0 < d2) return q0 >> 4;
if (d1 < d2) return q1 >> 4;
return q2 >> 4;
}
static uint computeAlphaError(const ColorBlock & rgba, const AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
uint totalError = 0;
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best;
for (uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
totalError += besterror;
}
return totalError;
}
static void computeAlphaIndices(const ColorBlock & rgba, AlphaBlockDXT5 * block)
{
uint8 alphas[8];
block->evaluatePalette(alphas);
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
uint besterror = 256*256;
uint best = 8;
for(uint p = 0; p < 8; p++)
{
int d = alphas[p] - alpha;
uint error = d * d;
if (error < besterror)
{
besterror = error;
best = p;
}
}
nvDebugCheck(best < 8);
block->setIndex(i, best);
}
}
} // namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void OptimalCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void OptimalCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a < 128)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
}
}
// Brute force green channel compressor
void OptimalCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void OptimalCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
dxtBlock->alpha0 = quantize4(rgba.color(0).a);
dxtBlock->alpha1 = quantize4(rgba.color(1).a);
dxtBlock->alpha2 = quantize4(rgba.color(2).a);
dxtBlock->alpha3 = quantize4(rgba.color(3).a);
dxtBlock->alpha4 = quantize4(rgba.color(4).a);
dxtBlock->alpha5 = quantize4(rgba.color(5).a);
dxtBlock->alpha6 = quantize4(rgba.color(6).a);
dxtBlock->alpha7 = quantize4(rgba.color(7).a);
dxtBlock->alpha8 = quantize4(rgba.color(8).a);
dxtBlock->alpha9 = quantize4(rgba.color(9).a);
dxtBlock->alphaA = quantize4(rgba.color(10).a);
dxtBlock->alphaB = quantize4(rgba.color(11).a);
dxtBlock->alphaC = quantize4(rgba.color(12).a);
dxtBlock->alphaD = quantize4(rgba.color(13).a);
dxtBlock->alphaE = quantize4(rgba.color(14).a);
dxtBlock->alphaF = quantize4(rgba.color(15).a);
}
void OptimalCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock)
{
uint8 mina = 255;
uint8 maxa = 0;
// Get min/max alpha.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
mina = min(mina, alpha);
maxa = max(maxa, alpha);
}
dxtBlock->alpha0 = maxa;
dxtBlock->alpha1 = mina;
/*int centroidDist = 256;
int centroid;
// Get the closest to the centroid.
for (uint i = 0; i < 16; i++)
{
uint8 alpha = rgba.color(i).a;
int dist = abs(alpha - (maxa + mina) / 2);
if (dist < centroidDist)
{
centroidDist = dist;
centroid = alpha;
}
}*/
if (maxa - mina > 8)
{
int besterror = computeAlphaError(rgba, dxtBlock);
int besta0 = maxa;
int besta1 = mina;
for (int a0 = mina+9; a0 < maxa; a0++)
{
for (int a1 = mina; a1 < a0-8; a1++)
//for (int a1 = mina; a1 < maxa; a1++)
{
//nvCheck(abs(a1-a0) > 8);
//if (abs(a0 - a1) < 8) continue;
//if ((maxa-a0) + (a1-mina) + min(abs(centroid-a0), abs(centroid-a1)) > besterror)
if ((maxa-a0) + (a1-mina) > besterror)
continue;
dxtBlock->alpha0 = a0;
dxtBlock->alpha1 = a1;
int error = computeAlphaError(rgba, dxtBlock);
if (error < besterror)
{
besterror = error;
besta0 = a0;
besta1 = a1;
}
}
}
dxtBlock->alpha0 = besta0;
dxtBlock->alpha1 = besta1;
}
computeAlphaIndices(rgba, dxtBlock);
}

View File

@ -0,0 +1,49 @@
// Copyright NVIDIA Corporation 2008 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#ifndef NV_TT_OPTIMALCOMPRESSDXT_H
#define NV_TT_OPTIMALCOMPRESSDXT_H
#include <nvimage/nvimage.h>
namespace nv
{
struct ColorBlock;
struct BlockDXT1;
struct BlockDXT3;
struct BlockDXT5;
struct AlphaBlockDXT3;
struct AlphaBlockDXT5;
namespace OptimalCompress
{
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock);
}
} // nv namespace
#endif // NV_TT_OPTIMALCOMPRESSDXT_H

View File

@ -21,15 +21,13 @@
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR // FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE. // OTHER DEALINGS IN THE SOFTWARE.
#include <nvcore/Containers.h> // swap
#include <nvmath/Color.h> #include <nvmath/Color.h>
#include <nvimage/ColorBlock.h> #include <nvimage/ColorBlock.h>
#include <nvimage/BlockDXT.h> #include <nvimage/BlockDXT.h>
#include "QuickCompressDXT.h" #include "QuickCompressDXT.h"
#include "SingleColorLookup.h" #include "OptimalCompressDXT.h"
using namespace nv; using namespace nv;
@ -290,70 +288,6 @@ static void optimizeEndPoints4(Vector3 block[16], BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices3(block, a, b); dxtBlock->indices = computeIndices3(block, a, b);
}*/ }*/
namespace
{
static int computeGreenError(const ColorBlock & rgba, const BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
int palette[4];
palette[0] = (block->col0.g << 2) | (block->col0.g >> 4);
palette[1] = (block->col1.g << 2) | (block->col1.g >> 4);
palette[2] = (2 * palette[0] + palette[1]) / 3;
palette[3] = (2 * palette[1] + palette[0]) / 3;
int totalError = 0;
for (int i = 0; i < 16; i++)
{
const int green = rgba.color(i).g;
int error = abs(green - palette[0]);
error = min(error, abs(green - palette[1]));
error = min(error, abs(green - palette[2]));
error = min(error, abs(green - palette[3]));
totalError += error;
}
return totalError;
}
static uint computeGreenIndices(const ColorBlock & rgba, const Color32 palette[4])
{
const int color0 = palette[0].g;
const int color1 = palette[1].g;
const int color2 = palette[2].g;
const int color3 = palette[3].g;
uint indices = 0;
for (int i = 0; i < 16; i++)
{
const int color = rgba.color(i).g;
uint d0 = abs(color0 - color);
uint d1 = abs(color1 - color);
uint d2 = abs(color2 - color);
uint d3 = abs(color3 - color);
uint b0 = d0 > d3;
uint b1 = d1 > d2;
uint b2 = d0 > d2;
uint b3 = d1 > d3;
uint b4 = d2 > d3;
uint x0 = b1 & b2;
uint x1 = b0 & b3;
uint x2 = b0 & b4;
indices |= (x2 | ((x0 | x1) << 1)) << (2 * i);
}
return indices;
}
} // namespace
namespace namespace
{ {
@ -505,29 +439,14 @@ namespace
// Single color compressor, based on:
// https://mollyrocket.com/forums/viewtopic.php?t=392
void QuickCompress::compressDXT1(Color32 c, BlockDXT1 * dxtBlock)
{
dxtBlock->col0.r = OMatch5[c.r][0];
dxtBlock->col0.g = OMatch6[c.g][0];
dxtBlock->col0.b = OMatch5[c.b][0];
dxtBlock->col1.r = OMatch5[c.r][1];
dxtBlock->col1.g = OMatch6[c.g][1];
dxtBlock->col1.b = OMatch5[c.b][1];
dxtBlock->indices = 0xaaaaaaaa;
if (dxtBlock->col0.u < dxtBlock->col1.u)
{
swap(dxtBlock->col0.u, dxtBlock->col1.u);
dxtBlock->indices ^= 0x55555555;
}
}
void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock) void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{ {
if (rgba.isSingleColor())
{
OptimalCompress::compressDXT1(rgba.color(0), dxtBlock);
}
else
{
// read block // read block
Vector3 block[16]; Vector3 block[16];
extractColorBlockRGB(rgba, block); extractColorBlockRGB(rgba, block);
@ -554,29 +473,28 @@ void QuickCompress::compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
dxtBlock->indices = computeIndices4(block, maxColor, minColor); dxtBlock->indices = computeIndices4(block, maxColor, minColor);
optimizeEndPoints4(block, dxtBlock); optimizeEndPoints4(block, dxtBlock);
}
void QuickCompress::compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock)
{
if (rgba.a == 0)
{
dxtBlock->col0.u = 0;
dxtBlock->col1.u = 0;
dxtBlock->indices = 0xFFFFFFFF;
}
else
{
compressDXT1(rgba, dxtBlock);
} }
} }
void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock) void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
{ {
if (!rgba.hasAlpha()) bool hasAlpha = false;
for (uint i = 0; i < 16; i++)
{
if (rgba.color(i).a < 128) {
hasAlpha = true;
break;
}
}
if (!hasAlpha)
{ {
compressDXT1(rgba, dxtBlock); compressDXT1(rgba, dxtBlock);
} }
// @@ Handle single RGB, with varying alpha? We need tables for single color compressor in 3 color mode.
//else if (rgba.isSingleColorNoAlpha()) { ... }
else else
{ {
// read block // read block
@ -609,95 +527,14 @@ void QuickCompress::compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock)
} }
// Brute force green channel compressor
void QuickCompress::compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block)
{
nvDebugCheck(block != NULL);
uint8 ming = 63;
uint8 maxg = 0;
// Get min/max green.
for (uint i = 0; i < 16; i++)
{
uint8 green = rgba.color(i).g >> 2;
ming = min(ming, green);
maxg = max(maxg, green);
}
block->col0.r = 31;
block->col1.r = 31;
block->col0.g = maxg;
block->col1.g = ming;
block->col0.b = 0;
block->col1.b = 0;
if (maxg - ming > 4)
{
int besterror = computeGreenError(rgba, block);
int bestg0 = maxg;
int bestg1 = ming;
for (int g0 = ming+5; g0 < maxg; g0++)
{
for (int g1 = ming; g1 < g0-4; g1++)
{
if ((maxg-g0) + (g1-ming) > besterror)
continue;
block->col0.g = g0;
block->col1.g = g1;
int error = computeGreenError(rgba, block);
if (error < besterror)
{
besterror = error;
bestg0 = g0;
bestg1 = g1;
}
}
}
block->col0.g = bestg0;
block->col1.g = bestg1;
}
Color32 palette[4];
block->evaluatePalette(palette);
block->indices = computeGreenIndices(rgba, palette);
}
void QuickCompress::compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock)
{
// @@ Round instead of truncate. When rounding take into account bit expansion.
dxtBlock->alpha0 = rgba.color(0).a >> 4;
dxtBlock->alpha1 = rgba.color(1).a >> 4;
dxtBlock->alpha2 = rgba.color(2).a >> 4;
dxtBlock->alpha3 = rgba.color(3).a >> 4;
dxtBlock->alpha4 = rgba.color(4).a >> 4;
dxtBlock->alpha5 = rgba.color(5).a >> 4;
dxtBlock->alpha6 = rgba.color(6).a >> 4;
dxtBlock->alpha7 = rgba.color(7).a >> 4;
dxtBlock->alpha8 = rgba.color(8).a >> 4;
dxtBlock->alpha9 = rgba.color(9).a >> 4;
dxtBlock->alphaA = rgba.color(10).a >> 4;
dxtBlock->alphaB = rgba.color(11).a >> 4;
dxtBlock->alphaC = rgba.color(12).a >> 4;
dxtBlock->alphaD = rgba.color(13).a >> 4;
dxtBlock->alphaE = rgba.color(14).a >> 4;
dxtBlock->alphaF = rgba.color(15).a >> 4;
}
void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock) void QuickCompress::compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock)
{ {
compressDXT1(rgba, &dxtBlock->color); compressDXT1(rgba, &dxtBlock->color);
compressDXT3A(rgba, &dxtBlock->alpha); OptimalCompress::compressDXT3A(rgba, &dxtBlock->alpha);
} }
void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock) void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{ {
uint8 alpha0 = 0; uint8 alpha0 = 0;
uint8 alpha1 = 255; uint8 alpha1 = 255;
@ -717,7 +554,7 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
AlphaBlockDXT5 bestblock = block; AlphaBlockDXT5 bestblock = block;
while(true) for (int i = 0; i < iterationCount; i++)
{ {
optimizeAlpha8(rgba, &block); optimizeAlpha8(rgba, &block);
uint error = computeAlphaIndices(rgba, &block); uint error = computeAlphaIndices(rgba, &block);
@ -741,9 +578,8 @@ void QuickCompress::compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtB
*dxtBlock = bestblock; *dxtBlock = bestblock;
} }
void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock) void QuickCompress::compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount/*=8*/)
{ {
compressDXT1(rgba, &dxtBlock->color); compressDXT1(rgba, &dxtBlock->color);
compressDXT5A(rgba, &dxtBlock->alpha); compressDXT5A(rgba, &dxtBlock->alpha, iterationCount);
} }

View File

@ -37,17 +37,13 @@ namespace nv
namespace QuickCompress namespace QuickCompress
{ {
void compressDXT1(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock); void compressDXT1(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(Color32 rgba, BlockDXT1 * dxtBlock);
void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock); void compressDXT1a(const ColorBlock & rgba, BlockDXT1 * dxtBlock);
void compressDXT1G(const ColorBlock & rgba, BlockDXT1 * block);
void compressDXT3A(const ColorBlock & rgba, AlphaBlockDXT3 * dxtBlock);
void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock); void compressDXT3(const ColorBlock & rgba, BlockDXT3 * dxtBlock);
void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock); void compressDXT5A(const ColorBlock & rgba, AlphaBlockDXT5 * dxtBlock, int iterationCount=8);
void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock); void compressDXT5(const ColorBlock & rgba, BlockDXT5 * dxtBlock, int iterationCount=8);
} }
} // nv namespace } // nv namespace

View File

@ -159,7 +159,7 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
} }
} }
__device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16]) __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sums[16], float weights[16], int xrefs[16], int * sameColor)
{ {
const int bid = blockIdx.x; const int bid = blockIdx.x;
const int idx = threadIdx.x; const int idx = threadIdx.x;
@ -189,6 +189,8 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
colorSums(colors, sums); colorSums(colors, sums);
float3 axis = bestFitLine(colors, sums[0], kColorMetric); float3 axis = bestFitLine(colors, sums[0], kColorMetric);
*sameColor = (axis == make_float3(0, 0, 0));
dps[idx] = dot(rawColors[idx], axis); dps[idx] = dot(rawColors[idx], axis);
#if __DEVICE_EMULATION__ #if __DEVICE_EMULATION__
@ -205,43 +207,6 @@ __device__ void loadColorBlock(const uint * image, float3 colors[16], float3 sum
} }
} }
__device__ void loadColorBlock(const uint * image, float2 colors[16], float2 sums[16], int xrefs[16])
{
const int bid = blockIdx.x;
const int idx = threadIdx.x;
__shared__ float dps[16];
if (idx < 16)
{
// Read color and copy to shared mem.
uint c = image[(bid) * 16 + idx];
colors[idx].y = ((c >> 8) & 0xFF) * (1.0f / 255.0f);
colors[idx].x = ((c >> 16) & 0xFF) * (1.0f / 255.0f);
// No need to synchronize, 16 < warp size.
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
// Sort colors along the best fit line.
colorSums(colors, sums);
float2 axis = bestFitLine(colors, sums[0]);
dps[idx] = dot(colors[idx], axis);
#if __DEVICE_EMULATION__
} __debugsync(); if (idx < 16) {
#endif
sortColors(dps, xrefs);
float2 tmp = colors[idx];
colors[xrefs[idx]] = tmp;
}
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Round color to RGB565 and expand // Round color to RGB565 and expand
@ -258,26 +223,6 @@ inline __device__ float3 roundAndExpand565(float3 v, ushort * w)
return v; return v;
} }
inline __device__ float2 roundAndExpand56(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 31.0f);
v.y = rintf(__saturatef(v.y) * 63.0f);
*w = ((ushort)v.x << 11) | ((ushort)v.y << 5);
v.x *= 0.03227752766457f; // approximate integer bit expansion.
v.y *= 0.01583151765563f;
return v;
}
inline __device__ float2 roundAndExpand88(float2 v, ushort * w)
{
v.x = rintf(__saturatef(v.x) * 255.0f);
v.y = rintf(__saturatef(v.y) * 255.0f);
*w = ((ushort)v.x << 8) | ((ushort)v.y);
v.x *= 1.0f / 255.0f;
v.y *= 1.0f / 255.0f;
return v;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Evaluate permutations // Evaluate permutations
@ -521,114 +466,6 @@ __device__ float evalPermutation3(const float3 * colors, const float * weights,
} }
*/ */
__device__ float evalPermutation4(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
__device__ float evalPermutation3(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable3[bits & 3] * colors[i];
akku += prods3[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 4.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 5-6 color and expand...
a = roundAndExpand56(a, start);
b = roundAndExpand56(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 4.0f) * (e.x + e.y);
}
__device__ float evalPermutationCTX(const float2 * colors, float2 color_sum, uint permutation, ushort * start, ushort * end)
{
// Compute endpoints using least squares.
float2 alphax_sum = make_float2(0.0f, 0.0f);
uint akku = 0;
// Compute alpha & beta for this permutation.
#pragma unroll
for (int i = 0; i < 16; i++)
{
const uint bits = permutation >> (2*i);
alphax_sum += alphaTable4[bits & 3] * colors[i];
akku += prods4[bits & 3];
}
float alpha2_sum = float(akku >> 16);
float beta2_sum = float((akku >> 8) & 0xff);
float alphabeta_sum = float(akku & 0xff);
float2 betax_sum = 9.0f * color_sum - alphax_sum;
const float factor = 1.0f / (alpha2_sum * beta2_sum - alphabeta_sum * alphabeta_sum);
float2 a = (alphax_sum * beta2_sum - betax_sum * alphabeta_sum) * factor;
float2 b = (betax_sum * alpha2_sum - alphax_sum * alphabeta_sum) * factor;
// Round a, b to the closest 8-8 color and expand...
a = roundAndExpand88(a, start);
b = roundAndExpand88(b, end);
// compute the error
float2 e = a * a * alpha2_sum + b * b * beta2_sum + 2.0f * (a * b * alphabeta_sum - a * alphax_sum - b * betax_sum);
return (1.0f / 9.0f) * (e.x + e.y);
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Evaluate all permutations // Evaluate all permutations
@ -757,14 +594,12 @@ __device__ void evalAllPermutations(const float3 * colors, const float * weights
} }
*/ */
__device__ void evalAllPermutations(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors) __device__ void evalLevel4Permutations(const float3 * colors, float3 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{ {
const int idx = threadIdx.x; const int idx = threadIdx.x;
float bestError = FLT_MAX; float bestError = FLT_MAX;
__shared__ uint s_permutations[160];
for(int i = 0; i < 16; i++) for(int i = 0; i < 16; i++)
{ {
int pidx = idx + NUM_THREADS * i; int pidx = idx + NUM_THREADS * i;
@ -772,7 +607,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
ushort start, end; ushort start, end;
uint permutation = permutations[pidx]; uint permutation = permutations[pidx];
if (pidx < 160) s_permutations[pidx] = permutation;
float error = evalPermutation4(colors, colorSum, permutation, &start, &end); float error = evalPermutation4(colors, colorSum, permutation, &start, &end);
@ -791,30 +625,6 @@ __device__ void evalAllPermutations(const float2 * colors, float2 colorSum, cons
bestPermutation ^= 0x55555555; // Flip indices. bestPermutation ^= 0x55555555; // Flip indices.
} }
for(int i = 0; i < 3; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 160) break;
ushort start, end;
uint permutation = s_permutations[pidx];
float error = evalPermutation3(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
if (bestStart > bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= (~bestPermutation >> 1) & 0x55555555; // Flip indices.
}
}
}
errors[idx] = bestError; errors[idx] = bestError;
} }
@ -852,40 +662,6 @@ __device__ void evalLevel4Permutations(const float3 * colors, const float * weig
errors[idx] = bestError; errors[idx] = bestError;
} }
__device__ void evalAllPermutationsCTX(const float2 * colors, float2 colorSum, const uint * permutations, ushort & bestStart, ushort & bestEnd, uint & bestPermutation, float * errors)
{
const int idx = threadIdx.x;
float bestError = FLT_MAX;
for(int i = 0; i < 16; i++)
{
int pidx = idx + NUM_THREADS * i;
if (pidx >= 992) break;
ushort start, end;
uint permutation = permutations[pidx];
float error = evalPermutationCTX(colors, colorSum, permutation, &start, &end);
if (error < bestError)
{
bestError = error;
bestPermutation = permutation;
bestStart = start;
bestEnd = end;
}
}
if (bestStart < bestEnd)
{
swap(bestEnd, bestStart);
bestPermutation ^= 0x55555555; // Flip indices.
}
errors[idx] = bestError;
}
//////////////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////////////
// Find index with minimum error // Find index with minimum error
@ -996,11 +772,6 @@ __device__ void saveBlockDXT1(ushort start, ushort end, uint permutation, int xr
result[bid].y = indices; result[bid].y = indices;
} }
__device__ void saveBlockCTX1(ushort start, ushort end, uint permutation, int xrefs[16], uint2 * result)
{
saveBlockDXT1(start, end, permutation, xrefs, result);
}
__device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result) __device__ void saveSingleColorBlockDXT1(float3 color, uint2 * result)
{ {
const int bid = blockIdx.x; const int bid = blockIdx.x;
@ -1062,6 +833,39 @@ __global__ void compressDXT1(const uint * permutations, const uint * image, uint
} }
} }
__global__ void compressLevel4DXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float3 colors[16];
__shared__ float3 sums[16];
__shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, xrefs, &sameColor);
__syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalLevel4Permutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result) __global__ void compressWeightedDXT1(const uint * permutations, const uint * image, uint2 * result)
{ {
@ -1069,11 +873,18 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
__shared__ float3 sums[16]; __shared__ float3 sums[16];
__shared__ float weights[16]; __shared__ float weights[16];
__shared__ int xrefs[16]; __shared__ int xrefs[16];
__shared__ int sameColor;
loadColorBlock(image, colors, sums, weights, xrefs); loadColorBlock(image, colors, sums, weights, xrefs, &sameColor);
__syncthreads(); __syncthreads();
if (sameColor)
{
if (threadIdx.x == 0) saveSingleColorBlockDXT1(colors[0], result);
return;
}
ushort bestStart, bestEnd; ushort bestStart, bestEnd;
uint bestPermutation; uint bestPermutation;
@ -1092,61 +903,6 @@ __global__ void compressWeightedDXT1(const uint * permutations, const uint * ima
} }
__global__ void compressNormalDXT1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutations(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockDXT1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
__global__ void compressCTX1(const uint * permutations, const uint * image, uint2 * result)
{
__shared__ float2 colors[16];
__shared__ float2 sums[16];
__shared__ int xrefs[16];
loadColorBlock(image, colors, sums, xrefs);
__syncthreads();
ushort bestStart, bestEnd;
uint bestPermutation;
__shared__ float errors[NUM_THREADS];
evalAllPermutationsCTX(colors, sums[0], permutations, bestStart, bestEnd, bestPermutation, errors);
// Use a parallel reduction to find minimum error.
const int minIdx = findMinError(errors);
// Only write the result of the winner thread.
if (threadIdx.x == minIdx)
{
saveBlockCTX1(bestStart, bestEnd, bestPermutation, xrefs, result);
}
}
/* /*
__device__ float computeError(const float weights[16], uchar a0, uchar a1) __device__ float computeError(const float weights[16], uchar a0, uchar a1)
{ {
@ -1352,17 +1108,12 @@ extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result
compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result); compressDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
} }
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressLevel4DXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps) extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{ {
compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result); compressWeightedDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
} }
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressNormalDXT1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps)
{
compressCTX1<<<blockNum, NUM_THREADS>>>(d_bitmaps, d_data, (uint2 *)d_result);
}

View File

@ -30,6 +30,7 @@
#include <nvtt/CompressionOptions.h> #include <nvtt/CompressionOptions.h>
#include <nvtt/OutputOptions.h> #include <nvtt/OutputOptions.h>
#include <nvtt/QuickCompressDXT.h> #include <nvtt/QuickCompressDXT.h>
#include <nvtt/OptimalCompressDXT.h>
#include "CudaCompressDXT.h" #include "CudaCompressDXT.h"
#include "CudaUtils.h" #include "CudaUtils.h"
@ -52,10 +53,8 @@ using namespace nvtt;
extern "C" void setupCompressKernel(const float weights[3]); extern "C" void setupCompressKernel(const float weights[3]);
extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); extern "C" void compressKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelDXT1_Level4(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps); extern "C" void compressWeightedKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressNormalKernelDXT1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
extern "C" void compressKernelCTX1(uint blockNum, uint * d_data, uint * d_result, uint * d_bitmaps);
#include "Bitmaps.h" // @@ Rename to BitmapTable.h #include "Bitmaps.h" // @@ Rename to BitmapTable.h
@ -121,113 +120,25 @@ bool CudaCompressor::isValid() const
// @@ This code is very repetitive and needs to be cleaned up. // @@ This code is very repetitive and needs to be cleaned up.
#if 0 void CudaCompressor::setImage(const Image * image, nvtt::AlphaMode alphaMode)
struct CudaCompressionKernel
{ {
virtual void setup(const CompressionOptions::Private & compressionOptions) m_image = image;
{ m_alphaMode = alphaMode;
setupCompressKernel(compressionOptions.colorWeight.ptr());
}
virtual void setBitmapTable();
virtual void runDeviceCode(int count);
virtual void runHostCode(int count);
};
void CudaCompressor::compressKernel(CudaCompressionKernel * kernel)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
kernel->setup(compressionOptions);
kernel->setBitmapTable(m_bitmapTable);
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
kernel->runDeviceCode(count, m_data, m_result);
kernel->runHostCode(count);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
kernel->outputResult(outputOptions.outputHandler);
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
} }
#endif // 0
/// Compress image using CUDA. /// Compress image using CUDA.
void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void CudaCompressor::compressDXT1(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(cuda::isHardwarePresent()); nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Image size in blocks. // Image size in blocks.
const uint w = (image->width() + 3) / 4; const uint w = (m_image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4; const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32); uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize); uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU! convertToBlockLinear(m_image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h; const uint blockNum = w * h;
const uint compressedSize = blockNum * 8; const uint compressedSize = blockNum * 8;
@ -272,7 +183,7 @@ void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions:
} }
clock_t end = clock(); clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage); free(blockLinearImage);
@ -286,18 +197,18 @@ void CudaCompressor::compressDXT1(const Image * image, const CompressionOptions:
/// Compress image using CUDA. /// Compress image using CUDA.
void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void CudaCompressor::compressDXT3(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(cuda::isHardwarePresent()); nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Image size in blocks. // Image size in blocks.
const uint w = (image->width() + 3) / 4; const uint w = (m_image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4; const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32); uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize); uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h; const uint blockNum = w * h;
const uint compressedSize = blockNum * 8; const uint compressedSize = blockNum * 8;
@ -317,13 +228,20 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel. // Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable); compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU. // Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
{ {
ColorBlock rgba(blockLinearImage + (bn + i) * 16); ColorBlock rgba(blockLinearImage + (bn + i) * 16);
QuickCompress::compressDXT3A(rgba, alphaBlocks + i); OptimalCompress::compressDXT3A(rgba, alphaBlocks + i);
} }
// Check for errors. // Check for errors.
@ -355,7 +273,7 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
} }
clock_t end = clock(); clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks); free(alphaBlocks);
free(blockLinearImage); free(blockLinearImage);
@ -370,18 +288,18 @@ void CudaCompressor::compressDXT3(const Image * image, const InputOptions::Priva
/// Compress image using CUDA. /// Compress image using CUDA.
void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Private & inputOptions, const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions) void CudaCompressor::compressDXT5(const CompressionOptions::Private & compressionOptions, const OutputOptions::Private & outputOptions)
{ {
nvDebugCheck(cuda::isHardwarePresent()); nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA #if defined HAVE_CUDA
// Image size in blocks. // Image size in blocks.
const uint w = (image->width() + 3) / 4; const uint w = (m_image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4; const uint h = (m_image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32); uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize); uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); convertToBlockLinear(m_image, blockLinearImage);
const uint blockNum = w * h; const uint blockNum = w * h;
const uint compressedSize = blockNum * 8; const uint compressedSize = blockNum * 8;
@ -401,7 +319,14 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice); cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel. // Launch kernel.
if (m_alphaMode == AlphaMode_Transparency)
{
compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable); compressWeightedKernelDXT1(count, m_data, m_result, m_bitmapTable);
}
else
{
compressKernelDXT1_Level4(count, m_data, m_result, m_bitmapTable);
}
// Compress alpha in parallel with the GPU. // Compress alpha in parallel with the GPU.
for (uint i = 0; i < count; i++) for (uint i = 0; i < count; i++)
@ -439,7 +364,7 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
} }
clock_t end = clock(); clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC); //printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(alphaBlocks); free(alphaBlocks);
free(blockLinearImage); free(blockLinearImage);
@ -453,323 +378,3 @@ void CudaCompressor::compressDXT5(const Image * image, const InputOptions::Priva
} }
void CudaCompressor::compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressNormalKernelDXT1(count, m_data, m_result, m_bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
void CudaCompressor::compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions)
{
nvDebugCheck(cuda::isHardwarePresent());
#if defined HAVE_CUDA
// Image size in blocks.
const uint w = (image->width() + 3) / 4;
const uint h = (image->height() + 3) / 4;
uint imageSize = w * h * 16 * sizeof(Color32);
uint * blockLinearImage = (uint *) malloc(imageSize);
convertToBlockLinear(image, blockLinearImage); // @@ Do this in parallel with the GPU, or in the GPU!
const uint blockNum = w * h;
const uint compressedSize = blockNum * 8;
clock_t start = clock();
setupCompressKernel(compressionOptions.colorWeight.ptr());
// TODO: Add support for multiple GPUs.
uint bn = 0;
while(bn != blockNum)
{
uint count = min(blockNum - bn, MAX_BLOCKS);
cudaMemcpy(m_data, blockLinearImage + bn * 16, count * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelCTX1(count, m_data, m_result, m_bitmapTable);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
cudaMemcpy(blockLinearImage, m_result, count * 8, cudaMemcpyDeviceToHost);
// Output result.
if (outputOptions.outputHandler != NULL)
{
outputOptions.outputHandler->writeData(blockLinearImage, count * 8);
}
bn += count;
}
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
free(blockLinearImage);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#if 0
class Task
{
public:
explicit Task(uint numBlocks) : blockMaxCount(numBlocks), blockCount(0)
{
// System memory allocations.
blockLinearImage = new uint[blockMaxCount * 16];
xrefs = new uint[blockMaxCount * 16];
// Device memory allocations.
cudaMalloc((void**) &d_blockLinearImage, blockMaxCount * 16 * sizeof(uint));
cudaMalloc((void**) &d_compressedImage, blockMaxCount * 8U);
// @@ Check for allocation errors.
}
~Task()
{
delete [] blockLinearImage;
delete [] xrefs;
cudaFree(d_blockLinearImage);
cudaFree(d_compressedImage);
}
void addColorBlock(const ColorBlock & rgba)
{
nvDebugCheck(!isFull());
// @@ Count unique colors?
/*
// Convert colors to vectors.
Array<Vector3> pointArray(16);
for(int i = 0; i < 16; i++) {
const Color32 color = rgba.color(i);
pointArray.append(Vector3(color.r, color.g, color.b));
}
// Find best fit line.
const Vector3 axis = Fit::bestLine(pointArray).direction();
// Project points to axis.
float dps[16];
uint * order = &xrefs[blockCount * 16];
for (uint i = 0; i < 16; ++i)
{
dps[i] = dot(pointArray[i], axis);
order[i] = i;
}
// Sort them.
for (uint i = 0; i < 16; ++i)
{
for (uint j = i; j > 0 && dps[j] < dps[j - 1]; --j)
{
swap(dps[j], dps[j - 1]);
swap(order[j], order[j - 1]);
}
}
*/
// Write sorted colors to blockLinearImage.
for(uint i = 0; i < 16; ++i)
{
// blockLinearImage[blockCount * 16 + i] = rgba.color(order[i]);
blockLinearImage[blockCount * 16 + i] = rgba.color(i);
}
++blockCount;
}
bool isFull()
{
nvDebugCheck(blockCount <= blockMaxCount);
return blockCount == blockMaxCount;
}
void flush(const OutputOptions::Private & outputOptions)
{
if (blockCount == 0)
{
// Nothing to do.
return;
}
// Copy input color blocks.
cudaMemcpy(d_blockLinearImage, blockLinearImage, blockCount * 64, cudaMemcpyHostToDevice);
// Launch kernel.
compressKernelDXT1(blockCount, d_blockLinearImage, d_compressedImage, d_bitmaps);
// Check for errors.
cudaError_t err = cudaGetLastError();
if (err != cudaSuccess)
{
nvDebug("CUDA Error: %s\n", cudaGetErrorString(err));
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
}
// Copy result to host, overwrite swizzled image.
uint * compressedImage = blockLinearImage;
cudaMemcpy(compressedImage, d_compressedImage, blockCount * 8, cudaMemcpyDeviceToHost);
// @@ Sort block indices.
// Output result.
if (outputOptions.outputHandler != NULL)
{
// outputOptions.outputHandler->writeData(compressedImage, blockCount * 8);
}
blockCount = 0;
}
private:
const uint blockMaxCount;
uint blockCount;
uint * blockLinearImage;
uint * xrefs;
uint * d_blockLinearImage;
uint * d_compressedImage;
};
void nv::cudaCompressDXT1_2(const Image * image, const OutputOptions::Private & outputOptions, const CompressionOptions::Private & compressionOptions)
{
#if defined HAVE_CUDA
const uint w = image->width();
const uint h = image->height();
const uint blockNum = ((w + 3) / 4) * ((h + 3) / 4);
const uint blockMax = 32768; // 49152, 65535
setupCompressKernelDXT1(compressionOptions.colorWeight.ptr());
ColorBlock rgba;
Task task(min(blockNum, blockMax));
clock_t start = clock();
for (uint y = 0; y < h; y += 4) {
for (uint x = 0; x < w; x += 4) {
rgba.init(image, x, y);
task.addColorBlock(rgba);
if (task.isFull())
{
task.flush(outputOptions);
}
}
}
task.flush(outputOptions);
clock_t end = clock();
printf("\rCUDA time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
#else
if (outputOptions.errorHandler != NULL)
{
outputOptions.errorHandler->error(Error_CudaError);
}
#endif
}
#endif // 0

View File

@ -39,17 +39,20 @@ namespace nv
bool isValid() const; bool isValid() const;
void compressDXT1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); void setImage(const Image * image, nvtt::AlphaMode alphaMode);
void compressDXT3(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT5(const Image * image, const nvtt::InputOptions::Private & inputOptions, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); void compressDXT1(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressDXT1n(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); void compressDXT3(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
void compressCTX1(const Image * image, const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions); void compressDXT5(const nvtt::CompressionOptions::Private & compressionOptions, const nvtt::OutputOptions::Private & outputOptions);
private: private:
uint * m_bitmapTable; uint * m_bitmapTable;
uint * m_data; uint * m_data;
uint * m_result; uint * m_result;
const Image * m_image;
nvtt::AlphaMode m_alphaMode;
}; };
} // nv namespace } // nv namespace

View File

@ -87,64 +87,6 @@ inline __device__ __host__ bool operator ==(float3 a, float3 b)
return a.x == b.x && a.y == b.y && a.z == b.z; return a.x == b.x && a.y == b.y && a.z == b.z;
} }
// float2 operators
inline __device__ __host__ float2 operator *(float2 a, float2 b)
{
return make_float2(a.x*b.x, a.y*b.y);
}
inline __device__ __host__ float2 operator *(float f, float2 v)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator *(float2 v, float f)
{
return make_float2(v.x*f, v.y*f);
}
inline __device__ __host__ float2 operator +(float2 a, float2 b)
{
return make_float2(a.x+b.x, a.y+b.y);
}
inline __device__ __host__ void operator +=(float2 & b, float2 a)
{
b.x += a.x;
b.y += a.y;
}
inline __device__ __host__ float2 operator -(float2 a, float2 b)
{
return make_float2(a.x-b.x, a.y-b.y);
}
inline __device__ __host__ void operator -=(float2 & b, float2 a)
{
b.x -= a.x;
b.y -= a.y;
}
inline __device__ __host__ float2 operator /(float2 v, float f)
{
float inv = 1.0f / f;
return v * inv;
}
inline __device__ __host__ void operator /=(float2 & b, float f)
{
float inv = 1.0f / f;
b.x *= inv;
b.y *= inv;
}
inline __device__ __host__ float dot(float2 a, float2 b)
{
return a.x * b.x + a.y * b.y;
}
inline __device__ __host__ float dot(float3 a, float3 b) inline __device__ __host__ float dot(float3 a, float3 b)
{ {
return a.x * b.x + a.y * b.y + a.z * b.z; return a.x * b.x + a.y * b.y + a.z * b.z;
@ -301,89 +243,5 @@ inline __device__ float3 bestFitLine(const float3 * colors, float3 color_sum, fl
return firstEigenVector(covariance); return firstEigenVector(covariance);
} }
// @@ For 2D this may not be the most efficient method. It's a quadratic equation, right?
inline __device__ __host__ float2 firstEigenVector2D( float matrix[3] )
{
// @@ 8 iterations is probably more than enough.
float2 v = make_float2(1.0f, 1.0f);
for(int i = 0; i < 8; i++) {
float x = v.x * matrix[0] + v.y * matrix[1];
float y = v.x * matrix[1] + v.y * matrix[2];
float m = max(x, y);
float iv = 1.0f / m;
if (m == 0.0f) iv = 0.0f;
v = make_float2(x*iv, y*iv);
}
return v;
}
inline __device__ void colorSums(const float2 * colors, float2 * sums)
{
#if __DEVICE_EMULATION__
float2 color_sum = make_float2(0.0f, 0.0f, 0.0f);
for (int i = 0; i < 16; i++)
{
color_sum += colors[i];
}
for (int i = 0; i < 16; i++)
{
sums[i] = color_sum;
}
#else
const int idx = threadIdx.x;
sums[idx] = colors[idx];
sums[idx] += sums[idx^8];
sums[idx] += sums[idx^4];
sums[idx] += sums[idx^2];
sums[idx] += sums[idx^1];
#endif
}
inline __device__ float2 bestFitLine(const float2 * colors, float2 color_sum)
{
// Compute covariance matrix of the given colors.
#if __DEVICE_EMULATION__
float covariance[3] = {0, 0, 0};
for (int i = 0; i < 16; i++)
{
float2 a = (colors[i] - color_sum * (1.0f / 16.0f));
covariance[0] += a.x * a.x;
covariance[1] += a.x * a.y;
covariance[3] += a.y * a.y;
}
#else
const int idx = threadIdx.x;
float2 diff = (colors[idx] - color_sum * (1.0f / 16.0f));
__shared__ float covariance[16*3];
covariance[3 * idx + 0] = diff.x * diff.x;
covariance[3 * idx + 1] = diff.x * diff.y;
covariance[3 * idx + 2] = diff.y * diff.y;
for(int d = 8; d > 0; d >>= 1)
{
if (idx < d)
{
covariance[3 * idx + 0] += covariance[3 * (idx+d) + 0];
covariance[3 * idx + 1] += covariance[3 * (idx+d) + 1];
covariance[3 * idx + 2] += covariance[3 * (idx+d) + 2];
}
}
#endif
// Compute first eigen vector.
return firstEigenVector2D(covariance);
}
#endif // CUDAMATH_H #endif // CUDAMATH_H

View File

@ -74,7 +74,7 @@ bool nv::cuda::isHardwarePresent()
{ {
#if defined HAVE_CUDA #if defined HAVE_CUDA
#if NV_OS_WIN32 #if NV_OS_WIN32
if (isWindowsVista()) return false; //if (isWindowsVista()) return false;
//if (isWindowsVista() || !isWow32()) return false; //if (isWindowsVista() || !isWow32()) return false;
#endif #endif
int count = deviceCount(); int count = deviceCount();

View File

@ -1,60 +0,0 @@
#include "nvtt_experimental.h"
struct NvttImage
{
NvttImage() :
m_constant(false),
m_image(NULL),
m_floatImage(NULL)
{
}
~NvttImage()
{
if (m_constant && m_image) m_image->unwrap();
delete m_image;
delete m_floatImage;
}
bool m_constant;
Image * m_image;
FloatImage * m_floatImage;
};
NvttImage * nvttCreateImage()
{
return new NvttImage();
}
void nvttDestroyImage(NvttImage * img)
{
delete img;
}
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data)
{
nvCheck(img != NULL);
if (format == NVTT_InputFormat_BGRA_8UB)
{
img->m_constant = false;
img->m_image->allocate(w, h);
memcpy(img->m_image->pixels(), data, w * h * 4);
}
else
{
nvCheck(false);
}
}
void nvttCompressImage(NvttImage * img, NvttFormat format)
{
nvCheck(img != NULL);
// @@ Invoke appropriate compressor.
}
#endif // NVTT_EXPERIMENTAL_H

View File

@ -1,55 +0,0 @@
#ifndef NVTT_EXPERIMENTAL_H
#define NVTT_EXPERIMENTAL_H
#include <nvtt/nvtt.h>
typedef struct NvttImage NvttImage;
NvttImage * nvttCreateImage();
void nvttDestroyImage(NvttImage * img);
void nvttSetImageData(NvttImage * img, NvttInputFormat format, uint w, uint h, void * data);
void nvttCompressImage(NvttImage * img, NvttFormat format);
// How to control the compression parameters?
// Using many arguments:
// void nvttCompressImage(img, format, quality, r, g, b, a, ...);
// Using existing compression option class:
// compressionOptions = nvttCreateCompressionOptions();
// nvttSetCompressionOptionsFormat(compressionOptions, format);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsQuality(compressionOptions, quality);
// nvttSetCompressionOptionsColorWeights(compressionOptions, r, g, b, a);
// ...
// nvttCompressImage(img, compressionOptions);
// Using thread local context state:
// void nvttSetCompressionFormat(format);
// void nvttSetCompressionQuality(quality);
// void nvttSetCompressionColorWeights(r, g, b, a);
// ...
// nvttCompressImage(img);
// Using thread local context state, but with GL style function arguments:
// nvttCompressorParameteri(NVTT_FORMAT, format);
// nvttCompressorParameteri(NVTT_QUALITY, quality);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_RED, r);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_GREEN, g);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_BLUE, b);
// nvttCompressorParameterf(NVTT_COLOR_WEIGHT_ALPHA, a);
// or nvttCompressorParameter4f(NVTT_COLOR_WEIGHTS, r, g, b, a);
// ...
// nvttCompressImage(img);
// How do we get the compressed output?
// - Using callbacks. (via new entrypoints, or through outputOptions)
// - Return it explicitely from nvttCompressImage.
// - Store it along the image, retrieve later explicitely with 'nvttGetCompressedData(img, ...)'
#endif // NVTT_EXPERIMENTAL_H

View File

@ -83,9 +83,6 @@ namespace nvtt
Format_BC3n = Format_DXT5n, Format_BC3n = Format_DXT5n,
Format_BC4, // ATI1 Format_BC4, // ATI1
Format_BC5, // 3DC, ATI2 Format_BC5, // 3DC, ATI2
Format_DXT1n,
Format_CTX1,
}; };
/// Quality modes. /// Quality modes.
@ -157,7 +154,6 @@ namespace nvtt
{ {
ColorTransform_None, ColorTransform_None,
ColorTransform_Linear, ColorTransform_Linear,
ColorTransform_Swizzle
}; };
/// Extents rounding mode. /// Extents rounding mode.
@ -222,14 +218,10 @@ namespace nvtt
// Set color transforms. @@ Not implemented! // Set color transforms. @@ Not implemented!
NVTT_API void setColorTransform(ColorTransform t); NVTT_API void setColorTransform(ColorTransform t);
NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3); NVTT_API void setLinearTransform(int channel, float w0, float w1, float w2, float w3);
NVTT_API void setSwizzleTransform(int x, int y, int z, int w);
// Set resizing options. // Set resizing options.
NVTT_API void setMaxExtents(int d); NVTT_API void setMaxExtents(int d);
NVTT_API void setRoundMode(RoundMode mode); NVTT_API void setRoundMode(RoundMode mode);
// Set whether or not to premultiply color by alpha
NVTT_API void setPremultiplyAlpha(bool b);
}; };

View File

@ -29,6 +29,8 @@
#include "colourblock.h" #include "colourblock.h"
#include <cfloat> #include <cfloat>
#include "fastclusterlookup.inl"
namespace squish { namespace squish {
FastClusterFit::FastClusterFit() FastClusterFit::FastClusterFit()
@ -97,91 +99,6 @@ void FastClusterFit::SetColourSet( ColourSet const* colours, int flags )
} }
struct Precomp {
float alpha2_sum;
float beta2_sum;
float alphabeta_sum;
float factor;
};
static SQUISH_ALIGN_16 Precomp s_threeElement[153];
static SQUISH_ALIGN_16 Precomp s_fourElement[969];
void FastClusterFit::DoPrecomputation()
{
int i = 0;
// Three element clusters:
for( int c0 = 0; c0 <= 16; c0++) // At least two clusters.
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
int c2 = 16 - c0 - c1;
/*if (c2 == 16) {
// a = b = x2 / 16
s_threeElement[i].alpha2_sum = 0;
s_threeElement[i].beta2_sum = 16;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_threeElement[i].alpha2_sum = 16;
s_threeElement[i].beta2_sum = 0;
s_threeElement[i].alphabeta_sum = -16;
s_threeElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_threeElement[i].alpha2_sum = c0 + c1 * 0.25f;
s_threeElement[i].beta2_sum = c2 + c1 * 0.25f;
s_threeElement[i].alphabeta_sum = c1 * 0.25f;
s_threeElement[i].factor = 1.0f / (s_threeElement[i].alpha2_sum * s_threeElement[i].beta2_sum - s_threeElement[i].alphabeta_sum * s_threeElement[i].alphabeta_sum);
}
i++;
}
}
//printf("%d three cluster elements\n", i);
// Four element clusters:
i = 0;
for( int c0 = 0; c0 <= 16; c0++)
{
for( int c1 = 0; c1 <= 16-c0; c1++)
{
for( int c2 = 0; c2 <= 16-c0-c1; c2++)
{
int c3 = 16 - c0 - c1 - c2;
/*if (c3 == 16) {
// a = b = x3 / 16
s_fourElement[i].alpha2_sum = 16.0f;
s_fourElement[i].beta2_sum = 0.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else if (c0 == 16) {
// a = b = x0 / 16
s_fourElement[i].alpha2_sum = 0.0f;
s_fourElement[i].beta2_sum = 16.0f;
s_fourElement[i].alphabeta_sum = -16.0f;
s_fourElement[i].factor = 1.0f / 256.0f;
}
else*/ {
s_fourElement[i].alpha2_sum = c0 + c1 * (4.0f/9.0f) + c2 * (1.0f/9.0f);
s_fourElement[i].beta2_sum = c3 + c2 * (4.0f/9.0f) + c1 * (1.0f/9.0f);
s_fourElement[i].alphabeta_sum = (c1 + c2) * (2.0f/9.0f);
s_fourElement[i].factor = 1.0f / (s_fourElement[i].alpha2_sum * s_fourElement[i].beta2_sum - s_fourElement[i].alphabeta_sum * s_fourElement[i].alphabeta_sum);
}
i++;
}
}
}
//printf("%d four cluster elements\n", i);
}
void FastClusterFit::SetMetric(float r, float g, float b) void FastClusterFit::SetMetric(float r, float g, float b)
{ {
#if SQUISH_USE_SIMD #if SQUISH_USE_SIMD

View File

@ -44,8 +44,6 @@ public:
void SetMetric(float r, float g, float b); void SetMetric(float r, float g, float b);
float GetBestError() const; float GetBestError() const;
static void DoPrecomputation();
// Make them public // Make them public
virtual void Compress3( void* block ); virtual void Compress3( void* block );
virtual void Compress4( void* block ); virtual void Compress4( void* block );

File diff suppressed because it is too large Load Diff

View File

@ -1,221 +0,0 @@
// Copyright NVIDIA Corporation 2007 -- Ignacio Castano <icastano@nvidia.com>
//
// Permission is hereby granted, free of charge, to any person
// obtaining a copy of this software and associated documentation
// files (the "Software"), to deal in the Software without
// restriction, including without limitation the rights to use,
// copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the
// Software is furnished to do so, subject to the following
// conditions:
//
// The above copyright notice and this permission notice shall be
// included in all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
// EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
// OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
// NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
// HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
// WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
// FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
// OTHER DEALINGS IN THE SOFTWARE.
#include <nvtt/nvtt.h>
#include <stdio.h> // printf
#include <stdlib.h> // rand
#include <time.h> // clock
#include <string.h> // memcpy, memcmp
#include <assert.h>
#define FRAME_COUNT 1000
#define WIDTH 2048
#define HEIGHT 2048
#define INPUT_SIZE (WIDTH*HEIGHT)
#define OUTPUT_SIZE (WIDTH*HEIGHT/16*2)
static int s_input[INPUT_SIZE];
static int s_reference[OUTPUT_SIZE];
static int s_output[OUTPUT_SIZE];
static int s_frame = 0;
struct MyOutputHandler : public nvtt::OutputHandler
{
MyOutputHandler() : m_ptr(NULL) {}
virtual void beginImage(int size, int width, int height, int depth, int face, int miplevel)
{
assert(size == sizeof(int) * OUTPUT_SIZE);
assert(width == WIDTH);
assert(height == HEIGHT);
assert(depth == 1);
assert(face == 0);
assert(miplevel == 0);
m_ptr = (unsigned char *)s_output;
if (s_frame == 1)
{
// Save first result as reference.
memcpy(s_reference, s_output, sizeof(int) * OUTPUT_SIZE);
}
else if (s_frame > 1)
{
// Compare against reference.
if (memcmp(s_output, s_reference, sizeof(int) * OUTPUT_SIZE) != 0)
{
printf("Compressed image different to original.\n");
exit(EXIT_FAILURE);
}
}
}
virtual bool writeData(const void * data, int size)
{
memcpy(m_ptr, data, size);
m_ptr += size;
return true;
}
unsigned char * m_ptr;
};
void precomp()
{
unsigned int bitmaps[1024];
int num = 0;
printf("{\n");
printf("\t%8X,\n", 0);
bitmaps[0] = 0;
num = 1;
for (int a = 1; a <= 15; a++)
{
for (int b = a; b <= 15; b++)
{
for (int c = b; c <= 15; c++)
{
int indices[16];
int i = 0;
for(; i < a; i++) {
indices[i] = 0;
}
for(; i < a+b; i++) {
indices[i] = 2;
}
for(; i < a+b+c; i++) {
indices[i] = 3;
}
for(; i < 16; i++) {
indices[i] = 1;
}
unsigned int bm = 0;
for(i = 0; i < 16; i++) {
bm |= indices[i] << (i * 2);
}
printf("\t0x%8X, // %d %d %d %d\n", bm, a-0, b-a, c-b, 16-c);
bitmaps[num] = bm;
num++;
}
}
}
printf("}\n");
printf("// num = %d\n", num);
/*
for( int i = imax; i >= 0; --i )
{
// second cluster [i,j) is one third along
for( int m = i; m < 16; ++m )
{
indices[m] = 2;
}
const int jmax = ( i == 0 ) ? 15 : 16;
for( int j = jmax; j >= i; --j )
{
// third cluster [j,k) is two thirds along
for( int m = j; m < 16; ++m )
{
indices[m] = 3;
}
int kmax = ( j == 0 ) ? 15 : 16;
for( int k = kmax; k >= j; --k )
{
// last cluster [k,n) is at the end
if( k < 16 )
{
indices[k] = 1;
}
uint bitmap = 0;
bool hasThree = false;
for(int p = 0; p < 16; p++) {
bitmap |= indices[p] << (p * 2);
}
bitmaps[num] = bitmap;
num++;
}
}
}
*/
}
int main(int argc, char *argv[])
{
//precomp();
nvtt::InputOptions inputOptions;
inputOptions.setTextureLayout(nvtt::TextureType_2D, WIDTH, HEIGHT);
for (int i = 0; i < INPUT_SIZE; i++)
{
s_input[i] = rand();
}
inputOptions.setMipmapData(s_input, WIDTH, HEIGHT);
inputOptions.setMipmapGeneration(false);
nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(nvtt::Format_DXT1);
// compressionOptions.setFormat(nvtt::Format_DXT1n);
// compressionOptions.setFormat(nvtt::Format_CTX1);
nvtt::OutputOptions outputOptions;
outputOptions.setOutputHeader(false);
MyOutputHandler outputHandler;
outputOptions.setOutputHandler(&outputHandler);
nvtt::Compressor compressor;
for (s_frame = 0; s_frame < FRAME_COUNT; s_frame++)
{
clock_t start = clock();
printf("compressing frame %d:\n", s_frame);
compressor.process(inputOptions, compressionOptions, outputOptions);
clock_t end = clock();
printf("time taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
}
return EXIT_SUCCESS;
}

View File

@ -42,11 +42,11 @@ struct MyOutputHandler : public nvtt::OutputHandler
MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {} MyOutputHandler(const char * name) : total(0), progress(0), percentage(0), stream(new nv::StdOutputStream(name)) {}
virtual ~MyOutputHandler() { delete stream; } virtual ~MyOutputHandler() { delete stream; }
virtual void setTotal(int64 t) void setTotal(int64 t)
{ {
total = t + 128; total = t + 128;
} }
virtual void setDisplayProgress(bool b) void setDisplayProgress(bool b)
{ {
verbose = b; verbose = b;
} }
@ -140,7 +140,6 @@ int main(int argc, char *argv[])
bool silent = false; bool silent = false;
bool bc1n = false; bool bc1n = false;
nvtt::Format format = nvtt::Format_BC1; nvtt::Format format = nvtt::Format_BC1;
bool premultiplyAlpha = false;
const char * externalCompressor = NULL; const char * externalCompressor = NULL;
@ -174,10 +173,6 @@ int main(int argc, char *argv[])
{ {
noMipmaps = true; noMipmaps = true;
} }
else if (strcmp("-premula", argv[i]) == 0)
{
premultiplyAlpha = true;
}
// Compression options. // Compression options.
else if (strcmp("-fast", argv[i]) == 0) else if (strcmp("-fast", argv[i]) == 0)
@ -271,8 +266,7 @@ int main(int argc, char *argv[])
printf(" -tonormal\tConvert input to normal map.\n"); printf(" -tonormal\tConvert input to normal map.\n");
printf(" -clamp \tClamp wrapping mode (default).\n"); printf(" -clamp \tClamp wrapping mode (default).\n");
printf(" -repeat \tRepeat wrapping mode.\n"); printf(" -repeat \tRepeat wrapping mode.\n");
printf(" -nomips \tDisable mipmap generation.\n"); printf(" -nomips \tDisable mipmap generation.\n\n");
printf(" -premula \tPremultiply alpha into color channel.\n\n");
printf("Compression options:\n"); printf("Compression options:\n");
printf(" -fast \tFast compression.\n"); printf(" -fast \tFast compression.\n");
@ -379,11 +373,6 @@ int main(int argc, char *argv[])
inputOptions.setMipmapGeneration(false); inputOptions.setMipmapGeneration(false);
} }
if (premultiplyAlpha)
{
inputOptions.setPremultiplyAlpha(true);
inputOptions.setAlphaMode(nvtt::AlphaMode_Premultiplied);
}
nvtt::CompressionOptions compressionOptions; nvtt::CompressionOptions compressionOptions;
compressionOptions.setFormat(format); compressionOptions.setFormat(format);

View File

@ -31,144 +31,40 @@
#include "cmdline.h" #include "cmdline.h"
#include <time.h> // clock
int main(int argc, char *argv[]) int main(int argc, char *argv[])
{ {
MyAssertHandler assertHandler; MyAssertHandler assertHandler;
MyMessageHandler messageHandler; MyMessageHandler messageHandler;
bool forcenormal = false; if (argc != 2)
bool mipmaps = false;
bool faces = false;
nv::Path input;
nv::Path output;
// Parse arguments.
for (int i = 1; i < argc; i++)
{ {
if (strcmp("-forcenormal", argv[i]) == 0)
{
forcenormal = true;
}
else if (strcmp("-mipmaps", argv[i]) == 0)
{
mipmaps = true;
}
else if (strcmp("-faces", argv[i]) == 0)
{
faces = true;
}
else if (argv[i][0] != '-')
{
input = argv[i];
if (i+1 < argc && argv[i+1][0] != '-')
{
output = argv[i+1];
}
else
{
output.copy(input.str());
output.stripExtension();
output.append(".tga");
}
break;
}
}
printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n"); printf("NVIDIA Texture Tools - Copyright NVIDIA Corporation 2007\n\n");
printf("usage: nvdecompress 'ddsfile'\n\n");
if (input.isNull())
{
printf("usage: nvdecompress [options] infile [outfile]\n\n");
printf("Note: the .tga extension is forced on outfile\n\n");
printf("Input options:\n");
printf(" -forcenormal \tThe input image is a normal map.\n");
printf(" -mipmaps \tDecompress all mipmaps.\n");
printf(" -faces \tDecompress all faces.\n");
return 1; return 1;
} }
// Load surface. // Load surface.
nv::DirectDrawSurface dds(input); nv::DirectDrawSurface dds(argv[1]);
if (!dds.isValid()) if (!dds.isValid())
{ {
fprintf(stderr, "The file '%s' is not a valid DDS file.\n", input.str()); printf("The file '%s' is not a valid DDS file.\n", argv[1]);
return 1; return 1;
} }
if (!dds.isSupported() || dds.isTexture3D()) nv::Path name(argv[1]);
{ name.stripExtension();
fprintf(stderr, "The file '%s' is not a supported DDS file.\n", input.str());
return 1;
}
uint faceCount;
if (dds.isTexture2D())
{
faceCount = 1;
}
else
{
nvCheck(dds.isTextureCube());
faceCount = 6;
}
uint mipmapCount = dds.mipmapCount();
clock_t start = clock();
// apply arguments
if (forcenormal)
{
dds.setNormalFlag(true);
}
if (!faces)
{
faceCount = 1;
}
if (!mipmaps)
{
mipmapCount = 1;
}
nv::Image mipmap;
nv::Path name;
// strip extension, we force the tga extension
output.stripExtension();
// extract faces and mipmaps
for (uint f = 0; f < faceCount; f++)
{
for (uint m = 0; m < mipmapCount; m++)
{
dds.mipmap(&mipmap, f, m);
// set output filename, if we are doing faces and/or mipmaps
name.copy(output);
if (faces) name.appendFormat("_face%d", f);
if (mipmaps) name.appendFormat("_mipmap%d", m);
name.append(".tga"); name.append(".tga");
nv::StdOutputStream stream(name.str()); nv::StdOutputStream stream(name.str());
if (stream.isError()) { if (stream.isError()) {
fprintf(stderr, "Error opening '%s' for writting\n", name.str()); printf("Error opening '%s' for writting\n", name.str());
return 1; return 1;
} }
nv::ImageIO::saveTGA(stream, &mipmap); // @@ TODO: Add command line options to output mipmaps, cubemap faces, etc.
} nv::Image img;
} dds.mipmap(&img, 0, 0); // get first image
nv::ImageIO::saveTGA(stream, &img);
clock_t end = clock();
printf("\rtime taken: %.3f seconds\n", float(end-start) / CLOCKS_PER_SEC);
return 0; return 0;
} }